From c37bb26654bb8981ea237076e333eb37d4aa2dc6 Mon Sep 17 00:00:00 2001 From: Dennis Munsie Date: Tue, 20 Jun 2006 14:55:55 -0400 Subject: intelfb: add preliminary i2c support [01/07] i2c: add intelfb bit algorithm id Adds the intelfb bit algorithm id to i2c-id.h. Signed-off-by: Dennis Munsie --- include/linux/i2c-id.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index c8b81f419fd8..cce6074dd754 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -189,6 +189,7 @@ #define I2C_HW_B_RADEON 0x01001e /* radeon framebuffer driver */ #define I2C_HW_B_EM28XX 0x01001f /* em28xx video capture cards */ #define I2C_HW_B_CX2341X 0x010020 /* Conexant CX2341X MPEG encoder cards */ +#define I2C_HW_B_INTELFB 0x010021 /* intel framebuffer driver */ /* --- PCF 8584 based algorithms */ #define I2C_HW_P_LP 0x020000 /* Parallel port interface */ -- cgit v1.2.3-71-gd317 From 804af2cf6e7af31d2e664b54e657dddd9b531dbd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 26 Jul 2006 21:39:49 +0100 Subject: [AGPGART] remove private page protection map AGP keeps its own copy of the protection_map, upcoming DRM changes will also require access to this map from modules. Signed-off-by: Hugh Dickins Signed-off-by: Dave Airlie Signed-off-by: Dave Jones --- drivers/char/agp/frontend.c | 27 ++------------------------- include/linux/mm.h | 1 + mm/mmap.c | 7 +++++++ 3 files changed, 10 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index d9c5a9142ad1..0f2ed2aa2d81 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -151,35 +151,12 @@ static void agp_add_seg_to_client(struct agp_client *client, client->segments = seg; } -/* Originally taken from linux/mm/mmap.c from the array - * protection_map. - * The original really should be exported to modules, or - * some routine which does the conversion for you - */ - -static const pgprot_t my_protect_map[16] = -{ - __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, - __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 -}; - static pgprot_t agp_convert_mmap_flags(int prot) { -#define _trans(x,bit1,bit2) \ -((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0) - unsigned long prot_bits; - pgprot_t temp; - - prot_bits = _trans(prot, PROT_READ, VM_READ) | - _trans(prot, PROT_WRITE, VM_WRITE) | - _trans(prot, PROT_EXEC, VM_EXEC); - - prot_bits |= VM_SHARED; - temp = my_protect_map[prot_bits & 0x0000000f]; - - return temp; + prot_bits = calc_vm_prot_bits(prot) | VM_SHARED; + return vm_get_page_prot(prot_bits); } static int agp_create_segment(struct agp_client *client, struct agp_region *region) diff --git a/include/linux/mm.h b/include/linux/mm.h index 990957e0929f..4fba4560699b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1012,6 +1012,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +pgprot_t vm_get_page_prot(unsigned long vm_flags); struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); struct page *vmalloc_to_page(void *addr); unsigned long vmalloc_to_pfn(void *addr); diff --git a/mm/mmap.c b/mm/mmap.c index c1868ecdbc5f..c7ed061f4507 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -60,6 +60,13 @@ pgprot_t protection_map[16] = { __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 }; +pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; +} +EXPORT_SYMBOL(vm_get_page_prot); + int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; -- cgit v1.2.3-71-gd317 From 1c57e86d75cf162bdadb3a5fe0cd3f65aa1a9ca3 Mon Sep 17 00:00:00 2001 From: Erich Chen Date: Wed, 12 Jul 2006 08:59:32 -0700 Subject: [SCSI] arcmsr: initial driver, version 1.20.00.13 arcmsr is a driver for the Areca Raid controller, a host based RAID subsystem that speaks SCSI at the firmware level. This patch is quite a clean up over the initial submission with contributions from: Randy Dunlap Christoph Hellwig Matthew Wilcox Adrian Bunk Signed-off-by: Erich Chen Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- Documentation/scsi/ChangeLog.arcmsr | 56 ++ Documentation/scsi/arcmsr_spec.txt | 574 ++++++++++++++ drivers/scsi/Kconfig | 14 + drivers/scsi/Makefile | 1 + drivers/scsi/arcmsr/Makefile | 6 + drivers/scsi/arcmsr/arcmsr.h | 472 +++++++++++ drivers/scsi/arcmsr/arcmsr_attr.c | 392 +++++++++ drivers/scsi/arcmsr/arcmsr_hba.c | 1496 +++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 17 + 9 files changed, 3028 insertions(+) create mode 100644 Documentation/scsi/ChangeLog.arcmsr create mode 100644 Documentation/scsi/arcmsr_spec.txt create mode 100644 drivers/scsi/arcmsr/Makefile create mode 100644 drivers/scsi/arcmsr/arcmsr.h create mode 100644 drivers/scsi/arcmsr/arcmsr_attr.c create mode 100644 drivers/scsi/arcmsr/arcmsr_hba.c (limited to 'include/linux') diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr new file mode 100644 index 000000000000..162c47fdf45f --- /dev/null +++ b/Documentation/scsi/ChangeLog.arcmsr @@ -0,0 +1,56 @@ +************************************************************************** +** History +** +** REV# DATE NAME DESCRIPTION +** 1.00.00.00 3/31/2004 Erich Chen First release +** 1.10.00.04 7/28/2004 Erich Chen modify for ioctl +** 1.10.00.06 8/28/2004 Erich Chen modify for 2.6.x +** 1.10.00.08 9/28/2004 Erich Chen modify for x86_64 +** 1.10.00.10 10/10/2004 Erich Chen bug fix for SMP & ioctl +** 1.20.00.00 11/29/2004 Erich Chen bug fix with arcmsr_bus_reset when PHY error +** 1.20.00.02 12/09/2004 Erich Chen bug fix with over 2T bytes RAID Volume +** 1.20.00.04 1/09/2005 Erich Chen fits for Debian linux kernel version 2.2.xx +** 1.20.00.05 2/20/2005 Erich Chen cleanly as look like a Linux driver at 2.6.x +** thanks for peoples kindness comment +** Kornel Wieliczek +** Christoph Hellwig +** Adrian Bunk +** Andrew Morton +** Christoph Hellwig +** James Bottomley +** Arjan van de Ven +** 1.20.00.06 3/12/2005 Erich Chen fix with arcmsr_pci_unmap_dma "unsigned long" cast, +** modify PCCB POOL allocated by "dma_alloc_coherent" +** (Kornel Wieliczek's comment) +** 1.20.00.07 3/23/2005 Erich Chen bug fix with arcmsr_scsi_host_template_init +** occur segmentation fault, +** if RAID adapter does not on PCI slot +** and modprobe/rmmod this driver twice. +** bug fix enormous stack usage (Adrian Bunk's comment) +** 1.20.00.08 6/23/2005 Erich Chen bug fix with abort command, +** in case of heavy loading when sata cable +** working on low quality connection +** 1.20.00.09 9/12/2005 Erich Chen bug fix with abort command handling, firmware version check +** and firmware update notify for hardware bug fix +** 1.20.00.10 9/23/2005 Erich Chen enhance sysfs function for change driver's max tag Q number. +** add DMA_64BIT_MASK for backward compatible with all 2.6.x +** add some useful message for abort command +** add ioctl code 'ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE' +** customer can send this command for sync raid volume data +** 1.20.00.11 9/29/2005 Erich Chen by comment of Arjan van de Ven fix incorrect msleep redefine +** cast off sizeof(dma_addr_t) condition for 64bit pci_set_dma_mask +** 1.20.00.12 9/30/2005 Erich Chen bug fix with 64bit platform's ccbs using if over 4G system memory +** change 64bit pci_set_consistent_dma_mask into 32bit +** increcct adapter count if adapter initialize fail. +** miss edit at arcmsr_build_ccb.... +** psge += sizeof(struct _SG64ENTRY *) => +** psge += sizeof(struct _SG64ENTRY) +** 64 bits sg entry would be incorrectly calculated +** thanks Kornel Wieliczek give me kindly notify +** and detail description +** 1.20.00.13 11/15/2005 Erich Chen scheduling pending ccb with FIFO +** change the architecture of arcmsr command queue list +** for linux standard list +** enable usage of pci message signal interrupt +** follow Randy.Danlup kindness suggestion cleanup this code +************************************************************************** \ No newline at end of file diff --git a/Documentation/scsi/arcmsr_spec.txt b/Documentation/scsi/arcmsr_spec.txt new file mode 100644 index 000000000000..5e0042340fd3 --- /dev/null +++ b/Documentation/scsi/arcmsr_spec.txt @@ -0,0 +1,574 @@ +******************************************************************************* +** ARECA FIRMWARE SPEC +******************************************************************************* +** Usage of IOP331 adapter +** (All In/Out is in IOP331's view) +** 1. Message 0 --> InitThread message and retrun code +** 2. Doorbell is used for RS-232 emulation +** inDoorBell : bit0 -- data in ready +** (DRIVER DATA WRITE OK) +** bit1 -- data out has been read +** (DRIVER DATA READ OK) +** outDooeBell: bit0 -- data out ready +** (IOP331 DATA WRITE OK) +** bit1 -- data in has been read +** (IOP331 DATA READ OK) +** 3. Index Memory Usage +** offset 0xf00 : for RS232 out (request buffer) +** offset 0xe00 : for RS232 in (scratch buffer) +** offset 0xa00 : for inbound message code message_rwbuffer +** (driver send to IOP331) +** offset 0xa00 : for outbound message code message_rwbuffer +** (IOP331 send to driver) +** 4. RS-232 emulation +** Currently 128 byte buffer is used +** 1st uint32_t : Data length (1--124) +** Byte 4--127 : Max 124 bytes of data +** 5. PostQ +** All SCSI Command must be sent through postQ: +** (inbound queue port) Request frame must be 32 bytes aligned +** #bit27--bit31 => flag for post ccb +** #bit0--bit26 => real address (bit27--bit31) of post arcmsr_cdb +** bit31 : +** 0 : 256 bytes frame +** 1 : 512 bytes frame +** bit30 : +** 0 : normal request +** 1 : BIOS request +** bit29 : reserved +** bit28 : reserved +** bit27 : reserved +** --------------------------------------------------------------------------- +** (outbount queue port) Request reply +** #bit27--bit31 +** => flag for reply +** #bit0--bit26 +** => real address (bit27--bit31) of reply arcmsr_cdb +** bit31 : must be 0 (for this type of reply) +** bit30 : reserved for BIOS handshake +** bit29 : reserved +** bit28 : +** 0 : no error, ignore AdapStatus/DevStatus/SenseData +** 1 : Error, error code in AdapStatus/DevStatus/SenseData +** bit27 : reserved +** 6. BIOS request +** All BIOS request is the same with request from PostQ +** Except : +** Request frame is sent from configuration space +** offset: 0x78 : Request Frame (bit30 == 1) +** offset: 0x18 : writeonly to generate +** IRQ to IOP331 +** Completion of request: +** (bit30 == 0, bit28==err flag) +** 7. Definition of SGL entry (structure) +** 8. Message1 Out - Diag Status Code (????) +** 9. Message0 message code : +** 0x00 : NOP +** 0x01 : Get Config +** ->offset 0xa00 :for outbound message code message_rwbuffer +** (IOP331 send to driver) +** Signature 0x87974060(4) +** Request len 0x00000200(4) +** numbers of queue 0x00000100(4) +** SDRAM Size 0x00000100(4)-->256 MB +** IDE Channels 0x00000008(4) +** vendor 40 bytes char +** model 8 bytes char +** FirmVer 16 bytes char +** Device Map 16 bytes char +** FirmwareVersion DWORD <== Added for checking of +** new firmware capability +** 0x02 : Set Config +** ->offset 0xa00 :for inbound message code message_rwbuffer +** (driver send to IOP331) +** Signature 0x87974063(4) +** UPPER32 of Request Frame (4)-->Driver Only +** 0x03 : Reset (Abort all queued Command) +** 0x04 : Stop Background Activity +** 0x05 : Flush Cache +** 0x06 : Start Background Activity +** (re-start if background is halted) +** 0x07 : Check If Host Command Pending +** (Novell May Need This Function) +** 0x08 : Set controller time +** ->offset 0xa00 : for inbound message code message_rwbuffer +** (driver to IOP331) +** byte 0 : 0xaa <-- signature +** byte 1 : 0x55 <-- signature +** byte 2 : year (04) +** byte 3 : month (1..12) +** byte 4 : date (1..31) +** byte 5 : hour (0..23) +** byte 6 : minute (0..59) +** byte 7 : second (0..59) +******************************************************************************* +******************************************************************************* +** RS-232 Interface for Areca Raid Controller +** The low level command interface is exclusive with VT100 terminal +** -------------------------------------------------------------------- +** 1. Sequence of command execution +** -------------------------------------------------------------------- +** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61) +** (B) Command block : variable length of data including length, +** command code, data and checksum byte +** (C) Return data : variable length of data +** -------------------------------------------------------------------- +** 2. Command block +** -------------------------------------------------------------------- +** (A) 1st byte : command block length (low byte) +** (B) 2nd byte : command block length (high byte) +** note ..command block length shouldn't > 2040 bytes, +** length excludes these two bytes +** (C) 3rd byte : command code +** (D) 4th and following bytes : variable length data bytes +** depends on command code +** (E) last byte : checksum byte (sum of 1st byte until last data byte) +** -------------------------------------------------------------------- +** 3. Command code and associated data +** -------------------------------------------------------------------- +** The following are command code defined in raid controller Command +** code 0x10--0x1? are used for system level management, +** no password checking is needed and should be implemented in separate +** well controlled utility and not for end user access. +** Command code 0x20--0x?? always check the password, +** password must be entered to enable these command. +** enum +** { +** GUI_SET_SERIAL=0x10, +** GUI_SET_VENDOR, +** GUI_SET_MODEL, +** GUI_IDENTIFY, +** GUI_CHECK_PASSWORD, +** GUI_LOGOUT, +** GUI_HTTP, +** GUI_SET_ETHERNET_ADDR, +** GUI_SET_LOGO, +** GUI_POLL_EVENT, +** GUI_GET_EVENT, +** GUI_GET_HW_MONITOR, +** // GUI_QUICK_CREATE=0x20, (function removed) +** GUI_GET_INFO_R=0x20, +** GUI_GET_INFO_V, +** GUI_GET_INFO_P, +** GUI_GET_INFO_S, +** GUI_CLEAR_EVENT, +** GUI_MUTE_BEEPER=0x30, +** GUI_BEEPER_SETTING, +** GUI_SET_PASSWORD, +** GUI_HOST_INTERFACE_MODE, +** GUI_REBUILD_PRIORITY, +** GUI_MAX_ATA_MODE, +** GUI_RESET_CONTROLLER, +** GUI_COM_PORT_SETTING, +** GUI_NO_OPERATION, +** GUI_DHCP_IP, +** GUI_CREATE_PASS_THROUGH=0x40, +** GUI_MODIFY_PASS_THROUGH, +** GUI_DELETE_PASS_THROUGH, +** GUI_IDENTIFY_DEVICE, +** GUI_CREATE_RAIDSET=0x50, +** GUI_DELETE_RAIDSET, +** GUI_EXPAND_RAIDSET, +** GUI_ACTIVATE_RAIDSET, +** GUI_CREATE_HOT_SPARE, +** GUI_DELETE_HOT_SPARE, +** GUI_CREATE_VOLUME=0x60, +** GUI_MODIFY_VOLUME, +** GUI_DELETE_VOLUME, +** GUI_START_CHECK_VOLUME, +** GUI_STOP_CHECK_VOLUME +** }; +** Command description : +** GUI_SET_SERIAL : Set the controller serial# +** byte 0,1 : length +** byte 2 : command code 0x10 +** byte 3 : password length (should be 0x0f) +** byte 4-0x13 : should be "ArEcATecHnoLogY" +** byte 0x14--0x23 : Serial number string (must be 16 bytes) +** GUI_SET_VENDOR : Set vendor string for the controller +** byte 0,1 : length +** byte 2 : command code 0x11 +** byte 3 : password length (should be 0x08) +** byte 4-0x13 : should be "ArEcAvAr" +** byte 0x14--0x3B : vendor string (must be 40 bytes) +** GUI_SET_MODEL : Set the model name of the controller +** byte 0,1 : length +** byte 2 : command code 0x12 +** byte 3 : password length (should be 0x08) +** byte 4-0x13 : should be "ArEcAvAr" +** byte 0x14--0x1B : model string (must be 8 bytes) +** GUI_IDENTIFY : Identify device +** byte 0,1 : length +** byte 2 : command code 0x13 +** return "Areca RAID Subsystem " +** GUI_CHECK_PASSWORD : Verify password +** byte 0,1 : length +** byte 2 : command code 0x14 +** byte 3 : password length +** byte 4-0x?? : user password to be checked +** GUI_LOGOUT : Logout GUI (force password checking on next command) +** byte 0,1 : length +** byte 2 : command code 0x15 +** GUI_HTTP : HTTP interface (reserved for Http proxy service)(0x16) +** +** GUI_SET_ETHERNET_ADDR : Set the ethernet MAC address +** byte 0,1 : length +** byte 2 : command code 0x17 +** byte 3 : password length (should be 0x08) +** byte 4-0x13 : should be "ArEcAvAr" +** byte 0x14--0x19 : Ethernet MAC address (must be 6 bytes) +** GUI_SET_LOGO : Set logo in HTTP +** byte 0,1 : length +** byte 2 : command code 0x18 +** byte 3 : Page# (0/1/2/3) (0xff --> clear OEM logo) +** byte 4/5/6/7 : 0x55/0xaa/0xa5/0x5a +** byte 8 : TITLE.JPG data (each page must be 2000 bytes) +** note page0 1st 2 byte must be +** actual length of the JPG file +** GUI_POLL_EVENT : Poll If Event Log Changed +** byte 0,1 : length +** byte 2 : command code 0x19 +** GUI_GET_EVENT : Read Event +** byte 0,1 : length +** byte 2 : command code 0x1a +** byte 3 : Event Page (0:1st page/1/2/3:last page) +** GUI_GET_HW_MONITOR : Get HW monitor data +** byte 0,1 : length +** byte 2 : command code 0x1b +** byte 3 : # of FANs(example 2) +** byte 4 : # of Voltage sensor(example 3) +** byte 5 : # of temperature sensor(example 2) +** byte 6 : # of power +** byte 7/8 : Fan#0 (RPM) +** byte 9/10 : Fan#1 +** byte 11/12 : Voltage#0 original value in *1000 +** byte 13/14 : Voltage#0 value +** byte 15/16 : Voltage#1 org +** byte 17/18 : Voltage#1 +** byte 19/20 : Voltage#2 org +** byte 21/22 : Voltage#2 +** byte 23 : Temp#0 +** byte 24 : Temp#1 +** byte 25 : Power indicator (bit0 : power#0, +** bit1 : power#1) +** byte 26 : UPS indicator +** GUI_QUICK_CREATE : Quick create raid/volume set +** byte 0,1 : length +** byte 2 : command code 0x20 +** byte 3/4/5/6 : raw capacity +** byte 7 : raid level +** byte 8 : stripe size +** byte 9 : spare +** byte 10/11/12/13: device mask (the devices to create raid/volume) +** This function is removed, application like +** to implement quick create function +** need to use GUI_CREATE_RAIDSET and GUI_CREATE_VOLUMESET function. +** GUI_GET_INFO_R : Get Raid Set Information +** byte 0,1 : length +** byte 2 : command code 0x20 +** byte 3 : raidset# +** typedef struct sGUI_RAIDSET +** { +** BYTE grsRaidSetName[16]; +** DWORD grsCapacity; +** DWORD grsCapacityX; +** DWORD grsFailMask; +** BYTE grsDevArray[32]; +** BYTE grsMemberDevices; +** BYTE grsNewMemberDevices; +** BYTE grsRaidState; +** BYTE grsVolumes; +** BYTE grsVolumeList[16]; +** BYTE grsRes1; +** BYTE grsRes2; +** BYTE grsRes3; +** BYTE grsFreeSegments; +** DWORD grsRawStripes[8]; +** DWORD grsRes4; +** DWORD grsRes5; // Total to 128 bytes +** DWORD grsRes6; // Total to 128 bytes +** } sGUI_RAIDSET, *pGUI_RAIDSET; +** GUI_GET_INFO_V : Get Volume Set Information +** byte 0,1 : length +** byte 2 : command code 0x21 +** byte 3 : volumeset# +** typedef struct sGUI_VOLUMESET +** { +** BYTE gvsVolumeName[16]; // 16 +** DWORD gvsCapacity; +** DWORD gvsCapacityX; +** DWORD gvsFailMask; +** DWORD gvsStripeSize; +** DWORD gvsNewFailMask; +** DWORD gvsNewStripeSize; +** DWORD gvsVolumeStatus; +** DWORD gvsProgress; // 32 +** sSCSI_ATTR gvsScsi; +** BYTE gvsMemberDisks; +** BYTE gvsRaidLevel; // 8 +** BYTE gvsNewMemberDisks; +** BYTE gvsNewRaidLevel; +** BYTE gvsRaidSetNumber; +** BYTE gvsRes0; // 4 +** BYTE gvsRes1[4]; // 64 bytes +** } sGUI_VOLUMESET, *pGUI_VOLUMESET; +** GUI_GET_INFO_P : Get Physical Drive Information +** byte 0,1 : length +** byte 2 : command code 0x22 +** byte 3 : drive # (from 0 to max-channels - 1) +** typedef struct sGUI_PHY_DRV +** { +** BYTE gpdModelName[40]; +** BYTE gpdSerialNumber[20]; +** BYTE gpdFirmRev[8]; +** DWORD gpdCapacity; +** DWORD gpdCapacityX; // Reserved for expansion +** BYTE gpdDeviceState; +** BYTE gpdPioMode; +** BYTE gpdCurrentUdmaMode; +** BYTE gpdUdmaMode; +** BYTE gpdDriveSelect; +** BYTE gpdRaidNumber; // 0xff if not belongs to a raid set +** sSCSI_ATTR gpdScsi; +** BYTE gpdReserved[40]; // Total to 128 bytes +** } sGUI_PHY_DRV, *pGUI_PHY_DRV; +** GUI_GET_INFO_S : Get System Information +** byte 0,1 : length +** byte 2 : command code 0x23 +** typedef struct sCOM_ATTR +** { +** BYTE comBaudRate; +** BYTE comDataBits; +** BYTE comStopBits; +** BYTE comParity; +** BYTE comFlowControl; +** } sCOM_ATTR, *pCOM_ATTR; +** typedef struct sSYSTEM_INFO +** { +** BYTE gsiVendorName[40]; +** BYTE gsiSerialNumber[16]; +** BYTE gsiFirmVersion[16]; +** BYTE gsiBootVersion[16]; +** BYTE gsiMbVersion[16]; +** BYTE gsiModelName[8]; +** BYTE gsiLocalIp[4]; +** BYTE gsiCurrentIp[4]; +** DWORD gsiTimeTick; +** DWORD gsiCpuSpeed; +** DWORD gsiICache; +** DWORD gsiDCache; +** DWORD gsiScache; +** DWORD gsiMemorySize; +** DWORD gsiMemorySpeed; +** DWORD gsiEvents; +** BYTE gsiMacAddress[6]; +** BYTE gsiDhcp; +** BYTE gsiBeeper; +** BYTE gsiChannelUsage; +** BYTE gsiMaxAtaMode; +** BYTE gsiSdramEcc; // 1:if ECC enabled +** BYTE gsiRebuildPriority; +** sCOM_ATTR gsiComA; // 5 bytes +** sCOM_ATTR gsiComB; // 5 bytes +** BYTE gsiIdeChannels; +** BYTE gsiScsiHostChannels; +** BYTE gsiIdeHostChannels; +** BYTE gsiMaxVolumeSet; +** BYTE gsiMaxRaidSet; +** BYTE gsiEtherPort; // 1:if ether net port supported +** BYTE gsiRaid6Engine; // 1:Raid6 engine supported +** BYTE gsiRes[75]; +** } sSYSTEM_INFO, *pSYSTEM_INFO; +** GUI_CLEAR_EVENT : Clear System Event +** byte 0,1 : length +** byte 2 : command code 0x24 +** GUI_MUTE_BEEPER : Mute current beeper +** byte 0,1 : length +** byte 2 : command code 0x30 +** GUI_BEEPER_SETTING : Disable beeper +** byte 0,1 : length +** byte 2 : command code 0x31 +** byte 3 : 0->disable, 1->enable +** GUI_SET_PASSWORD : Change password +** byte 0,1 : length +** byte 2 : command code 0x32 +** byte 3 : pass word length ( must <= 15 ) +** byte 4 : password (must be alpha-numerical) +** GUI_HOST_INTERFACE_MODE : Set host interface mode +** byte 0,1 : length +** byte 2 : command code 0x33 +** byte 3 : 0->Independent, 1->cluster +** GUI_REBUILD_PRIORITY : Set rebuild priority +** byte 0,1 : length +** byte 2 : command code 0x34 +** byte 3 : 0/1/2/3 (low->high) +** GUI_MAX_ATA_MODE : Set maximum ATA mode to be used +** byte 0,1 : length +** byte 2 : command code 0x35 +** byte 3 : 0/1/2/3 (133/100/66/33) +** GUI_RESET_CONTROLLER : Reset Controller +** byte 0,1 : length +** byte 2 : command code 0x36 +** *Response with VT100 screen (discard it) +** GUI_COM_PORT_SETTING : COM port setting +** byte 0,1 : length +** byte 2 : command code 0x37 +** byte 3 : 0->COMA (term port), +** 1->COMB (debug port) +** byte 4 : 0/1/2/3/4/5/6/7 +** (1200/2400/4800/9600/19200/38400/57600/115200) +** byte 5 : data bit +** (0:7 bit, 1:8 bit : must be 8 bit) +** byte 6 : stop bit (0:1, 1:2 stop bits) +** byte 7 : parity (0:none, 1:off, 2:even) +** byte 8 : flow control +** (0:none, 1:xon/xoff, 2:hardware => must use none) +** GUI_NO_OPERATION : No operation +** byte 0,1 : length +** byte 2 : command code 0x38 +** GUI_DHCP_IP : Set DHCP option and local IP address +** byte 0,1 : length +** byte 2 : command code 0x39 +** byte 3 : 0:dhcp disabled, 1:dhcp enabled +** byte 4/5/6/7 : IP address +** GUI_CREATE_PASS_THROUGH : Create pass through disk +** byte 0,1 : length +** byte 2 : command code 0x40 +** byte 3 : device # +** byte 4 : scsi channel (0/1) +** byte 5 : scsi id (0-->15) +** byte 6 : scsi lun (0-->7) +** byte 7 : tagged queue (1 : enabled) +** byte 8 : cache mode (1 : enabled) +** byte 9 : max speed (0/1/2/3/4, +** async/20/40/80/160 for scsi) +** (0/1/2/3/4, 33/66/100/133/150 for ide ) +** GUI_MODIFY_PASS_THROUGH : Modify pass through disk +** byte 0,1 : length +** byte 2 : command code 0x41 +** byte 3 : device # +** byte 4 : scsi channel (0/1) +** byte 5 : scsi id (0-->15) +** byte 6 : scsi lun (0-->7) +** byte 7 : tagged queue (1 : enabled) +** byte 8 : cache mode (1 : enabled) +** byte 9 : max speed (0/1/2/3/4, +** async/20/40/80/160 for scsi) +** (0/1/2/3/4, 33/66/100/133/150 for ide ) +** GUI_DELETE_PASS_THROUGH : Delete pass through disk +** byte 0,1 : length +** byte 2 : command code 0x42 +** byte 3 : device# to be deleted +** GUI_IDENTIFY_DEVICE : Identify Device +** byte 0,1 : length +** byte 2 : command code 0x43 +** byte 3 : Flash Method +** (0:flash selected, 1:flash not selected) +** byte 4/5/6/7 : IDE device mask to be flashed +** note .... no response data available +** GUI_CREATE_RAIDSET : Create Raid Set +** byte 0,1 : length +** byte 2 : command code 0x50 +** byte 3/4/5/6 : device mask +** byte 7-22 : raidset name (if byte 7 == 0:use default) +** GUI_DELETE_RAIDSET : Delete Raid Set +** byte 0,1 : length +** byte 2 : command code 0x51 +** byte 3 : raidset# +** GUI_EXPAND_RAIDSET : Expand Raid Set +** byte 0,1 : length +** byte 2 : command code 0x52 +** byte 3 : raidset# +** byte 4/5/6/7 : device mask for expansion +** byte 8/9/10 : (8:0 no change, 1 change, 0xff:terminate, +** 9:new raid level, +** 10:new stripe size +** 0/1/2/3/4/5->4/8/16/32/64/128K ) +** byte 11/12/13 : repeat for each volume in the raidset +** GUI_ACTIVATE_RAIDSET : Activate incomplete raid set +** byte 0,1 : length +** byte 2 : command code 0x53 +** byte 3 : raidset# +** GUI_CREATE_HOT_SPARE : Create hot spare disk +** byte 0,1 : length +** byte 2 : command code 0x54 +** byte 3/4/5/6 : device mask for hot spare creation +** GUI_DELETE_HOT_SPARE : Delete hot spare disk +** byte 0,1 : length +** byte 2 : command code 0x55 +** byte 3/4/5/6 : device mask for hot spare deletion +** GUI_CREATE_VOLUME : Create volume set +** byte 0,1 : length +** byte 2 : command code 0x60 +** byte 3 : raidset# +** byte 4-19 : volume set name +** (if byte4 == 0, use default) +** byte 20-27 : volume capacity (blocks) +** byte 28 : raid level +** byte 29 : stripe size +** (0/1/2/3/4/5->4/8/16/32/64/128K) +** byte 30 : channel +** byte 31 : ID +** byte 32 : LUN +** byte 33 : 1 enable tag +** byte 34 : 1 enable cache +** byte 35 : speed +** (0/1/2/3/4->async/20/40/80/160 for scsi) +** (0/1/2/3/4->33/66/100/133/150 for IDE ) +** byte 36 : 1 to select quick init +** +** GUI_MODIFY_VOLUME : Modify volume Set +** byte 0,1 : length +** byte 2 : command code 0x61 +** byte 3 : volumeset# +** byte 4-19 : new volume set name +** (if byte4 == 0, not change) +** byte 20-27 : new volume capacity (reserved) +** byte 28 : new raid level +** byte 29 : new stripe size +** (0/1/2/3/4/5->4/8/16/32/64/128K) +** byte 30 : new channel +** byte 31 : new ID +** byte 32 : new LUN +** byte 33 : 1 enable tag +** byte 34 : 1 enable cache +** byte 35 : speed +** (0/1/2/3/4->async/20/40/80/160 for scsi) +** (0/1/2/3/4->33/66/100/133/150 for IDE ) +** GUI_DELETE_VOLUME : Delete volume set +** byte 0,1 : length +** byte 2 : command code 0x62 +** byte 3 : volumeset# +** GUI_START_CHECK_VOLUME : Start volume consistency check +** byte 0,1 : length +** byte 2 : command code 0x63 +** byte 3 : volumeset# +** GUI_STOP_CHECK_VOLUME : Stop volume consistency check +** byte 0,1 : length +** byte 2 : command code 0x64 +** --------------------------------------------------------------------- +** 4. Returned data +** --------------------------------------------------------------------- +** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61) +** (B) Length : 2 bytes +** (low byte 1st, excludes length and checksum byte) +** (C) status or data : +** <1> If length == 1 ==> 1 byte status code +** #define GUI_OK 0x41 +** #define GUI_RAIDSET_NOT_NORMAL 0x42 +** #define GUI_VOLUMESET_NOT_NORMAL 0x43 +** #define GUI_NO_RAIDSET 0x44 +** #define GUI_NO_VOLUMESET 0x45 +** #define GUI_NO_PHYSICAL_DRIVE 0x46 +** #define GUI_PARAMETER_ERROR 0x47 +** #define GUI_UNSUPPORTED_COMMAND 0x48 +** #define GUI_DISK_CONFIG_CHANGED 0x49 +** #define GUI_INVALID_PASSWORD 0x4a +** #define GUI_NO_DISK_SPACE 0x4b +** #define GUI_CHECKSUM_ERROR 0x4c +** #define GUI_PASSWORD_REQUIRED 0x4d +** <2> If length > 1 ==> +** data block returned from controller +** and the contents depends on the command code +** (E) Checksum : checksum of length and status or data byte +************************************************************************** diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 96a81cd17617..d61662c1a0ee 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -469,6 +469,20 @@ config SCSI_IN2000 To compile this driver as a module, choose M here: the module will be called in2000. +config SCSI_ARCMSR + tristate "ARECA ARC11X0[PCI-X]/ARC12X0[PCI-EXPRESS] SATA-RAID support" + depends on PCI && SCSI + help + This driver supports all of ARECA's SATA RAID controller cards. + This is an ARECA-maintained driver by Erich Chen. + If you have any problems, please mail to: < erich@areca.com.tw > + Areca supports Linux RAID config tools. + + < http://www.areca.com.tw > + + To compile this driver as a module, choose M here: the + module will be called arcmsr (modprobe arcmsr). + source "drivers/scsi/megaraid/Kconfig.megaraid" config SCSI_SATA diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index ebd0cf00bf3e..b2de9bfdfdcd 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_SCSI_PSI240I) += psi240i.o obj-$(CONFIG_SCSI_BUSLOGIC) += BusLogic.o obj-$(CONFIG_SCSI_DPT_I2O) += dpt_i2o.o obj-$(CONFIG_SCSI_U14_34F) += u14-34f.o +obj-$(CONFIG_SCSI_ARCMSR) += arcmsr/ obj-$(CONFIG_SCSI_ULTRASTOR) += ultrastor.o obj-$(CONFIG_SCSI_AHA152X) += aha152x.o obj-$(CONFIG_SCSI_AHA1542) += aha1542.o diff --git a/drivers/scsi/arcmsr/Makefile b/drivers/scsi/arcmsr/Makefile new file mode 100644 index 000000000000..721aced39168 --- /dev/null +++ b/drivers/scsi/arcmsr/Makefile @@ -0,0 +1,6 @@ +# File: drivers/arcmsr/Makefile +# Makefile for the ARECA PCI-X PCI-EXPRESS SATA RAID controllers SCSI driver. + +arcmsr-objs := arcmsr_attr.o arcmsr_hba.o + +obj-$(CONFIG_SCSI_ARCMSR) := arcmsr.o diff --git a/drivers/scsi/arcmsr/arcmsr.h b/drivers/scsi/arcmsr/arcmsr.h new file mode 100644 index 000000000000..aff96db9ccf6 --- /dev/null +++ b/drivers/scsi/arcmsr/arcmsr.h @@ -0,0 +1,472 @@ +/* +******************************************************************************* +** O.S : Linux +** FILE NAME : arcmsr.h +** BY : Erich Chen +** Description: SCSI RAID Device Driver for +** ARECA RAID Host adapter +******************************************************************************* +** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved. +** +** Web site: www.areca.com.tw +** E-mail: erich@areca.com.tw +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License version 2 as +** published by the Free Software Foundation. +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +******************************************************************************* +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +**(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +******************************************************************************* +*/ +#include + +struct class_device_attribute; + +#define ARCMSR_MAX_OUTSTANDING_CMD 256 +#define ARCMSR_MAX_FREECCB_NUM 288 +#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.13" +#define ARCMSR_SCSI_INITIATOR_ID 255 +#define ARCMSR_MAX_XFER_SECTORS 512 +#define ARCMSR_MAX_TARGETID 17 +#define ARCMSR_MAX_TARGETLUN 8 +#define ARCMSR_MAX_CMD_PERLUN ARCMSR_MAX_OUTSTANDING_CMD +#define ARCMSR_MAX_QBUFFER 4096 +#define ARCMSR_MAX_SG_ENTRIES 38 + +/* +******************************************************************************* +** split 64bits dma addressing +******************************************************************************* +*/ +#define dma_addr_hi32(addr) (uint32_t) ((addr>>16)>>16) +#define dma_addr_lo32(addr) (uint32_t) (addr & 0xffffffff) +/* +******************************************************************************* +** MESSAGE CONTROL CODE +******************************************************************************* +*/ +struct CMD_MESSAGE +{ + uint32_t HeaderLength; + uint8_t Signature[8]; + uint32_t Timeout; + uint32_t ControlCode; + uint32_t ReturnCode; + uint32_t Length; +}; +/* +******************************************************************************* +** IOP Message Transfer Data for user space +******************************************************************************* +*/ +struct CMD_MESSAGE_FIELD +{ + struct CMD_MESSAGE cmdmessage; + uint8_t messagedatabuffer[1032]; +}; +/* IOP message transfer */ +#define ARCMSR_MESSAGE_FAIL 0x0001 +/* DeviceType */ +#define ARECA_SATA_RAID 0x90000000 +/* FunctionCode */ +#define FUNCTION_READ_RQBUFFER 0x0801 +#define FUNCTION_WRITE_WQBUFFER 0x0802 +#define FUNCTION_CLEAR_RQBUFFER 0x0803 +#define FUNCTION_CLEAR_WQBUFFER 0x0804 +#define FUNCTION_CLEAR_ALLQBUFFER 0x0805 +#define FUNCTION_RETURN_CODE_3F 0x0806 +#define FUNCTION_SAY_HELLO 0x0807 +#define FUNCTION_SAY_GOODBYE 0x0808 +#define FUNCTION_FLUSH_ADAPTER_CACHE 0x0809 +/* ARECA IO CONTROL CODE*/ +#define ARCMSR_MESSAGE_READ_RQBUFFER \ + ARECA_SATA_RAID | FUNCTION_READ_RQBUFFER +#define ARCMSR_MESSAGE_WRITE_WQBUFFER \ + ARECA_SATA_RAID | FUNCTION_WRITE_WQBUFFER +#define ARCMSR_MESSAGE_CLEAR_RQBUFFER \ + ARECA_SATA_RAID | FUNCTION_CLEAR_RQBUFFER +#define ARCMSR_MESSAGE_CLEAR_WQBUFFER \ + ARECA_SATA_RAID | FUNCTION_CLEAR_WQBUFFER +#define ARCMSR_MESSAGE_CLEAR_ALLQBUFFER \ + ARECA_SATA_RAID | FUNCTION_CLEAR_ALLQBUFFER +#define ARCMSR_MESSAGE_RETURN_CODE_3F \ + ARECA_SATA_RAID | FUNCTION_RETURN_CODE_3F +#define ARCMSR_MESSAGE_SAY_HELLO \ + ARECA_SATA_RAID | FUNCTION_SAY_HELLO +#define ARCMSR_MESSAGE_SAY_GOODBYE \ + ARECA_SATA_RAID | FUNCTION_SAY_GOODBYE +#define ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE \ + ARECA_SATA_RAID | FUNCTION_FLUSH_ADAPTER_CACHE +/* ARECA IOCTL ReturnCode */ +#define ARCMSR_MESSAGE_RETURNCODE_OK 0x00000001 +#define ARCMSR_MESSAGE_RETURNCODE_ERROR 0x00000006 +#define ARCMSR_MESSAGE_RETURNCODE_3F 0x0000003F +/* +************************************************************* +** structure for holding DMA address data +************************************************************* +*/ +#define IS_SG64_ADDR 0x01000000 /* bit24 */ +struct SG32ENTRY +{ + uint32_t length; + uint32_t address; +}; +struct SG64ENTRY +{ + uint32_t length; + uint32_t address; + uint32_t addresshigh; +}; +struct SGENTRY_UNION +{ + union + { + struct SG32ENTRY sg32entry; + struct SG64ENTRY sg64entry; + }u; +}; +/* +******************************************************************** +** Q Buffer of IOP Message Transfer +******************************************************************** +*/ +struct QBUFFER +{ + uint32_t data_len; + uint8_t data[124]; +}; +/* +******************************************************************************* +** FIRMWARE INFO +******************************************************************************* +*/ +struct FIRMWARE_INFO +{ + uint32_t signature; /*0, 00-03*/ + uint32_t request_len; /*1, 04-07*/ + uint32_t numbers_queue; /*2, 08-11*/ + uint32_t sdram_size; /*3, 12-15*/ + uint32_t ide_channels; /*4, 16-19*/ + char vendor[40]; /*5, 20-59*/ + char model[8]; /*15, 60-67*/ + char firmware_ver[16]; /*17, 68-83*/ + char device_map[16]; /*21, 84-99*/ +}; +/* signature of set and get firmware config */ +#define ARCMSR_SIGNATURE_GET_CONFIG 0x87974060 +#define ARCMSR_SIGNATURE_SET_CONFIG 0x87974063 +/* message code of inbound message register */ +#define ARCMSR_INBOUND_MESG0_NOP 0x00000000 +#define ARCMSR_INBOUND_MESG0_GET_CONFIG 0x00000001 +#define ARCMSR_INBOUND_MESG0_SET_CONFIG 0x00000002 +#define ARCMSR_INBOUND_MESG0_ABORT_CMD 0x00000003 +#define ARCMSR_INBOUND_MESG0_STOP_BGRB 0x00000004 +#define ARCMSR_INBOUND_MESG0_FLUSH_CACHE 0x00000005 +#define ARCMSR_INBOUND_MESG0_START_BGRB 0x00000006 +#define ARCMSR_INBOUND_MESG0_CHK331PENDING 0x00000007 +#define ARCMSR_INBOUND_MESG0_SYNC_TIMER 0x00000008 +/* doorbell interrupt generator */ +#define ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK 0x00000001 +#define ARCMSR_INBOUND_DRIVER_DATA_READ_OK 0x00000002 +#define ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK 0x00000001 +#define ARCMSR_OUTBOUND_IOP331_DATA_READ_OK 0x00000002 +/* ccb areca cdb flag */ +#define ARCMSR_CCBPOST_FLAG_SGL_BSIZE 0x80000000 +#define ARCMSR_CCBPOST_FLAG_IAM_BIOS 0x40000000 +#define ARCMSR_CCBREPLY_FLAG_IAM_BIOS 0x40000000 +#define ARCMSR_CCBREPLY_FLAG_ERROR 0x10000000 +/* outbound firmware ok */ +#define ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK 0x80000000 +/* +******************************************************************************* +** ARECA SCSI COMMAND DESCRIPTOR BLOCK size 0x1F8 (504) +******************************************************************************* +*/ +struct ARCMSR_CDB +{ + uint8_t Bus; + uint8_t TargetID; + uint8_t LUN; + uint8_t Function; + + uint8_t CdbLength; + uint8_t sgcount; + uint8_t Flags; +#define ARCMSR_CDB_FLAG_SGL_BSIZE 0x01 +#define ARCMSR_CDB_FLAG_BIOS 0x02 +#define ARCMSR_CDB_FLAG_WRITE 0x04 +#define ARCMSR_CDB_FLAG_SIMPLEQ 0x00 +#define ARCMSR_CDB_FLAG_HEADQ 0x08 +#define ARCMSR_CDB_FLAG_ORDEREDQ 0x10 + uint8_t Reserved1; + + uint32_t Context; + uint32_t DataLength; + + uint8_t Cdb[16]; + + uint8_t DeviceStatus; +#define ARCMSR_DEV_CHECK_CONDITION 0x02 +#define ARCMSR_DEV_SELECT_TIMEOUT 0xF0 +#define ARCMSR_DEV_ABORTED 0xF1 +#define ARCMSR_DEV_INIT_FAIL 0xF2 + uint8_t SenseData[15]; + + union + { + struct SG32ENTRY sg32entry[ARCMSR_MAX_SG_ENTRIES]; + struct SG64ENTRY sg64entry[ARCMSR_MAX_SG_ENTRIES]; + } u; +}; +/* +******************************************************************************* +** Messaging Unit (MU) of the Intel R 80331 I/O processor (80331) +******************************************************************************* +*/ +struct MessageUnit +{ + uint32_t resrved0[4]; /*0000 000F*/ + uint32_t inbound_msgaddr0; /*0010 0013*/ + uint32_t inbound_msgaddr1; /*0014 0017*/ + uint32_t outbound_msgaddr0; /*0018 001B*/ + uint32_t outbound_msgaddr1; /*001C 001F*/ + uint32_t inbound_doorbell; /*0020 0023*/ + uint32_t inbound_intstatus; /*0024 0027*/ + uint32_t inbound_intmask; /*0028 002B*/ + uint32_t outbound_doorbell; /*002C 002F*/ + uint32_t outbound_intstatus; /*0030 0033*/ + uint32_t outbound_intmask; /*0034 0037*/ + uint32_t reserved1[2]; /*0038 003F*/ + uint32_t inbound_queueport; /*0040 0043*/ + uint32_t outbound_queueport; /*0044 0047*/ + uint32_t reserved2[2]; /*0048 004F*/ + uint32_t reserved3[492]; /*0050 07FF 492*/ + uint32_t reserved4[128]; /*0800 09FF 128*/ + uint32_t message_rwbuffer[256]; /*0a00 0DFF 256*/ + uint32_t message_wbuffer[32]; /*0E00 0E7F 32*/ + uint32_t reserved5[32]; /*0E80 0EFF 32*/ + uint32_t message_rbuffer[32]; /*0F00 0F7F 32*/ + uint32_t reserved6[32]; /*0F80 0FFF 32*/ +}; +/* +******************************************************************************* +** Adapter Control Block +******************************************************************************* +*/ +struct AdapterControlBlock +{ + struct pci_dev * pdev; + struct Scsi_Host * host; + unsigned long vir2phy_offset; + /* Offset is used in making arc cdb physical to virtual calculations */ + uint32_t outbound_int_enable; + + struct MessageUnit __iomem * pmu; + /* message unit ATU inbound base address0 */ + + uint32_t acb_flags; +#define ACB_F_SCSISTOPADAPTER 0x0001 +#define ACB_F_MSG_STOP_BGRB 0x0002 + /* stop RAID background rebuild */ +#define ACB_F_MSG_START_BGRB 0x0004 + /* stop RAID background rebuild */ +#define ACB_F_IOPDATA_OVERFLOW 0x0008 + /* iop message data rqbuffer overflow */ +#define ACB_F_MESSAGE_WQBUFFER_CLEARED 0x0010 + /* message clear wqbuffer */ +#define ACB_F_MESSAGE_RQBUFFER_CLEARED 0x0020 + /* message clear rqbuffer */ +#define ACB_F_MESSAGE_WQBUFFER_READED 0x0040 +#define ACB_F_BUS_RESET 0x0080 +#define ACB_F_IOP_INITED 0x0100 + /* iop init */ + + struct CommandControlBlock * pccb_pool[ARCMSR_MAX_FREECCB_NUM]; + /* used for memory free */ + struct list_head ccb_free_list; + /* head of free ccb list */ + atomic_t ccboutstandingcount; + + void * dma_coherent; + /* dma_coherent used for memory free */ + dma_addr_t dma_coherent_handle; + /* dma_coherent_handle used for memory free */ + + uint8_t rqbuffer[ARCMSR_MAX_QBUFFER]; + /* data collection buffer for read from 80331 */ + int32_t rqbuf_firstindex; + /* first of read buffer */ + int32_t rqbuf_lastindex; + /* last of read buffer */ + uint8_t wqbuffer[ARCMSR_MAX_QBUFFER]; + /* data collection buffer for write to 80331 */ + int32_t wqbuf_firstindex; + /* first of write buffer */ + int32_t wqbuf_lastindex; + /* last of write buffer */ + uint8_t devstate[ARCMSR_MAX_TARGETID][ARCMSR_MAX_TARGETLUN]; + /* id0 ..... id15, lun0...lun7 */ +#define ARECA_RAID_GONE 0x55 +#define ARECA_RAID_GOOD 0xaa + uint32_t num_resets; + uint32_t num_aborts; + uint32_t firm_request_len; + uint32_t firm_numbers_queue; + uint32_t firm_sdram_size; + uint32_t firm_hd_channels; + char firm_model[12]; + char firm_version[20]; +};/* HW_DEVICE_EXTENSION */ +/* +******************************************************************************* +** Command Control Block +** this CCB length must be 32 bytes boundary +******************************************************************************* +*/ +struct CommandControlBlock +{ + struct ARCMSR_CDB arcmsr_cdb; + /* + ** 0-503 (size of CDB=504): + ** arcmsr messenger scsi command descriptor size 504 bytes + */ + uint32_t cdb_shifted_phyaddr; + /* 504-507 */ + uint32_t reserved1; + /* 508-511 */ +#if BITS_PER_LONG == 64 + /* ======================512+64 bytes======================== */ + struct list_head list; + /* 512-527 16 bytes next/prev ptrs for ccb lists */ + struct scsi_cmnd * pcmd; + /* 528-535 8 bytes pointer of linux scsi command */ + struct AdapterControlBlock * acb; + /* 536-543 8 bytes pointer of acb */ + + uint16_t ccb_flags; + /* 544-545 */ + #define CCB_FLAG_READ 0x0000 + #define CCB_FLAG_WRITE 0x0001 + #define CCB_FLAG_ERROR 0x0002 + #define CCB_FLAG_FLUSHCACHE 0x0004 + #define CCB_FLAG_MASTER_ABORTED 0x0008 + uint16_t startdone; + /* 546-547 */ + #define ARCMSR_CCB_DONE 0x0000 + #define ARCMSR_CCB_START 0x55AA + #define ARCMSR_CCB_ABORTED 0xAA55 + #define ARCMSR_CCB_ILLEGAL 0xFFFF + uint32_t reserved2[7]; + /* 548-551 552-555 556-559 560-563 564-567 568-571 572-575 */ +#else + /* ======================512+32 bytes======================== */ + struct list_head list; + /* 512-519 8 bytes next/prev ptrs for ccb lists */ + struct scsi_cmnd * pcmd; + /* 520-523 4 bytes pointer of linux scsi command */ + struct AdapterControlBlock * acb; + /* 524-527 4 bytes pointer of acb */ + + uint16_t ccb_flags; + /* 528-529 */ + #define CCB_FLAG_READ 0x0000 + #define CCB_FLAG_WRITE 0x0001 + #define CCB_FLAG_ERROR 0x0002 + #define CCB_FLAG_FLUSHCACHE 0x0004 + #define CCB_FLAG_MASTER_ABORTED 0x0008 + uint16_t startdone; + /* 530-531 */ + #define ARCMSR_CCB_DONE 0x0000 + #define ARCMSR_CCB_START 0x55AA + #define ARCMSR_CCB_ABORTED 0xAA55 + #define ARCMSR_CCB_ILLEGAL 0xFFFF + uint32_t reserved2[3]; + /* 532-535 536-539 540-543 */ +#endif + /* ========================================================== */ +}; +/* +******************************************************************************* +** ARECA SCSI sense data +******************************************************************************* +*/ +struct SENSE_DATA +{ + uint8_t ErrorCode:7; +#define SCSI_SENSE_CURRENT_ERRORS 0x70 +#define SCSI_SENSE_DEFERRED_ERRORS 0x71 + uint8_t Valid:1; + uint8_t SegmentNumber; + uint8_t SenseKey:4; + uint8_t Reserved:1; + uint8_t IncorrectLength:1; + uint8_t EndOfMedia:1; + uint8_t FileMark:1; + uint8_t Information[4]; + uint8_t AdditionalSenseLength; + uint8_t CommandSpecificInformation[4]; + uint8_t AdditionalSenseCode; + uint8_t AdditionalSenseCodeQualifier; + uint8_t FieldReplaceableUnitCode; + uint8_t SenseKeySpecific[3]; +}; +/* +******************************************************************************* +** Outbound Interrupt Status Register - OISR +******************************************************************************* +*/ +#define ARCMSR_MU_OUTBOUND_INTERRUPT_STATUS_REG 0x30 +#define ARCMSR_MU_OUTBOUND_PCI_INT 0x10 +#define ARCMSR_MU_OUTBOUND_POSTQUEUE_INT 0x08 +#define ARCMSR_MU_OUTBOUND_DOORBELL_INT 0x04 +#define ARCMSR_MU_OUTBOUND_MESSAGE1_INT 0x02 +#define ARCMSR_MU_OUTBOUND_MESSAGE0_INT 0x01 +#define ARCMSR_MU_OUTBOUND_HANDLE_INT \ + (ARCMSR_MU_OUTBOUND_MESSAGE0_INT \ + |ARCMSR_MU_OUTBOUND_MESSAGE1_INT \ + |ARCMSR_MU_OUTBOUND_DOORBELL_INT \ + |ARCMSR_MU_OUTBOUND_POSTQUEUE_INT \ + |ARCMSR_MU_OUTBOUND_PCI_INT) +/* +******************************************************************************* +** Outbound Interrupt Mask Register - OIMR +******************************************************************************* +*/ +#define ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG 0x34 +#define ARCMSR_MU_OUTBOUND_PCI_INTMASKENABLE 0x10 +#define ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE 0x08 +#define ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE 0x04 +#define ARCMSR_MU_OUTBOUND_MESSAGE1_INTMASKENABLE 0x02 +#define ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE 0x01 +#define ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE 0x1F + +extern void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb); +extern struct class_device_attribute *arcmsr_host_attrs[]; +extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb); +void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb); + diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c new file mode 100644 index 000000000000..0459f4194d7c --- /dev/null +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -0,0 +1,392 @@ +/* +******************************************************************************* +** O.S : Linux +** FILE NAME : arcmsr_attr.c +** BY : Erich Chen +** Description: attributes exported to sysfs and device host +******************************************************************************* +** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved +** +** Web site: www.areca.com.tw +** E-mail: erich@areca.com.tw +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License version 2 as +** published by the Free Software Foundation. +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +******************************************************************************* +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +******************************************************************************* +** For history of changes, see Documentation/scsi/ChangeLog.arcmsr +** Firmware Specification, see Documentation/scsi/arcmsr_spec.txt +******************************************************************************* +*/ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "arcmsr.h" + +struct class_device_attribute *arcmsr_host_attrs[]; + +static ssize_t +arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + uint8_t *pQbuffer,*ptmpQbuffer; + int32_t allxfer_len = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + /* do message unit read. */ + ptmpQbuffer = (uint8_t *)buf; + while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex) + && (allxfer_len < 1031)) { + pQbuffer = &acb->rqbuffer[acb->rqbuf_firstindex]; + memcpy(ptmpQbuffer, pQbuffer, 1); + acb->rqbuf_firstindex++; + acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + ptmpQbuffer++; + allxfer_len++; + } + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *) + ®->message_rbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data; + int32_t iop_len; + + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + iop_len = readl(&prbuffer->data_len); + while (iop_len > 0) { + acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data); + acb->rqbuf_lastindex++; + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + iop_len--; + } + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } + return (allxfer_len); +} + +static ssize_t +arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + int32_t my_empty_len, user_len, wqbuf_firstindex, wqbuf_lastindex; + uint8_t *pQbuffer, *ptmpuserbuffer; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (count > 1032) + return -EINVAL; + /* do message unit write. */ + ptmpuserbuffer = (uint8_t *)buf; + user_len = (int32_t)count; + wqbuf_lastindex = acb->wqbuf_lastindex; + wqbuf_firstindex = acb->wqbuf_firstindex; + if (wqbuf_lastindex != wqbuf_firstindex) { + arcmsr_post_Qbuffer(acb); + return 0; /*need retry*/ + } else { + my_empty_len = (wqbuf_firstindex-wqbuf_lastindex - 1) + &(ARCMSR_MAX_QBUFFER - 1); + if (my_empty_len >= user_len) { + while (user_len > 0) { + pQbuffer = + &acb->wqbuffer[acb->wqbuf_lastindex]; + memcpy(pQbuffer, ptmpuserbuffer, 1); + acb->wqbuf_lastindex++; + acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + ptmpuserbuffer++; + user_len--; + } + if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) { + acb->acb_flags &= + ~ACB_F_MESSAGE_WQBUFFER_CLEARED; + arcmsr_post_Qbuffer(acb); + } + return count; + } else { + return 0; /*need retry*/ + } + } +} + +static ssize_t +arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + uint8_t *pQbuffer; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK + , ®->inbound_doorbell); + } + acb->acb_flags |= + (ACB_F_MESSAGE_WQBUFFER_CLEARED + | ACB_F_MESSAGE_RQBUFFER_CLEARED + | ACB_F_MESSAGE_WQBUFFER_READED); + acb->rqbuf_firstindex = 0; + acb->rqbuf_lastindex = 0; + acb->wqbuf_firstindex = 0; + acb->wqbuf_lastindex = 0; + pQbuffer = acb->rqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + pQbuffer = acb->wqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + return 1; +} + +static struct bin_attribute arcmsr_sysfs_message_read_attr = { + .attr = { + .name = "mu_read", + .mode = S_IRUSR , + .owner = THIS_MODULE, + }, + .size = 1032, + .read = arcmsr_sysfs_iop_message_read, +}; + +static struct bin_attribute arcmsr_sysfs_message_write_attr = { + .attr = { + .name = "mu_write", + .mode = S_IWUSR, + .owner = THIS_MODULE, + }, + .size = 1032, + .write = arcmsr_sysfs_iop_message_write, +}; + +static struct bin_attribute arcmsr_sysfs_message_clear_attr = { + .attr = { + .name = "mu_clear", + .mode = S_IWUSR, + .owner = THIS_MODULE, + }, + .size = 1, + .write = arcmsr_sysfs_iop_message_clear, +}; + +int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb) +{ + struct Scsi_Host *host = acb->host; + int error; + + error = sysfs_create_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_read_attr); + if (error) { + printk(KERN_ERR "arcmsr: alloc sysfs mu_read failed\n"); + goto error_bin_file_message_read; + } + error = sysfs_create_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_write_attr); + if (error) { + printk(KERN_ERR "arcmsr: alloc sysfs mu_write failed\n"); + goto error_bin_file_message_write; + } + error = sysfs_create_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_clear_attr); + if (error) { + printk(KERN_ERR "arcmsr: alloc sysfs mu_clear failed\n"); + goto error_bin_file_message_clear; + } + return 0; +error_bin_file_message_clear: + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_write_attr); + if (error) + printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_write failed\n"); +error_bin_file_message_write: + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_read_attr); + if (error) + printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_read failed\n"); +error_bin_file_message_read: + return error; +} + +void +arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) { + struct Scsi_Host *host = acb->host; + int error; + + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_clear_attr); + if (error) + printk(KERN_ERR "arcmsr: free sysfs mu_clear failed\n"); + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_write_attr); + if (error) + printk(KERN_ERR "arcmsr: free sysfs mu_write failed\n"); + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_read_attr); + if (error) + printk(KERN_ERR "arcmsr: free sysfss mu_read failed\n"); +} + + +static ssize_t +arcmsr_attr_host_driver_version(struct class_device *cdev, char *buf) { + return snprintf(buf, PAGE_SIZE, + "ARCMSR: %s\n", + ARCMSR_DRIVER_VERSION); +} + +static ssize_t +arcmsr_attr_host_driver_posted_cmd(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "Current commands posted: %4d\n", + atomic_read(&acb->ccboutstandingcount)); +} + +static ssize_t +arcmsr_attr_host_driver_reset(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "SCSI Host Resets: %4d\n", + acb->num_resets); +} + +static ssize_t +arcmsr_attr_host_driver_abort(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "SCSI Aborts/Timeouts: %4d\n", + acb->num_aborts); +} + +static ssize_t +arcmsr_attr_host_fw_model(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "Adapter Model: %s\n", + acb->firm_model); +} + +static ssize_t +arcmsr_attr_host_fw_version(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Firmware Version: %s\n", + acb->firm_version); +} + +static ssize_t +arcmsr_attr_host_fw_request_len(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Reguest Lenth: %4d\n", + acb->firm_request_len); +} + +static ssize_t +arcmsr_attr_host_fw_numbers_queue(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Numbers of Queue: %4d\n", + acb->firm_numbers_queue); +} + +static ssize_t +arcmsr_attr_host_fw_sdram_size(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "SDRAM Size: %4d\n", + acb->firm_sdram_size); +} + +static ssize_t +arcmsr_attr_host_fw_hd_channels(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Hard Disk Channels: %4d\n", + acb->firm_hd_channels); +} + +static CLASS_DEVICE_ATTR(host_driver_version, S_IRUGO, arcmsr_attr_host_driver_version, NULL); +static CLASS_DEVICE_ATTR(host_driver_posted_cmd, S_IRUGO, arcmsr_attr_host_driver_posted_cmd, NULL); +static CLASS_DEVICE_ATTR(host_driver_reset, S_IRUGO, arcmsr_attr_host_driver_reset, NULL); +static CLASS_DEVICE_ATTR(host_driver_abort, S_IRUGO, arcmsr_attr_host_driver_abort, NULL); +static CLASS_DEVICE_ATTR(host_fw_model, S_IRUGO, arcmsr_attr_host_fw_model, NULL); +static CLASS_DEVICE_ATTR(host_fw_version, S_IRUGO, arcmsr_attr_host_fw_version, NULL); +static CLASS_DEVICE_ATTR(host_fw_request_len, S_IRUGO, arcmsr_attr_host_fw_request_len, NULL); +static CLASS_DEVICE_ATTR(host_fw_numbers_queue, S_IRUGO, arcmsr_attr_host_fw_numbers_queue, NULL); +static CLASS_DEVICE_ATTR(host_fw_sdram_size, S_IRUGO, arcmsr_attr_host_fw_sdram_size, NULL); +static CLASS_DEVICE_ATTR(host_fw_hd_channels, S_IRUGO, arcmsr_attr_host_fw_hd_channels, NULL); + +struct class_device_attribute *arcmsr_host_attrs[] = { + &class_device_attr_host_driver_version, + &class_device_attr_host_driver_posted_cmd, + &class_device_attr_host_driver_reset, + &class_device_attr_host_driver_abort, + &class_device_attr_host_fw_model, + &class_device_attr_host_fw_version, + &class_device_attr_host_fw_request_len, + &class_device_attr_host_fw_numbers_queue, + &class_device_attr_host_fw_sdram_size, + &class_device_attr_host_fw_hd_channels, + NULL, +}; diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c new file mode 100644 index 000000000000..475f978ff8f0 --- /dev/null +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -0,0 +1,1496 @@ +/* +******************************************************************************* +** O.S : Linux +** FILE NAME : arcmsr_hba.c +** BY : Erich Chen +** Description: SCSI RAID Device Driver for +** ARECA RAID Host adapter +******************************************************************************* +** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved +** +** Web site: www.areca.com.tw +** E-mail: erich@areca.com.tw +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License version 2 as +** published by the Free Software Foundation. +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +******************************************************************************* +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +******************************************************************************* +** For history of changes, see Documentation/scsi/ChangeLog.arcmsr +** Firmware Specification, see Documentation/scsi/arcmsr_spec.txt +******************************************************************************* +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "arcmsr.h" + +MODULE_AUTHOR("Erich Chen "); +MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(ARCMSR_DRIVER_VERSION); + +static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd); +static int arcmsr_abort(struct scsi_cmnd *); +static int arcmsr_bus_reset(struct scsi_cmnd *); +static int arcmsr_bios_param(struct scsi_device *sdev, + struct block_device *bdev, sector_t capacity, int *info); +static int arcmsr_queue_command(struct scsi_cmnd * cmd, + void (*done) (struct scsi_cmnd *)); +static int arcmsr_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void arcmsr_remove(struct pci_dev *pdev); +static void arcmsr_shutdown(struct pci_dev *pdev); +static void arcmsr_iop_init(struct AdapterControlBlock *acb); +static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb); +static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb); +static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb); +static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb); +static const char *arcmsr_info(struct Scsi_Host *); +static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb); + +static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth) +{ + if (queue_depth > ARCMSR_MAX_CMD_PERLUN) + queue_depth = ARCMSR_MAX_CMD_PERLUN; + scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth); + return queue_depth; +} + +static struct scsi_host_template arcmsr_scsi_host_template = { + .module = THIS_MODULE, + .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, + .info = arcmsr_info, + .queuecommand = arcmsr_queue_command, + .eh_abort_handler = arcmsr_abort, + .eh_bus_reset_handler = arcmsr_bus_reset, + .bios_param = arcmsr_bios_param, + .change_queue_depth = arcmsr_adjust_disk_queue_depth, + .can_queue = ARCMSR_MAX_OUTSTANDING_CMD, + .this_id = ARCMSR_SCSI_INITIATOR_ID, + .sg_tablesize = ARCMSR_MAX_SG_ENTRIES, + .max_sectors = ARCMSR_MAX_XFER_SECTORS, + .cmd_per_lun = ARCMSR_MAX_CMD_PERLUN, + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = arcmsr_host_attrs, +}; + +static struct pci_device_id arcmsr_device_id_table[] = { + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1120)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1130)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1160)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1170)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1210)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1220)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1230)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1260)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1270)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1280)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1380)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1381)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1680)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1681)}, + {0, 0}, /* Terminating entry */ +}; +MODULE_DEVICE_TABLE(pci, arcmsr_device_id_table); +static struct pci_driver arcmsr_pci_driver = { + .name = "arcmsr", + .id_table = arcmsr_device_id_table, + .probe = arcmsr_probe, + .remove = arcmsr_remove, + .shutdown = arcmsr_shutdown +}; + +static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + irqreturn_t handle_state; + struct AdapterControlBlock *acb; + unsigned long flags; + + acb = (struct AdapterControlBlock *)dev_id; + + spin_lock_irqsave(acb->host->host_lock, flags); + handle_state = arcmsr_interrupt(acb); + spin_unlock_irqrestore(acb->host->host_lock, flags); + return handle_state; +} + +static int arcmsr_bios_param(struct scsi_device *sdev, + struct block_device *bdev, sector_t capacity, int *geom) +{ + int ret, heads, sectors, cylinders, total_capacity; + unsigned char *buffer;/* return copy of block device's partition table */ + + buffer = scsi_bios_ptable(bdev); + if (buffer) { + ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]); + kfree(buffer); + if (ret != -1) + return ret; + } + total_capacity = capacity; + heads = 64; + sectors = 32; + cylinders = total_capacity / (heads * sectors); + if (cylinders > 1024) { + heads = 255; + sectors = 63; + cylinders = total_capacity / (heads * sectors); + } + geom[0] = heads; + geom[1] = sectors; + geom[2] = cylinders; + return 0; +} + +static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb) +{ + struct pci_dev *pdev = acb->pdev; + struct MessageUnit __iomem *reg = acb->pmu; + u32 ccb_phyaddr_hi32; + void *dma_coherent; + dma_addr_t dma_coherent_handle, dma_addr; + struct CommandControlBlock *ccb_tmp; + int i, j; + + dma_coherent = dma_alloc_coherent(&pdev->dev, + ARCMSR_MAX_FREECCB_NUM * + sizeof (struct CommandControlBlock) + 0x20, + &dma_coherent_handle, GFP_KERNEL); + if (!dma_coherent) + return -ENOMEM; + + acb->dma_coherent = dma_coherent; + acb->dma_coherent_handle = dma_coherent_handle; + + if (((unsigned long)dma_coherent & 0x1F)) { + dma_coherent = dma_coherent + + (0x20 - ((unsigned long)dma_coherent & 0x1F)); + dma_coherent_handle = dma_coherent_handle + + (0x20 - ((unsigned long)dma_coherent_handle & 0x1F)); + } + + dma_addr = dma_coherent_handle; + ccb_tmp = (struct CommandControlBlock *)dma_coherent; + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + ccb_tmp->cdb_shifted_phyaddr = dma_addr >> 5; + ccb_tmp->acb = acb; + acb->pccb_pool[i] = ccb_tmp; + list_add_tail(&ccb_tmp->list, &acb->ccb_free_list); + dma_addr = dma_addr + sizeof (struct CommandControlBlock); + ccb_tmp++; + } + + acb->vir2phy_offset = (unsigned long)ccb_tmp - + (unsigned long)dma_addr; + for (i = 0; i < ARCMSR_MAX_TARGETID; i++) + for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++) + acb->devstate[i][j] = ARECA_RAID_GOOD; + + /* + ** here we need to tell iop 331 our ccb_tmp.HighPart + ** if ccb_tmp.HighPart is not zero + */ + ccb_phyaddr_hi32 = (uint32_t) ((dma_coherent_handle >> 16) >> 16); + if (ccb_phyaddr_hi32 != 0) { + writel(ARCMSR_SIGNATURE_SET_CONFIG, ®->message_rwbuffer[0]); + writel(ccb_phyaddr_hi32, ®->message_rwbuffer[1]); + writel(ARCMSR_INBOUND_MESG0_SET_CONFIG, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE "arcmsr%d: " + "'set ccb high part physical address' timeout\n", + acb->host->host_no); + } + + writel(readl(®->outbound_intmask) | + ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, + ®->outbound_intmask); + return 0; +} + +static int arcmsr_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct Scsi_Host *host; + struct AdapterControlBlock *acb; + uint8_t bus, dev_fun; + int error; + + error = pci_enable_device(pdev); + if (error) + goto out; + pci_set_master(pdev); + + host = scsi_host_alloc(&arcmsr_scsi_host_template, + sizeof(struct AdapterControlBlock)); + if (!host) { + error = -ENOMEM; + goto out_disable_device; + } + acb = (struct AdapterControlBlock *)host->hostdata; + memset(acb, 0, sizeof (struct AdapterControlBlock)); + + error = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + if (error) { + error = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (error) { + printk(KERN_WARNING + "scsi%d: No suitable DMA mask available\n", + host->host_no); + goto out_host_put; + } + } + bus = pdev->bus->number; + dev_fun = pdev->devfn; + acb->host = host; + acb->pdev = pdev; + host->max_sectors = ARCMSR_MAX_XFER_SECTORS; + host->max_lun = ARCMSR_MAX_TARGETLUN; + host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/ + host->max_cmd_len = 16; /*this is issue of 64bit LBA, over 2T byte*/ + host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES; + host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */ + host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN; + host->this_id = ARCMSR_SCSI_INITIATOR_ID; + host->unique_id = (bus << 8) | dev_fun; + host->irq = pdev->irq; + error = pci_request_regions(pdev, "arcmsr"); + if (error) + goto out_host_put; + + acb->pmu = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!acb->pmu) { + printk(KERN_NOTICE "arcmsr%d: memory" + " mapping region fail \n", acb->host->host_no); + goto out_release_regions; + } + acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED | + ACB_F_MESSAGE_RQBUFFER_CLEARED | + ACB_F_MESSAGE_WQBUFFER_READED); + acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER; + INIT_LIST_HEAD(&acb->ccb_free_list); + + error = arcmsr_alloc_ccb_pool(acb); + if (error) + goto out_iounmap; + + error = request_irq(pdev->irq, arcmsr_do_interrupt, + SA_INTERRUPT | SA_SHIRQ, "arcmsr", acb); + if (error) + goto out_free_ccb_pool; + + arcmsr_iop_init(acb); + pci_set_drvdata(pdev, host); + + error = scsi_add_host(host, &pdev->dev); + if (error) + goto out_free_irq; + + error = arcmsr_alloc_sysfs_attr(acb); + if (error) + goto out_free_sysfs; + + scsi_scan_host(host); + return 0; + out_free_sysfs: + out_free_irq: + free_irq(pdev->irq, acb); + out_free_ccb_pool: + arcmsr_free_ccb_pool(acb); + out_iounmap: + iounmap(acb->pmu); + out_release_regions: + pci_release_regions(pdev); + out_host_put: + scsi_host_put(host); + out_disable_device: + pci_disable_device(pdev); + out: + return error; +} + +static void arcmsr_abort_allcmd(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + + writel(ARCMSR_INBOUND_MESG0_ABORT_CMD, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait 'abort all outstanding command' timeout \n" + , acb->host->host_no); +} + +static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb) +{ + struct AdapterControlBlock *acb = ccb->acb; + struct scsi_cmnd *pcmd = ccb->pcmd; + + if (pcmd->use_sg != 0) { + struct scatterlist *sl; + + sl = (struct scatterlist *)pcmd->request_buffer; + pci_unmap_sg(acb->pdev, sl, pcmd->use_sg, pcmd->sc_data_direction); + } + else if (pcmd->request_bufflen != 0) + pci_unmap_single(acb->pdev, + pcmd->SCp.dma_handle, + pcmd->request_bufflen, pcmd->sc_data_direction); +} + +static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag) +{ + struct AdapterControlBlock *acb = ccb->acb; + struct scsi_cmnd *pcmd = ccb->pcmd; + + arcmsr_pci_unmap_dma(ccb); + if (stand_flag == 1) + atomic_dec(&acb->ccboutstandingcount); + ccb->startdone = ARCMSR_CCB_DONE; + ccb->ccb_flags = 0; + list_add_tail(&ccb->list, &acb->ccb_free_list); + pcmd->scsi_done(pcmd); +} + +static void arcmsr_remove(struct pci_dev *pdev) +{ + struct Scsi_Host *host = pci_get_drvdata(pdev); + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + int poll_count = 0; + + arcmsr_free_sysfs_attr(acb); + scsi_remove_host(host); + arcmsr_stop_adapter_bgrb(acb); + arcmsr_flush_adapter_cache(acb); + writel(readl(®->outbound_intmask) | + ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, + ®->outbound_intmask); + acb->acb_flags |= ACB_F_SCSISTOPADAPTER; + acb->acb_flags &= ~ACB_F_IOP_INITED; + + for (poll_count = 0; poll_count < 256; poll_count++) { + if (!atomic_read(&acb->ccboutstandingcount)) + break; + arcmsr_interrupt(acb); + msleep(25); + } + + if (atomic_read(&acb->ccboutstandingcount)) { + int i; + + arcmsr_abort_allcmd(acb); + for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++) + readl(®->outbound_queueport); + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + struct CommandControlBlock *ccb = acb->pccb_pool[i]; + if (ccb->startdone == ARCMSR_CCB_START) { + ccb->startdone = ARCMSR_CCB_ABORTED; + ccb->pcmd->result = DID_ABORT << 16; + arcmsr_ccb_complete(ccb, 1); + } + } + } + + free_irq(pdev->irq, acb); + iounmap(acb->pmu); + arcmsr_free_ccb_pool(acb); + pci_release_regions(pdev); + + scsi_host_put(host); + + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static void arcmsr_shutdown(struct pci_dev *pdev) +{ + struct Scsi_Host *host = pci_get_drvdata(pdev); + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *)host->hostdata; + + arcmsr_stop_adapter_bgrb(acb); + arcmsr_flush_adapter_cache(acb); +} + +static int arcmsr_module_init(void) +{ + int error = 0; + + error = pci_register_driver(&arcmsr_pci_driver); + return error; +} + +static void arcmsr_module_exit(void) +{ + pci_unregister_driver(&arcmsr_pci_driver); +} +module_init(arcmsr_module_init); +module_exit(arcmsr_module_exit); + +static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + u32 orig_mask = readl(®->outbound_intmask); + + writel(orig_mask | ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, + ®->outbound_intmask); + return orig_mask; +} + +static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb, + u32 orig_mask) +{ + struct MessageUnit __iomem *reg = acb->pmu; + u32 mask; + + mask = orig_mask & ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE | + ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE); + writel(mask, ®->outbound_intmask); +} + +static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg=acb->pmu; + + writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait 'flush adapter cache' timeout \n" + , acb->host->host_no); +} + +static void arcmsr_report_sense_info(struct CommandControlBlock *ccb) +{ + struct scsi_cmnd *pcmd = ccb->pcmd; + struct SENSE_DATA *sensebuffer = (struct SENSE_DATA *)pcmd->sense_buffer; + + pcmd->result = DID_OK << 16; + if (sensebuffer) { + int sense_data_length = + sizeof (struct SENSE_DATA) < sizeof (pcmd->sense_buffer) + ? sizeof (struct SENSE_DATA) : sizeof (pcmd->sense_buffer); + memset(sensebuffer, 0, sizeof (pcmd->sense_buffer)); + memcpy(sensebuffer, ccb->arcmsr_cdb.SenseData, sense_data_length); + sensebuffer->ErrorCode = SCSI_SENSE_CURRENT_ERRORS; + sensebuffer->Valid = 1; + } +} + +static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + uint32_t Index; + uint8_t Retries = 0x00; + + do { + for (Index = 0; Index < 100; Index++) { + if (readl(®->outbound_intstatus) + & ARCMSR_MU_OUTBOUND_MESSAGE0_INT) { + writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT + , ®->outbound_intstatus); + return 0x00; + } + msleep_interruptible(10); + }/*max 1 seconds*/ + } while (Retries++ < 20);/*max 20 sec*/ + return 0xff; +} + +static void arcmsr_build_ccb(struct AdapterControlBlock *acb, + struct CommandControlBlock *ccb, struct scsi_cmnd *pcmd) +{ + struct ARCMSR_CDB *arcmsr_cdb = (struct ARCMSR_CDB *)&ccb->arcmsr_cdb; + int8_t *psge = (int8_t *)&arcmsr_cdb->u; + uint32_t address_lo, address_hi; + int arccdbsize = 0x30; + + ccb->pcmd = pcmd; + memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB)); + arcmsr_cdb->Bus = 0; + arcmsr_cdb->TargetID = pcmd->device->id; + arcmsr_cdb->LUN = pcmd->device->lun; + arcmsr_cdb->Function = 1; + arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len; + arcmsr_cdb->Context = (unsigned long)arcmsr_cdb; + memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len); + if (pcmd->use_sg) { + int length, sgcount, i, cdb_sgcount = 0; + struct scatterlist *sl; + + /* Get Scatter Gather List from scsiport. */ + sl = (struct scatterlist *) pcmd->request_buffer; + sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg, + pcmd->sc_data_direction); + /* map stor port SG list to our iop SG List. */ + for (i = 0; i < sgcount; i++) { + /* Get the physical address of the current data pointer */ + length = cpu_to_le32(sg_dma_len(sl)); + address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl))); + address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl))); + if (address_hi == 0) { + struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; + + pdma_sg->address = address_lo; + pdma_sg->length = length; + psge += sizeof (struct SG32ENTRY); + arccdbsize += sizeof (struct SG32ENTRY); + } else { + struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge; + + pdma_sg->addresshigh = address_hi; + pdma_sg->address = address_lo; + pdma_sg->length = length|IS_SG64_ADDR; + psge += sizeof (struct SG64ENTRY); + arccdbsize += sizeof (struct SG64ENTRY); + } + sl++; + cdb_sgcount++; + } + arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount; + arcmsr_cdb->DataLength = pcmd->request_bufflen; + if ( arccdbsize > 256) + arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE; + } else if (pcmd->request_bufflen) { + dma_addr_t dma_addr; + dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer, + pcmd->request_bufflen, pcmd->sc_data_direction); + pcmd->SCp.dma_handle = dma_addr; + address_lo = cpu_to_le32(dma_addr_lo32(dma_addr)); + address_hi = cpu_to_le32(dma_addr_hi32(dma_addr)); + if (address_hi == 0) { + struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; + pdma_sg->address = address_lo; + pdma_sg->length = pcmd->request_bufflen; + } else { + struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge; + pdma_sg->addresshigh = address_hi; + pdma_sg->address = address_lo; + pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR; + } + arcmsr_cdb->sgcount = 1; + arcmsr_cdb->DataLength = pcmd->request_bufflen; + } + if (pcmd->sc_data_direction == DMA_TO_DEVICE ) { + arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE; + ccb->ccb_flags |= CCB_FLAG_WRITE; + } +} + +static void arcmsr_post_ccb(struct AdapterControlBlock *acb, struct CommandControlBlock *ccb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + uint32_t cdb_shifted_phyaddr = ccb->cdb_shifted_phyaddr; + struct ARCMSR_CDB *arcmsr_cdb = (struct ARCMSR_CDB *)&ccb->arcmsr_cdb; + + atomic_inc(&acb->ccboutstandingcount); + ccb->startdone = ARCMSR_CCB_START; + if (arcmsr_cdb->Flags & ARCMSR_CDB_FLAG_SGL_BSIZE) + writel(cdb_shifted_phyaddr | ARCMSR_CCBPOST_FLAG_SGL_BSIZE, + ®->inbound_queueport); + else + writel(cdb_shifted_phyaddr, ®->inbound_queueport); +} + +void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct QBUFFER __iomem *pwbuffer = (struct QBUFFER __iomem *) ®->message_wbuffer; + uint8_t __iomem *iop_data = (uint8_t __iomem *) pwbuffer->data; + int32_t allxfer_len = 0; + + if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_READED) { + acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED); + while ((acb->wqbuf_firstindex != acb->wqbuf_lastindex) + && (allxfer_len < 124)) { + writeb(acb->wqbuffer[acb->wqbuf_firstindex], iop_data); + acb->wqbuf_firstindex++; + acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + allxfer_len++; + } + writel(allxfer_len, &pwbuffer->data_len); + writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK + , ®->inbound_doorbell); + } +} + +static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + + acb->acb_flags &= ~ACB_F_MSG_START_BGRB; + writel(ARCMSR_INBOUND_MESG0_STOP_BGRB, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait 'stop adapter background rebulid' timeout \n" + , acb->host->host_no); +} + +static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb) +{ + dma_free_coherent(&acb->pdev->dev, + ARCMSR_MAX_FREECCB_NUM * sizeof (struct CommandControlBlock) + 0x20, + acb->dma_coherent, + acb->dma_coherent_handle); +} + +static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + uint32_t flag_ccb, outbound_intstatus, outbound_doorbell; + + outbound_intstatus = readl(®->outbound_intstatus) + & acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus); + if (outbound_intstatus & ARCMSR_MU_OUTBOUND_DOORBELL_INT) { + outbound_doorbell = readl(®->outbound_doorbell); + writel(outbound_doorbell, ®->outbound_doorbell); + if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK) { + struct QBUFFER __iomem * prbuffer = + (struct QBUFFER __iomem *) ®->message_rbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data; + int32_t my_empty_len, iop_len, rqbuf_firstindex, rqbuf_lastindex; + + rqbuf_lastindex = acb->rqbuf_lastindex; + rqbuf_firstindex = acb->rqbuf_firstindex; + iop_len = readl(&prbuffer->data_len); + my_empty_len = (rqbuf_firstindex - rqbuf_lastindex - 1) + &(ARCMSR_MAX_QBUFFER - 1); + if (my_empty_len >= iop_len) { + while (iop_len > 0) { + acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data); + acb->rqbuf_lastindex++; + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + iop_len--; + } + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } else + acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW; + } + if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_READ_OK) { + acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_READED; + if (acb->wqbuf_firstindex != acb->wqbuf_lastindex) { + struct QBUFFER __iomem * pwbuffer = + (struct QBUFFER __iomem *) ®->message_wbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *) pwbuffer->data; + int32_t allxfer_len = 0; + + acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED); + while ((acb->wqbuf_firstindex != acb->wqbuf_lastindex) + && (allxfer_len < 124)) { + writeb(acb->wqbuffer[acb->wqbuf_firstindex], iop_data); + acb->wqbuf_firstindex++; + acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + allxfer_len++; + } + writel(allxfer_len, &pwbuffer->data_len); + writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK, + ®->inbound_doorbell); + } + if (acb->wqbuf_firstindex == acb->wqbuf_lastindex) + acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_CLEARED; + } + } + if (outbound_intstatus & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT) { + int id, lun; + /* + **************************************************************** + ** areca cdb command done + **************************************************************** + */ + while (1) { + if ((flag_ccb = readl(®->outbound_queueport)) == 0xFFFFFFFF) + break;/*chip FIFO no ccb for completion already*/ + /* check if command done with no error*/ + ccb = (struct CommandControlBlock *)(acb->vir2phy_offset + + (flag_ccb << 5)); + if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) { + if (ccb->startdone == ARCMSR_CCB_ABORTED) { + struct scsi_cmnd *abortcmd=ccb->pcmd; + if (abortcmd) { + abortcmd->result |= DID_ABORT >> 16; + arcmsr_ccb_complete(ccb, 1); + printk(KERN_NOTICE + "arcmsr%d: ccb='0x%p' isr got aborted command \n" + , acb->host->host_no, ccb); + } + continue; + } + printk(KERN_NOTICE + "arcmsr%d: isr get an illegal ccb command done acb='0x%p'" + "ccb='0x%p' ccbacb='0x%p' startdone = 0x%x" + " ccboutstandingcount=%d \n" + , acb->host->host_no + , acb + , ccb + , ccb->acb + , ccb->startdone + , atomic_read(&acb->ccboutstandingcount)); + continue; + } + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) { + if (acb->devstate[id][lun] == ARECA_RAID_GONE) + acb->devstate[id][lun] = ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_TIME_OUT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_ABORTED: + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + default: + printk(KERN_NOTICE + "arcmsr%d: scsi id=%d lun=%d" + " isr get command error done," + "but got unknown DeviceStatus = 0x%x \n" + , acb->host->host_no + , id + , lun + , ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_NO_CONNECT << 16; + arcmsr_ccb_complete(ccb, 1); + break; + } + } + }/*drain reply FIFO*/ + } + if (!(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT)) + return IRQ_NONE; + return IRQ_HANDLED; +} + +static void arcmsr_iop_parking(struct AdapterControlBlock *acb) +{ + if (acb) { + /* stop adapter background rebuild */ + if (acb->acb_flags & ACB_F_MSG_START_BGRB) { + acb->acb_flags &= ~ACB_F_MSG_START_BGRB; + arcmsr_stop_adapter_bgrb(acb); + arcmsr_flush_adapter_cache(acb); + } + } +} + +static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CMD_MESSAGE_FIELD *pcmdmessagefld; + int retvalue = 0, transfer_len = 0; + char *buffer; + uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 | + (uint32_t ) cmd->cmnd[6] << 16 | + (uint32_t ) cmd->cmnd[7] << 8 | + (uint32_t ) cmd->cmnd[8]; + /* 4 bytes: Areca io control code */ + if (cmd->use_sg) { + struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer; + + buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + if (cmd->use_sg > 1) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + transfer_len += sg->length; + } else { + buffer = cmd->request_buffer; + transfer_len = cmd->request_bufflen; + } + if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + pcmdmessagefld = (struct CMD_MESSAGE_FIELD *) buffer; + switch(controlcode) { + case ARCMSR_MESSAGE_READ_RQBUFFER: { + unsigned long *ver_addr; + dma_addr_t buf_handle; + uint8_t *pQbuffer, *ptmpQbuffer; + int32_t allxfer_len = 0; + + ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle); + if (!ver_addr) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + ptmpQbuffer = (uint8_t *) ver_addr; + while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex) + && (allxfer_len < 1031)) { + pQbuffer = &acb->rqbuffer[acb->rqbuf_firstindex]; + memcpy(ptmpQbuffer, pQbuffer, 1); + acb->rqbuf_firstindex++; + acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + ptmpQbuffer++; + allxfer_len++; + } + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *) + ®->message_rbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data; + int32_t iop_len; + + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + iop_len = readl(&prbuffer->data_len); + while (iop_len > 0) { + acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data); + acb->rqbuf_lastindex++; + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + iop_len--; + } + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } + memcpy(pcmdmessagefld->messagedatabuffer, + (uint8_t *)ver_addr, allxfer_len); + pcmdmessagefld->cmdmessage.Length = allxfer_len; + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; + pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle); + } + break; + case ARCMSR_MESSAGE_WRITE_WQBUFFER: { + unsigned long *ver_addr; + dma_addr_t buf_handle; + int32_t my_empty_len, user_len, wqbuf_firstindex, wqbuf_lastindex; + uint8_t *pQbuffer, *ptmpuserbuffer; + + ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle); + if (!ver_addr) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + ptmpuserbuffer = (uint8_t *)ver_addr; + user_len = pcmdmessagefld->cmdmessage.Length; + memcpy(ptmpuserbuffer, pcmdmessagefld->messagedatabuffer, user_len); + wqbuf_lastindex = acb->wqbuf_lastindex; + wqbuf_firstindex = acb->wqbuf_firstindex; + if (wqbuf_lastindex != wqbuf_firstindex) { + struct SENSE_DATA *sensebuffer = + (struct SENSE_DATA *)cmd->sense_buffer; + arcmsr_post_Qbuffer(acb); + /* has error report sensedata */ + sensebuffer->ErrorCode = 0x70; + sensebuffer->SenseKey = ILLEGAL_REQUEST; + sensebuffer->AdditionalSenseLength = 0x0A; + sensebuffer->AdditionalSenseCode = 0x20; + sensebuffer->Valid = 1; + retvalue = ARCMSR_MESSAGE_FAIL; + } else { + my_empty_len = (wqbuf_firstindex-wqbuf_lastindex - 1) + &(ARCMSR_MAX_QBUFFER - 1); + if (my_empty_len >= user_len) { + while (user_len > 0) { + pQbuffer = + &acb->wqbuffer[acb->wqbuf_lastindex]; + memcpy(pQbuffer, ptmpuserbuffer, 1); + acb->wqbuf_lastindex++; + acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + ptmpuserbuffer++; + user_len--; + } + if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) { + acb->acb_flags &= + ~ACB_F_MESSAGE_WQBUFFER_CLEARED; + arcmsr_post_Qbuffer(acb); + } + } else { + /* has error report sensedata */ + struct SENSE_DATA *sensebuffer = + (struct SENSE_DATA *)cmd->sense_buffer; + sensebuffer->ErrorCode = 0x70; + sensebuffer->SenseKey = ILLEGAL_REQUEST; + sensebuffer->AdditionalSenseLength = 0x0A; + sensebuffer->AdditionalSenseCode = 0x20; + sensebuffer->Valid = 1; + retvalue = ARCMSR_MESSAGE_FAIL; + } + } + pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle); + } + break; + case ARCMSR_MESSAGE_CLEAR_RQBUFFER: { + uint8_t *pQbuffer = acb->rqbuffer; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } + acb->acb_flags |= ACB_F_MESSAGE_RQBUFFER_CLEARED; + acb->rqbuf_firstindex = 0; + acb->rqbuf_lastindex = 0; + memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER); + pcmdmessagefld->cmdmessage.ReturnCode = + ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_CLEAR_WQBUFFER: { + uint8_t *pQbuffer = acb->wqbuffer; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK + , ®->inbound_doorbell); + } + acb->acb_flags |= + (ACB_F_MESSAGE_WQBUFFER_CLEARED | + ACB_F_MESSAGE_WQBUFFER_READED); + acb->wqbuf_firstindex = 0; + acb->wqbuf_lastindex = 0; + memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER); + pcmdmessagefld->cmdmessage.ReturnCode = + ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_CLEAR_ALLQBUFFER: { + uint8_t *pQbuffer; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK + , ®->inbound_doorbell); + } + acb->acb_flags |= + (ACB_F_MESSAGE_WQBUFFER_CLEARED + | ACB_F_MESSAGE_RQBUFFER_CLEARED + | ACB_F_MESSAGE_WQBUFFER_READED); + acb->rqbuf_firstindex = 0; + acb->rqbuf_lastindex = 0; + acb->wqbuf_firstindex = 0; + acb->wqbuf_lastindex = 0; + pQbuffer = acb->rqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + pQbuffer = acb->wqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_RETURN_CODE_3F: { + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_3F; + } + break; + case ARCMSR_MESSAGE_SAY_HELLO: { + int8_t * hello_string = "Hello! I am ARCMSR"; + + memcpy(pcmdmessagefld->messagedatabuffer, hello_string + , (int16_t)strlen(hello_string)); + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_SAY_GOODBYE: + arcmsr_iop_parking(acb); + break; + case ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE: + arcmsr_flush_adapter_cache(acb); + break; + default: + retvalue = ARCMSR_MESSAGE_FAIL; + } + message_out: + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + kunmap_atomic(buffer - sg->offset, KM_IRQ0); + } + return retvalue; +} + +static struct CommandControlBlock *arcmsr_get_freeccb(struct AdapterControlBlock *acb) +{ + struct list_head *head = &acb->ccb_free_list; + struct CommandControlBlock *ccb = NULL; + + if (!list_empty(head)) { + ccb = list_entry(head->next, struct CommandControlBlock, list); + list_del(head->next); + } + return ccb; +} + +static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb, + struct scsi_cmnd *cmd) +{ + switch (cmd->cmnd[0]) { + case INQUIRY: { + unsigned char inqdata[36]; + char *buffer; + + if (cmd->device->lun) { + cmd->result = (DID_TIME_OUT << 16); + cmd->scsi_done(cmd); + return; + } + inqdata[0] = TYPE_PROCESSOR; + /* Periph Qualifier & Periph Dev Type */ + inqdata[1] = 0; + /* rem media bit & Dev Type Modifier */ + inqdata[2] = 0; + /* ISO,ECMA,& ANSI versions */ + inqdata[4] = 31; + /* length of additional data */ + strncpy(&inqdata[8], "Areca ", 8); + /* Vendor Identification */ + strncpy(&inqdata[16], "RAID controller ", 16); + /* Product Identification */ + strncpy(&inqdata[32], "R001", 4); /* Product Revision */ + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + } else { + buffer = cmd->request_buffer; + } + memcpy(buffer, inqdata, sizeof(inqdata)); + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + kunmap_atomic(buffer - sg->offset, KM_IRQ0); + } + cmd->scsi_done(cmd); + } + break; + case WRITE_BUFFER: + case READ_BUFFER: { + if (arcmsr_iop_message_xfer(acb, cmd)) + cmd->result = (DID_ERROR << 16); + cmd->scsi_done(cmd); + } + break; + default: + cmd->scsi_done(cmd); + } +} + +static int arcmsr_queue_command(struct scsi_cmnd *cmd, + void (* done)(struct scsi_cmnd *)) +{ + struct Scsi_Host *host = cmd->device->host; + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *) host->hostdata; + struct CommandControlBlock *ccb; + int target = cmd->device->id; + int lun = cmd->device->lun; + + cmd->scsi_done = done; + cmd->host_scribble = NULL; + cmd->result = 0; + if (acb->acb_flags & ACB_F_BUS_RESET) { + printk(KERN_NOTICE "arcmsr%d: bus reset" + " and return busy \n" + , acb->host->host_no); + return SCSI_MLQUEUE_HOST_BUSY; + } + if(target == 16) { + /* virtual device for iop message transfer */ + arcmsr_handle_virtual_command(acb, cmd); + return 0; + } + if (acb->devstate[target][lun] == ARECA_RAID_GONE) { + uint8_t block_cmd; + + block_cmd = cmd->cmnd[0] & 0x0f; + if (block_cmd == 0x08 || block_cmd == 0x0a) { + printk(KERN_NOTICE + "arcmsr%d: block 'read/write'" + "command with gone raid volume" + " Cmd=%2x, TargetId=%d, Lun=%d \n" + , acb->host->host_no + , cmd->cmnd[0] + , target, lun); + cmd->result = (DID_NO_CONNECT << 16); + cmd->scsi_done(cmd); + return 0; + } + } + if (atomic_read(&acb->ccboutstandingcount) >= + ARCMSR_MAX_OUTSTANDING_CMD) + return SCSI_MLQUEUE_HOST_BUSY; + + ccb = arcmsr_get_freeccb(acb); + if (!ccb) + return SCSI_MLQUEUE_HOST_BUSY; + arcmsr_build_ccb(acb, ccb, cmd); + arcmsr_post_ccb(acb, ccb); + return 0; +} + +static void arcmsr_get_firmware_spec(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + char *acb_firm_model = acb->firm_model; + char *acb_firm_version = acb->firm_version; + char __iomem *iop_firm_model = (char __iomem *) ®->message_rwbuffer[15]; + char __iomem *iop_firm_version = (char __iomem *) ®->message_rwbuffer[17]; + int count; + + writel(ARCMSR_INBOUND_MESG0_GET_CONFIG, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait " + "'get adapter firmware miscellaneous data' timeout \n" + , acb->host->host_no); + count = 8; + while (count) { + *acb_firm_model = readb(iop_firm_model); + acb_firm_model++; + iop_firm_model++; + count--; + } + count = 16; + while (count) { + *acb_firm_version = readb(iop_firm_version); + acb_firm_version++; + iop_firm_version++; + count--; + } + printk(KERN_INFO + "ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n" + , acb->host->host_no + , acb->firm_version); + acb->firm_request_len = readl(®->message_rwbuffer[1]); + acb->firm_numbers_queue = readl(®->message_rwbuffer[2]); + acb->firm_sdram_size = readl(®->message_rwbuffer[3]); + acb->firm_hd_channels = readl(®->message_rwbuffer[4]); +} + +static void arcmsr_polling_ccbdone(struct AdapterControlBlock *acb, + struct CommandControlBlock *poll_ccb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + uint32_t flag_ccb, outbound_intstatus, poll_ccb_done = 0, poll_count = 0; + int id, lun; + + polling_ccb_retry: + poll_count++; + outbound_intstatus = readl(®->outbound_intstatus) + & acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus);/*clear interrupt*/ + while (1) { + if ((flag_ccb = readl(®->outbound_queueport)) == 0xFFFFFFFF) { + if (poll_ccb_done) + break; + else { + msleep(25); + if (poll_count > 100) + break; + goto polling_ccb_retry; + } + } + ccb = (struct CommandControlBlock *) + (acb->vir2phy_offset + (flag_ccb << 5)); + if ((ccb->acb != acb) || + (ccb->startdone != ARCMSR_CCB_START)) { + if ((ccb->startdone == ARCMSR_CCB_ABORTED) || + (ccb == poll_ccb)) { + printk(KERN_NOTICE + "arcmsr%d: scsi id=%d lun=%d ccb='0x%p'" + " poll command abort successfully \n" + , acb->host->host_no + , ccb->pcmd->device->id + , ccb->pcmd->device->lun + , ccb); + ccb->pcmd->result = DID_ABORT << 16; + arcmsr_ccb_complete(ccb, 1); + poll_ccb_done = 1; + continue; + } + printk(KERN_NOTICE + "arcmsr%d: polling get an illegal ccb" + " command done ccb='0x%p'" + "ccboutstandingcount=%d \n" + , acb->host->host_no + , ccb + , atomic_read(&acb->ccboutstandingcount)); + continue; + } + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) { + if (acb->devstate[id][lun] == ARECA_RAID_GONE) + acb->devstate[id][lun] = ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_TIME_OUT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_ABORTED: + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + default: + printk(KERN_NOTICE + "arcmsr%d: scsi id=%d lun=%d" + " polling and getting command error done" + "but got unknown DeviceStatus = 0x%x \n" + , acb->host->host_no + , id + , lun + , ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + break; + } + } + } +} + +static void arcmsr_iop_init(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + uint32_t intmask_org, mask, outbound_doorbell, firmware_state = 0; + + do { + firmware_state = readl(®->outbound_msgaddr1); + } while (!(firmware_state & ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK)); + intmask_org = readl(®->outbound_intmask) + | ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE; + arcmsr_get_firmware_spec(acb); + + acb->acb_flags |= ACB_F_MSG_START_BGRB; + writel(ARCMSR_INBOUND_MESG0_START_BGRB, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) { + printk(KERN_NOTICE "arcmsr%d: " + "wait 'start adapter background rebulid' timeout\n", + acb->host->host_no); + } + + outbound_doorbell = readl(®->outbound_doorbell); + writel(outbound_doorbell, ®->outbound_doorbell); + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, ®->inbound_doorbell); + mask = ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE + | ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE); + writel(intmask_org & mask, ®->outbound_intmask); + acb->outbound_int_enable = ~(intmask_org & mask) & 0x000000ff; + acb->acb_flags |= ACB_F_IOP_INITED; +} + +static void arcmsr_iop_reset(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + uint32_t intmask_org; + int i = 0; + + if (atomic_read(&acb->ccboutstandingcount) != 0) { + /* talk to iop 331 outstanding command aborted */ + arcmsr_abort_allcmd(acb); + /* wait for 3 sec for all command aborted*/ + msleep_interruptible(3000); + /* disable all outbound interrupt */ + intmask_org = arcmsr_disable_outbound_ints(acb); + /* clear all outbound posted Q */ + for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++) + readl(®->outbound_queueport); + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + ccb = acb->pccb_pool[i]; + if ((ccb->startdone == ARCMSR_CCB_START) || + (ccb->startdone == ARCMSR_CCB_ABORTED)) { + ccb->startdone = ARCMSR_CCB_ABORTED; + ccb->pcmd->result = DID_ABORT << 16; + arcmsr_ccb_complete(ccb, 1); + } + } + /* enable all outbound interrupt */ + arcmsr_enable_outbound_ints(acb, intmask_org); + } + atomic_set(&acb->ccboutstandingcount, 0); +} + +static int arcmsr_bus_reset(struct scsi_cmnd *cmd) +{ + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *)cmd->device->host->hostdata; + int i; + + acb->num_resets++; + acb->acb_flags |= ACB_F_BUS_RESET; + for (i = 0; i < 400; i++) { + if (!atomic_read(&acb->ccboutstandingcount)) + break; + arcmsr_interrupt(acb); + msleep(25); + } + arcmsr_iop_reset(acb); + acb->acb_flags &= ~ACB_F_BUS_RESET; + return SUCCESS; +} + +static void arcmsr_abort_one_cmd(struct AdapterControlBlock *acb, + struct CommandControlBlock *ccb) +{ + u32 intmask; + + ccb->startdone = ARCMSR_CCB_ABORTED; + + /* + ** Wait for 3 sec for all command done. + */ + msleep_interruptible(3000); + + intmask = arcmsr_disable_outbound_ints(acb); + arcmsr_polling_ccbdone(acb, ccb); + arcmsr_enable_outbound_ints(acb, intmask); +} + +static int arcmsr_abort(struct scsi_cmnd *cmd) +{ + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *)cmd->device->host->hostdata; + int i = 0; + + printk(KERN_NOTICE + "arcmsr%d: abort device command of scsi id=%d lun=%d \n", + acb->host->host_no, cmd->device->id, cmd->device->lun); + acb->num_aborts++; + + /* + ************************************************ + ** the all interrupt service routine is locked + ** we need to handle it as soon as possible and exit + ************************************************ + */ + if (!atomic_read(&acb->ccboutstandingcount)) + return SUCCESS; + + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + struct CommandControlBlock *ccb = acb->pccb_pool[i]; + if (ccb->startdone == ARCMSR_CCB_START && ccb->pcmd == cmd) { + arcmsr_abort_one_cmd(acb, ccb); + break; + } + } + + return SUCCESS; +} + +static const char *arcmsr_info(struct Scsi_Host *host) +{ + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *) host->hostdata; + static char buf[256]; + char *type; + int raid6 = 1; + + switch (acb->pdev->device) { + case PCI_DEVICE_ID_ARECA_1110: + case PCI_DEVICE_ID_ARECA_1210: + raid6 = 0; + /*FALLTHRU*/ + case PCI_DEVICE_ID_ARECA_1120: + case PCI_DEVICE_ID_ARECA_1130: + case PCI_DEVICE_ID_ARECA_1160: + case PCI_DEVICE_ID_ARECA_1170: + case PCI_DEVICE_ID_ARECA_1220: + case PCI_DEVICE_ID_ARECA_1230: + case PCI_DEVICE_ID_ARECA_1260: + case PCI_DEVICE_ID_ARECA_1270: + case PCI_DEVICE_ID_ARECA_1280: + type = "SATA"; + break; + case PCI_DEVICE_ID_ARECA_1380: + case PCI_DEVICE_ID_ARECA_1381: + case PCI_DEVICE_ID_ARECA_1680: + case PCI_DEVICE_ID_ARECA_1681: + type = "SAS"; + break; + default: + type = "X-TYPE"; + break; + } + sprintf(buf, "Areca %s Host Adapter RAID Controller%s\n %s", + type, raid6 ? "( RAID6 capable)" : "", + ARCMSR_DRIVER_VERSION); + return buf; +} + + diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c09396d2c77b..df7b62676d87 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2004,6 +2004,23 @@ #define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea #define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb +#define PCI_VENDOR_ID_ARECA 0x17d3 +#define PCI_DEVICE_ID_ARECA_1110 0x1110 +#define PCI_DEVICE_ID_ARECA_1120 0x1120 +#define PCI_DEVICE_ID_ARECA_1130 0x1130 +#define PCI_DEVICE_ID_ARECA_1160 0x1160 +#define PCI_DEVICE_ID_ARECA_1170 0x1170 +#define PCI_DEVICE_ID_ARECA_1210 0x1210 +#define PCI_DEVICE_ID_ARECA_1220 0x1220 +#define PCI_DEVICE_ID_ARECA_1230 0x1230 +#define PCI_DEVICE_ID_ARECA_1260 0x1260 +#define PCI_DEVICE_ID_ARECA_1270 0x1270 +#define PCI_DEVICE_ID_ARECA_1280 0x1280 +#define PCI_DEVICE_ID_ARECA_1380 0x1380 +#define PCI_DEVICE_ID_ARECA_1381 0x1381 +#define PCI_DEVICE_ID_ARECA_1680 0x1680 +#define PCI_DEVICE_ID_ARECA_1681 0x1681 + #define PCI_VENDOR_ID_S2IO 0x17d5 #define PCI_DEVICE_ID_S2IO_WIN 0x5731 #define PCI_DEVICE_ID_S2IO_UNI 0x5831 -- cgit v1.2.3-71-gd317 From 3c5100c1c40cc5e27b4da4a736994c76d93392a0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Jul 2006 16:58:33 +0900 Subject: [PATCH] libata: cosmetic changes to PM functions Unify pm_message_t argument to the new-style @mesg. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 12 ++++++------ drivers/scsi/libata-scsi.c | 8 ++++---- include/linux/libata.h | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 386e5f21e191..66feebdb01c3 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5767,11 +5767,11 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) return (tmp == bits->val) ? 1 : 0; } -void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t state) +void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg) { pci_save_state(pdev); - if (state.event == PM_EVENT_SUSPEND) { + if (mesg.event == PM_EVENT_SUSPEND) { pci_disable_device(pdev); pci_set_power_state(pdev, PCI_D3hot); } @@ -5785,24 +5785,24 @@ void ata_pci_device_do_resume(struct pci_dev *pdev) pci_set_master(pdev); } -int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state) +int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) { struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); int rc = 0; - rc = ata_host_set_suspend(host_set, state); + rc = ata_host_set_suspend(host_set, mesg); if (rc) return rc; if (host_set->next) { - rc = ata_host_set_suspend(host_set->next, state); + rc = ata_host_set_suspend(host_set->next, mesg); if (rc) { ata_host_set_resume(host_set); return rc; } } - ata_pci_device_do_suspend(pdev, state); + ata_pci_device_do_suspend(pdev, mesg); return 0; } diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 7ced41ecde86..1638e57028cb 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -400,7 +400,7 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf) /** * ata_scsi_device_suspend - suspend ATA device associated with sdev * @sdev: the SCSI device to suspend - * @state: target power management state + * @mesg: target power management message * * Request suspend EH action on the ATA device associated with * @sdev and wait for the operation to complete. @@ -411,7 +411,7 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf) * RETURNS: * 0 on success, -errno otherwise. */ -int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state) +int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t mesg) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = ata_scsi_find_dev(ap, sdev); @@ -438,7 +438,7 @@ int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state) /* request suspend */ action = ATA_EH_SUSPEND; - if (state.event != PM_EVENT_SUSPEND) + if (mesg.event != PM_EVENT_SUSPEND) action |= ATA_EH_PM_FREEZE; ap->eh_info.dev_action[dev->devno] |= action; ap->eh_info.flags |= ATA_EHI_QUIET; @@ -463,7 +463,7 @@ int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state) spin_unlock_irqrestore(ap->lock, flags); out: if (rc == 0) - sdev->sdev_gendev.power.power_state = state; + sdev->sdev_gendev.power.power_state = mesg; return rc; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 66c3100c2b94..b9416708bba2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -676,9 +676,9 @@ extern void ata_std_ports(struct ata_ioports *ioaddr); extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, unsigned int n_ports); extern void ata_pci_remove_one (struct pci_dev *pdev); -extern void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t state); +extern void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg); extern void ata_pci_device_do_resume(struct pci_dev *pdev); -extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state); +extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); extern int ata_pci_device_resume(struct pci_dev *pdev); extern int ata_pci_clear_simplex(struct pci_dev *pdev); #endif /* CONFIG_PCI */ @@ -697,7 +697,7 @@ extern int sata_scr_write_flush(struct ata_port *ap, int reg, u32 val); extern int ata_port_online(struct ata_port *ap); extern int ata_port_offline(struct ata_port *ap); extern int ata_scsi_device_resume(struct scsi_device *); -extern int ata_scsi_device_suspend(struct scsi_device *, pm_message_t state); +extern int ata_scsi_device_suspend(struct scsi_device *, pm_message_t mesg); extern int ata_host_set_suspend(struct ata_host_set *host_set, pm_message_t mesg); extern void ata_host_set_resume(struct ata_host_set *host_set); -- cgit v1.2.3-71-gd317 From b03732f006bd1ecee32587ec8235c41af5ad905f Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 7 Aug 2006 14:27:10 -0500 Subject: [PATCH] libata: Add ata_host_set_init Add ata_host_set_init in preparation for SAS attached SATA. Signed-off-by: Brian King Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 28 ++++++++++++++++++++++++---- include/linux/libata.h | 2 ++ 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 351544d3653a..886440b128a5 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5350,6 +5350,28 @@ err_out: return NULL; } +/** + * ata_sas_host_init - Initialize a host_set struct + * @host_set: host_set to initialize + * @dev: device host_set is attached to + * @flags: host_set flags + * @ops: port_ops + * + * LOCKING: + * PCI/etc. bus probe sem. + * + */ + +void ata_host_set_init(struct ata_host_set *host_set, + struct device *dev, unsigned long flags, + const struct ata_port_operations *ops) +{ + spin_lock_init(&host_set->lock); + host_set->dev = dev; + host_set->flags = flags; + host_set->ops = ops; +} + /** * ata_device_add - Register hardware device with ATA and SCSI layers * @ent: Probe information describing hardware device to be registered @@ -5381,15 +5403,12 @@ int ata_device_add(const struct ata_probe_ent *ent) (ent->n_ports * sizeof(void *)), GFP_KERNEL); if (!host_set) return 0; - spin_lock_init(&host_set->lock); - host_set->dev = dev; + ata_host_set_init(host_set, dev, ent->host_set_flags, ent->port_ops); host_set->n_ports = ent->n_ports; host_set->irq = ent->irq; host_set->mmio_base = ent->mmio_base; host_set->private_data = ent->private_data; - host_set->ops = ent->port_ops; - host_set->flags = ent->host_set_flags; /* register each port bound to this device */ for (i = 0; i < ent->n_ports; i++) { @@ -5908,6 +5927,7 @@ EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug); EXPORT_SYMBOL_GPL(sata_deb_timing_long); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); +EXPORT_SYMBOL_GPL(ata_host_set_init); EXPORT_SYMBOL_GPL(ata_device_add); EXPORT_SYMBOL_GPL(ata_port_detach); EXPORT_SYMBOL_GPL(ata_host_set_remove); diff --git a/include/linux/libata.h b/include/linux/libata.h index b9416708bba2..be15ef5d8d85 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -684,6 +684,8 @@ extern int ata_pci_clear_simplex(struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(const struct ata_probe_ent *ent); extern void ata_port_detach(struct ata_port *ap); +extern void ata_host_set_init(struct ata_host_set *, struct device *, + unsigned long, const struct ata_port_operations *); extern void ata_host_set_remove(struct ata_host_set *host_set); extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); -- cgit v1.2.3-71-gd317 From 80289167fd3ebaeb7b2641e69cbec44b61165fe7 Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 7 Aug 2006 14:27:31 -0500 Subject: [PATCH] libata: Add support for SATA attachment to SAS adapters The following patch enhances libata to allow SAS device drivers to utilize libata to talk to SATA devices. It introduces some new APIs which allow libata to be used without allocating a virtual scsi host. New APIs: ata_sas_port_alloc - Allocate an ata_port ata_sas_port_init - Initialize an ata_port (probe device, etc) ata_sas_port_destroy - Free an ata_port allocated by ata_sas_port_alloc ata_sas_slave_configure - configure scsi device ata_sas_queuecmd - queue a scsi command, similar to ata_scsi_queuecomand These new APIs can be used either directly by a SAS LLDD or could be used by the SAS transport class. Possible usage for a SAS LLDD would be: scsi_scan_host target_alloc ata_sas_port_alloc slave_alloc ata_sas_port_init slave_configure ata_sas_slave_configure Commands received by the LLDD for SATA devices would call ata_sas_queuecmd. Device teardown would occur with: slave_destroy port_disable target_destroy ata_sas_port_destroy Signed-off-by: Brian King Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 2 +- drivers/scsi/libata-scsi.c | 149 +++++++++++++++++++++++++++++++++++++++++++++ drivers/scsi/libata.h | 1 + include/linux/libata.h | 9 +++ 4 files changed, 160 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index dc35cccd2898..83d93fc0751b 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1528,7 +1528,7 @@ err_out_nosup: * Zero on success, negative errno otherwise. */ -static int ata_bus_probe(struct ata_port *ap) +int ata_bus_probe(struct ata_port *ap) { unsigned int classes[ATA_MAX_DEVICES]; int tries[ATA_MAX_DEVICES]; diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 1638e57028cb..37ec02661433 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -3158,3 +3158,152 @@ void ata_scsi_dev_rescan(void *data) scsi_rescan_device(&(dev->sdev->sdev_gendev)); } } + +/** + * ata_sas_port_alloc - Allocate port for a SAS attached SATA device + * @pdev: PCI device that the scsi device is attached to + * @port_info: Information from low-level host driver + * @host: SCSI host that the scsi device is attached to + * + * LOCKING: + * PCI/etc. bus probe sem. + * + * RETURNS: + * ata_port pointer on success / NULL on failure. + */ + +struct ata_port *ata_sas_port_alloc(struct ata_host_set *host_set, + struct ata_port_info *port_info, + struct Scsi_Host *host) +{ + struct ata_port *ap = kzalloc(sizeof(*ap), GFP_KERNEL); + struct ata_probe_ent *ent; + + if (!ap) + return NULL; + + ent = ata_probe_ent_alloc(host_set->dev, port_info); + if (!ent) { + kfree(ap); + return NULL; + } + + ata_port_init(ap, host_set, ent, 0); + ap->lock = host->host_lock; + kfree(ent); + return ap; +} +EXPORT_SYMBOL_GPL(ata_sas_port_alloc); + +/** + * ata_sas_port_start - Set port up for dma. + * @ap: Port to initialize + * + * Called just after data structures for each port are + * initialized. Allocates DMA pad. + * + * May be used as the port_start() entry in ata_port_operations. + * + * LOCKING: + * Inherited from caller. + */ +int ata_sas_port_start(struct ata_port *ap) +{ + return ata_pad_alloc(ap, ap->dev); +} +EXPORT_SYMBOL_GPL(ata_sas_port_start); + +/** + * ata_port_stop - Undo ata_sas_port_start() + * @ap: Port to shut down + * + * Frees the DMA pad. + * + * May be used as the port_stop() entry in ata_port_operations. + * + * LOCKING: + * Inherited from caller. + */ + +void ata_sas_port_stop(struct ata_port *ap) +{ + ata_pad_free(ap, ap->dev); +} +EXPORT_SYMBOL_GPL(ata_sas_port_stop); + +/** + * ata_sas_port_init - Initialize a SATA device + * @ap: SATA port to initialize + * + * LOCKING: + * PCI/etc. bus probe sem. + * + * RETURNS: + * Zero on success, non-zero on error. + */ + +int ata_sas_port_init(struct ata_port *ap) +{ + int rc = ap->ops->port_start(ap); + + if (!rc) + rc = ata_bus_probe(ap); + + return rc; +} +EXPORT_SYMBOL_GPL(ata_sas_port_init); + +/** + * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc + * @ap: SATA port to destroy + * + */ + +void ata_sas_port_destroy(struct ata_port *ap) +{ + ap->ops->port_stop(ap); + kfree(ap); +} +EXPORT_SYMBOL_GPL(ata_sas_port_destroy); + +/** + * ata_sas_slave_configure - Default slave_config routine for libata devices + * @sdev: SCSI device to configure + * @ap: ATA port to which SCSI device is attached + * + * RETURNS: + * Zero. + */ + +int ata_sas_slave_configure(struct scsi_device *sdev, struct ata_port *ap) +{ + ata_scsi_sdev_config(sdev); + ata_scsi_dev_config(sdev, ap->device); + return 0; +} +EXPORT_SYMBOL_GPL(ata_sas_slave_configure); + +/** + * ata_sas_queuecmd - Issue SCSI cdb to libata-managed device + * @cmd: SCSI command to be sent + * @done: Completion function, called when command is complete + * @ap: ATA port to which the command is being sent + * + * RETURNS: + * Zero. + */ + +int ata_sas_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), + struct ata_port *ap) +{ + ata_scsi_dump_cdb(ap, cmd); + + if (likely(ata_scsi_dev_enabled(ap->device))) + __ata_scsi_queuecmd(cmd, done, ap->device); + else { + cmd->result = (DID_BAD_TARGET << 16); + done(cmd); + } + return 0; +} +EXPORT_SYMBOL_GPL(ata_sas_queuecmd); diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 0b7a37c2785d..d4a4f82360ec 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -111,6 +111,7 @@ extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen)); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); extern void ata_scsi_dev_rescan(void *data); +extern int ata_bus_probe(struct ata_port *ap); /* libata-eh.c */ extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); diff --git a/include/linux/libata.h b/include/linux/libata.h index be15ef5d8d85..cf5eb1da3e32 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -691,6 +691,15 @@ extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); extern int ata_scsi_release(struct Scsi_Host *host); +extern void ata_sas_port_destroy(struct ata_port *); +extern struct ata_port *ata_sas_port_alloc(struct ata_host_set *, + struct ata_port_info *, struct Scsi_Host *); +extern int ata_sas_port_init(struct ata_port *); +extern int ata_sas_port_start(struct ata_port *ap); +extern void ata_sas_port_stop(struct ata_port *ap); +extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); +extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), + struct ata_port *ap); extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern int sata_scr_valid(struct ata_port *ap); extern int sata_scr_read(struct ata_port *ap, int reg, u32 *val); -- cgit v1.2.3-71-gd317 From 2ec7df0457b710d9201f211dbccdbecf0ad38b7e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 10 Aug 2006 16:59:10 +0900 Subject: [PATCH] libata: rework legacy handling to remove much of the cruft Kill host_set->next Fix simplex support Allow per platform setting of IDE legacy bases Some of this can be tidied further later on, in particular all the legacy port gunge belongs as a PCI quirk/PCI header decode to understand the special legacy IDE rules in the PCI spec. Longer term Jeff also wants to move the request_irq/free_irq out of core which will make this even cleaner. tj: folded in three followup patches - ata_piix-fix, broken-arch-fix and fix-new-legacy-handling, and separated per-dev xfermask into separate patch preceding this one. Folded in fixes are... * ata_piix-fix: fix build failure due to host_set->next removal * broken-arch-fix: add missing include/asm-*/libata-portmap.h * fix-new-legacy-handling: * In ata_pci_init_legacy_port(), probe_num was incorrectly incremented during initialization of the secondary port and probe_ent->n_ports was incorrectly fixed to 1. * Both legacy ports ended up having the same hard_port_no. * When printing port information, both legacy ports printed the first irq. Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Tejun Heo --- drivers/scsi/ata_piix.c | 2 - drivers/scsi/libata-bmdma.c | 135 +++++++++++++++++------------------ drivers/scsi/libata-core.c | 59 +++++++++------ include/asm-alpha/libata-portmap.h | 1 + include/asm-generic/libata-portmap.h | 12 ++++ include/asm-i386/libata-portmap.h | 1 + include/asm-ia64/libata-portmap.h | 1 + include/asm-powerpc/libata-portmap.h | 1 + include/asm-sparc/libata-portmap.h | 1 + include/asm-sparc64/libata-portmap.h | 1 + include/asm-x86_64/libata-portmap.h | 1 + include/linux/libata.h | 5 +- 12 files changed, 123 insertions(+), 97 deletions(-) create mode 100644 include/asm-alpha/libata-portmap.h create mode 100644 include/asm-generic/libata-portmap.h create mode 100644 include/asm-i386/libata-portmap.h create mode 100644 include/asm-ia64/libata-portmap.h create mode 100644 include/asm-powerpc/libata-portmap.h create mode 100644 include/asm-sparc/libata-portmap.h create mode 100644 include/asm-sparc64/libata-portmap.h create mode 100644 include/asm-x86_64/libata-portmap.h (limited to 'include/linux') diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 5e8afc876980..40ecab5793e3 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -932,8 +932,6 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) static void piix_host_stop(struct ata_host_set *host_set) { - if (host_set->next == NULL) - kfree(host_set->private_data); ata_host_stop(host_set); } diff --git a/drivers/scsi/libata-bmdma.c b/drivers/scsi/libata-bmdma.c index 3268cf5375ea..e694f6075c3b 100644 --- a/drivers/scsi/libata-bmdma.c +++ b/drivers/scsi/libata-bmdma.c @@ -854,7 +854,7 @@ ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int if (bmdma) { bmdma += 8; if(inb(bmdma + 2) & 0x80) - probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; probe_ent->port[p].bmdma_addr = bmdma; } ata_std_ports(&probe_ent->port[p]); @@ -867,44 +867,55 @@ ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, - struct ata_port_info *port, int port_num) + struct ata_port_info **port, int port_mask) { struct ata_probe_ent *probe_ent; - unsigned long bmdma; + unsigned long bmdma = pci_resource_start(pdev, 4); - probe_ent = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port); + int port_num = 0; + + probe_ent = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]); if (!probe_ent) return NULL; probe_ent->legacy_mode = 1; - probe_ent->n_ports = 1; - probe_ent->hard_port_no = port_num; - probe_ent->private_data = port->private_data; - - switch(port_num) - { - case 0: - probe_ent->irq = 14; - probe_ent->port[0].cmd_addr = 0x1f0; - probe_ent->port[0].altstatus_addr = - probe_ent->port[0].ctl_addr = 0x3f6; - break; - case 1: + probe_ent->hard_port_no = 0; + probe_ent->private_data = port[0]->private_data; + + if (port_mask & ATA_PORT_PRIMARY) { + probe_ent->irq = 14; + probe_ent->port[port_num].cmd_addr = ATA_PRIMARY_CMD; + probe_ent->port[port_num].altstatus_addr = + probe_ent->port[port_num].ctl_addr = ATA_PRIMARY_CTL; + if (bmdma) { + probe_ent->port[0].bmdma_addr = bmdma; + if (inb(bmdma + 2) & 0x80) + probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + } + ata_std_ports(&probe_ent->port[port_num]); + port_num ++; + } + if (port_mask & ATA_PORT_SECONDARY) { + if (port_num == 1) + probe_ent->irq2 = 15; + else { + /* Secondary only. IRQ 15 only and "first" port is port 1 */ probe_ent->irq = 15; - probe_ent->port[0].cmd_addr = 0x170; - probe_ent->port[0].altstatus_addr = - probe_ent->port[0].ctl_addr = 0x376; - break; + probe_ent->hard_port_no = 1; + } + probe_ent->port[port_num].cmd_addr = ATA_SECONDARY_CMD; + probe_ent->port[port_num].altstatus_addr = + probe_ent->port[port_num].ctl_addr = ATA_SECONDARY_CTL; + if (bmdma) { + probe_ent->port[port_num].bmdma_addr = bmdma + 8; + if (inb(bmdma + 10) & 0x80) + probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + } + ata_std_ports(&probe_ent->port[port_num]); + port_num ++; } - bmdma = pci_resource_start(pdev, 4); - if (bmdma != 0) { - bmdma += 8 * port_num; - probe_ent->port[0].bmdma_addr = bmdma; - if (inb(bmdma + 2) & 0x80) - probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; - } - ata_std_ports(&probe_ent->port[0]); + probe_ent->n_ports = port_num; return probe_ent; } @@ -924,6 +935,10 @@ static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, * regions, sets the dma mask, enables bus master mode, and calls * ata_device_add() * + * ASSUMPTION: + * Nobody makes a single channel controller that appears solely as + * the secondary legacy port on PCI. + * * LOCKING: * Inherited from PCI layer (may sleep). * @@ -934,7 +949,7 @@ static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, unsigned int n_ports) { - struct ata_probe_ent *probe_ent = NULL, *probe_ent2 = NULL; + struct ata_probe_ent *probe_ent = NULL; struct ata_port_info *port[2]; u8 tmp8, mask; unsigned int legacy_mode = 0; @@ -983,35 +998,34 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, goto err_out; } - /* FIXME: Should use platform specific mappers for legacy port ranges */ if (legacy_mode) { - if (!request_region(0x1f0, 8, "libata")) { + if (!request_region(ATA_PRIMARY_CMD, 8, "libata")) { struct resource *conflict, res; - res.start = 0x1f0; - res.end = 0x1f0 + 8 - 1; + res.start = ATA_PRIMARY_CMD; + res.end = ATA_PRIMARY_CMD + 8 - 1; conflict = ____request_resource(&ioport_resource, &res); if (!strcmp(conflict->name, "libata")) - legacy_mode |= (1 << 0); + legacy_mode |= ATA_PORT_PRIMARY; else { disable_dev_on_err = 0; - printk(KERN_WARNING "ata: 0x1f0 IDE port busy\n"); + printk(KERN_WARNING "ata: 0x%0X IDE port busy\n", ATA_PRIMARY_CMD); } } else - legacy_mode |= (1 << 0); + legacy_mode |= ATA_PORT_PRIMARY; - if (!request_region(0x170, 8, "libata")) { + if (!request_region(ATA_SECONDARY_CMD, 8, "libata")) { struct resource *conflict, res; - res.start = 0x170; - res.end = 0x170 + 8 - 1; + res.start = ATA_SECONDARY_CMD; + res.end = ATA_SECONDARY_CMD + 8 - 1; conflict = ____request_resource(&ioport_resource, &res); if (!strcmp(conflict->name, "libata")) - legacy_mode |= (1 << 1); + legacy_mode |= ATA_PORT_SECONDARY; else { disable_dev_on_err = 0; - printk(KERN_WARNING "ata: 0x170 IDE port busy\n"); + printk(KERN_WARNING "ata: 0x%X IDE port busy\n", ATA_SECONDARY_CMD); } } else - legacy_mode |= (1 << 1); + legacy_mode |= ATA_PORT_SECONDARY; } /* we have legacy mode, but all ports are unavailable */ @@ -1029,17 +1043,14 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, goto err_out_regions; if (legacy_mode) { - if (legacy_mode & (1 << 0)) - probe_ent = ata_pci_init_legacy_port(pdev, port[0], 0); - if (legacy_mode & (1 << 1)) - probe_ent2 = ata_pci_init_legacy_port(pdev, port[1], 1); + probe_ent = ata_pci_init_legacy_port(pdev, port, legacy_mode); } else { if (n_ports == 2) probe_ent = ata_pci_init_native_mode(pdev, port, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); else probe_ent = ata_pci_init_native_mode(pdev, port, ATA_PORT_PRIMARY); } - if (!probe_ent && !probe_ent2) { + if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; } @@ -1047,35 +1058,17 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, pci_set_master(pdev); /* FIXME: check ata_device_add return */ - if (legacy_mode) { - struct device *dev = &pdev->dev; - struct ata_host_set *host_set = NULL; - - if (legacy_mode & (1 << 0)) { - ata_device_add(probe_ent); - host_set = dev_get_drvdata(dev); - } - - if (legacy_mode & (1 << 1)) { - ata_device_add(probe_ent2); - if (host_set) { - host_set->next = dev_get_drvdata(dev); - dev_set_drvdata(dev, host_set); - } - } - } else - ata_device_add(probe_ent); + ata_device_add(probe_ent); kfree(probe_ent); - kfree(probe_ent2); return 0; err_out_regions: - if (legacy_mode & (1 << 0)) - release_region(0x1f0, 8); - if (legacy_mode & (1 << 1)) - release_region(0x170, 8); + if (legacy_mode & ATA_PORT_PRIMARY) + release_region(ATA_PRIMARY_CMD, 8); + if (legacy_mode & ATA_PORT_SECONDARY) + release_region(ATA_SECONDARY_CMD, 8); pci_release_regions(pdev); err_out: if (disable_dev_on_err) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 4e6c2e8ac0f6..3634279d896d 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5223,8 +5223,9 @@ void ata_port_init(struct ata_port *ap, struct ata_host_set *host_set, ap->host_set = host_set; ap->dev = ent->dev; ap->port_no = port_no; - ap->hard_port_no = - ent->legacy_mode ? ent->hard_port_no : port_no; + ap->hard_port_no = port_no; + if (ent->legacy_mode) + ap->hard_port_no += ent->hard_port_no; ap->pio_mask = ent->pio_mask; ap->mwdma_mask = ent->mwdma_mask; ap->udma_mask = ent->udma_mask; @@ -5400,6 +5401,7 @@ int ata_device_add(const struct ata_probe_ent *ent) ata_host_set_init(host_set, dev, ent->host_set_flags, ent->port_ops); host_set->n_ports = ent->n_ports; host_set->irq = ent->irq; + host_set->irq2 = ent->irq2; host_set->mmio_base = ent->mmio_base; host_set->private_data = ent->private_data; @@ -5407,11 +5409,16 @@ int ata_device_add(const struct ata_probe_ent *ent) for (i = 0; i < host_set->n_ports; i++) { struct ata_port *ap; unsigned long xfer_mode_mask; + int irq_line = ent->irq; ap = ata_port_add(ent, host_set, i); if (!ap) goto err_out; + /* Report the secondary IRQ for second channel legacy */ + if (i == 1 && ent->irq2) + irq_line = ent->irq2; + host_set->ports[i] = ap; xfer_mode_mask =(ap->udma_mask << ATA_SHIFT_UDMA) | (ap->mwdma_mask << ATA_SHIFT_MWDMA) | @@ -5419,20 +5426,20 @@ int ata_device_add(const struct ata_probe_ent *ent) /* print per-port info to dmesg */ ata_port_printk(ap, KERN_INFO, "%cATA max %s cmd 0x%lX " - "ctl 0x%lX bmdma 0x%lX irq %lu\n", + "ctl 0x%lX bmdma 0x%lX irq %d\n", ap->flags & ATA_FLAG_SATA ? 'S' : 'P', ata_mode_string(xfer_mode_mask), ap->ioaddr.cmd_addr, ap->ioaddr.ctl_addr, ap->ioaddr.bmdma_addr, - ent->irq); + irq_line); ata_chk_status(ap); host_set->ops->irq_clear(ap); ata_eh_freeze_port(ap); /* freeze port before requesting IRQ */ } - /* obtain irq, that is shared between channels */ + /* obtain irq, that may be shared between channels */ rc = request_irq(ent->irq, ent->port_ops->irq_handler, ent->irq_flags, DRV_NAME, host_set); if (rc) { @@ -5441,6 +5448,21 @@ int ata_device_add(const struct ata_probe_ent *ent) goto err_out; } + /* do we have a second IRQ for the other channel, eg legacy mode */ + if (ent->irq2) { + /* We will get weird core code crashes later if this is true + so trap it now */ + BUG_ON(ent->irq == ent->irq2); + + rc = request_irq(ent->irq2, ent->port_ops->irq_handler, ent->irq_flags, + DRV_NAME, host_set); + if (rc) { + dev_printk(KERN_ERR, dev, "irq %lu request failed: %d\n", + ent->irq2, rc); + goto err_out_free_irq; + } + } + /* perform each probe synchronously */ DPRINTK("probe begin\n"); for (i = 0; i < host_set->n_ports; i++) { @@ -5514,6 +5536,8 @@ int ata_device_add(const struct ata_probe_ent *ent) VPRINTK("EXIT, returning %u\n", ent->n_ports); return ent->n_ports; /* success */ +err_out_free_irq: + free_irq(ent->irq, host_set); err_out: for (i = 0; i < host_set->n_ports; i++) { struct ata_port *ap = host_set->ports[i]; @@ -5605,6 +5629,8 @@ void ata_host_set_remove(struct ata_host_set *host_set) ata_port_detach(host_set->ports[i]); free_irq(host_set->irq, host_set); + if (host_set->irq2) + free_irq(host_set->irq2, host_set); for (i = 0; i < host_set->n_ports; i++) { struct ata_port *ap = host_set->ports[i]; @@ -5614,10 +5640,11 @@ void ata_host_set_remove(struct ata_host_set *host_set) if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { struct ata_ioports *ioaddr = &ap->ioaddr; - if (ioaddr->cmd_addr == 0x1f0) - release_region(0x1f0, 8); - else if (ioaddr->cmd_addr == 0x170) - release_region(0x170, 8); + /* FIXME: Add -ac IDE pci mods to remove these special cases */ + if (ioaddr->cmd_addr == ATA_PRIMARY_CMD) + release_region(ATA_PRIMARY_CMD, 8); + else if (ioaddr->cmd_addr == ATA_SECONDARY_CMD) + release_region(ATA_SECONDARY_CMD, 8); } scsi_host_put(ap->host); @@ -5735,11 +5762,8 @@ void ata_pci_remove_one (struct pci_dev *pdev) { struct device *dev = pci_dev_to_dev(pdev); struct ata_host_set *host_set = dev_get_drvdata(dev); - struct ata_host_set *host_set2 = host_set->next; ata_host_set_remove(host_set); - if (host_set2) - ata_host_set_remove(host_set2); pci_release_regions(pdev); pci_disable_device(pdev); @@ -5807,14 +5831,6 @@ int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) if (rc) return rc; - if (host_set->next) { - rc = ata_host_set_suspend(host_set->next, mesg); - if (rc) { - ata_host_set_resume(host_set); - return rc; - } - } - ata_pci_device_do_suspend(pdev, mesg); return 0; @@ -5826,9 +5842,6 @@ int ata_pci_device_resume(struct pci_dev *pdev) ata_pci_device_do_resume(pdev); ata_host_set_resume(host_set); - if (host_set->next) - ata_host_set_resume(host_set->next); - return 0; } #endif /* CONFIG_PCI */ diff --git a/include/asm-alpha/libata-portmap.h b/include/asm-alpha/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-alpha/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-generic/libata-portmap.h b/include/asm-generic/libata-portmap.h new file mode 100644 index 000000000000..9202fd02d5be --- /dev/null +++ b/include/asm-generic/libata-portmap.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_LIBATA_PORTMAP_H +#define __ASM_GENERIC_LIBATA_PORTMAP_H + +#define ATA_PRIMARY_CMD 0x1F0 +#define ATA_PRIMARY_CTL 0x3F6 +#define ATA_PRIMARY_IRQ 14 + +#define ATA_SECONDARY_CMD 0x170 +#define ATA_SECONDARY_CTL 0x376 +#define ATA_SECONDARY_IRQ 15 + +#endif diff --git a/include/asm-i386/libata-portmap.h b/include/asm-i386/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-i386/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-ia64/libata-portmap.h b/include/asm-ia64/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-ia64/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-powerpc/libata-portmap.h b/include/asm-powerpc/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-powerpc/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-sparc/libata-portmap.h b/include/asm-sparc/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-sparc/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-sparc64/libata-portmap.h b/include/asm-sparc64/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-sparc64/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-x86_64/libata-portmap.h b/include/asm-x86_64/libata-portmap.h new file mode 100644 index 000000000000..75484ef0c743 --- /dev/null +++ b/include/asm-x86_64/libata-portmap.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/libata.h b/include/linux/libata.h index cf5eb1da3e32..4504776570e4 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -36,6 +36,8 @@ #include #include +#include + /* * compile-time options: to be removed as soon as all the drivers are * converted to the new debugging mechanism @@ -356,6 +358,7 @@ struct ata_probe_ent { unsigned int udma_mask; unsigned int legacy_mode; unsigned long irq; + unsigned long irq2; unsigned int irq_flags; unsigned long host_flags; unsigned long host_set_flags; @@ -367,6 +370,7 @@ struct ata_host_set { spinlock_t lock; struct device *dev; unsigned long irq; + unsigned long irq2; void __iomem *mmio_base; unsigned int n_ports; void *private_data; @@ -374,7 +378,6 @@ struct ata_host_set { unsigned long flags; int simplex_claimed; /* Keep seperate in case we ever need to do this locked */ - struct ata_host_set *next; /* for legacy mode */ struct ata_port *ports[0]; }; -- cgit v1.2.3-71-gd317 From dd5b06c490de72440ec39f814de99a714a45a1a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 10 Aug 2006 16:59:12 +0900 Subject: [PATCH] libata: implement dummy port Implement dummy port which can be requested by setting appropriate bit in probe_ent->dummy_port_mask. The dummy port is used as placeholder for stolen legacy port. This allows libata to guarantee that index_of(ap) == ap->port_no == actual_device_port_no, and thus to remove error-prone ap->hard_port_no. As it's used only when one port of a legacy controller is reserved by some other entity (e.g. IDE), the focus is on keeping the added *code* complexity at minimum, so dummy port allocates all libata core resources and acts as a normal port. It just has all dummy port_ops. This patch only implements dummy port. The following patch will make libata use it for stolen legacy ports. Signed-off-by: Tejun Heo --- drivers/scsi/libata-core.c | 61 ++++++++++++++++++++++++++++++++++++++-------- include/linux/libata.h | 8 ++++++ 2 files changed, 59 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 3634279d896d..f2e7e2f13db9 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5311,7 +5311,6 @@ static struct ata_port * ata_port_add(const struct ata_probe_ent *ent, { struct Scsi_Host *shost; struct ata_port *ap; - int rc; DPRINTK("ENTER\n"); @@ -5333,15 +5332,7 @@ static struct ata_port * ata_port_add(const struct ata_probe_ent *ent, ata_port_init(ap, host_set, ent, port_no); ata_port_init_shost(ap, shost); - rc = ap->ops->port_start(ap); - if (rc) - goto err_out; - return ap; - -err_out: - scsi_host_put(shost); - return NULL; } /** @@ -5415,11 +5406,27 @@ int ata_device_add(const struct ata_probe_ent *ent) if (!ap) goto err_out; + host_set->ports[i] = ap; + + /* dummy? */ + if (ent->dummy_port_mask & (1 << i)) { + ata_port_printk(ap, KERN_INFO, "DUMMY\n"); + ap->ops = &ata_dummy_port_ops; + continue; + } + + /* start port */ + rc = ap->ops->port_start(ap); + if (rc) { + host_set->ports[i] = NULL; + scsi_host_put(ap->host); + goto err_out; + } + /* Report the secondary IRQ for second channel legacy */ if (i == 1 && ent->irq2) irq_line = ent->irq2; - host_set->ports[i] = ap; xfer_mode_mask =(ap->udma_mask << ATA_SHIFT_UDMA) | (ap->mwdma_mask << ATA_SHIFT_MWDMA) | (ap->pio_mask << ATA_SHIFT_PIO); @@ -5940,6 +5947,39 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, return tmp; } +/* + * Dummy port_ops + */ +static void ata_dummy_noret(struct ata_port *ap) { } +static int ata_dummy_ret0(struct ata_port *ap) { return 0; } +static void ata_dummy_qc_noret(struct ata_queued_cmd *qc) { } + +static u8 ata_dummy_check_status(struct ata_port *ap) +{ + return ATA_DRDY; +} + +static unsigned int ata_dummy_qc_issue(struct ata_queued_cmd *qc) +{ + return AC_ERR_SYSTEM; +} + +const struct ata_port_operations ata_dummy_port_ops = { + .port_disable = ata_port_disable, + .check_status = ata_dummy_check_status, + .check_altstatus = ata_dummy_check_status, + .dev_select = ata_noop_dev_select, + .qc_prep = ata_noop_qc_prep, + .qc_issue = ata_dummy_qc_issue, + .freeze = ata_dummy_noret, + .thaw = ata_dummy_noret, + .error_handler = ata_dummy_noret, + .post_internal_cmd = ata_dummy_qc_noret, + .irq_clear = ata_dummy_noret, + .port_start = ata_dummy_ret0, + .port_stop = ata_dummy_noret, +}; + /* * libata is essentially a library of internal helper functions for * low-level ATA host controller drivers. As such, the API/ABI is @@ -5950,6 +5990,7 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, EXPORT_SYMBOL_GPL(sata_deb_timing_normal); EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug); EXPORT_SYMBOL_GPL(sata_deb_timing_long); +EXPORT_SYMBOL_GPL(ata_dummy_port_ops); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); EXPORT_SYMBOL_GPL(ata_host_set_init); diff --git a/include/linux/libata.h b/include/linux/libata.h index 4504776570e4..30bfe8f1666e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -353,6 +353,7 @@ struct ata_probe_ent { struct ata_ioports port[ATA_MAX_PORTS]; unsigned int n_ports; unsigned int hard_port_no; + unsigned int dummy_port_mask; unsigned int pio_mask; unsigned int mwdma_mask; unsigned int udma_mask; @@ -652,6 +653,8 @@ extern const unsigned long sata_deb_timing_normal[]; extern const unsigned long sata_deb_timing_hotplug[]; extern const unsigned long sata_deb_timing_long[]; +extern const struct ata_port_operations ata_dummy_port_ops; + static inline const unsigned long * sata_ehc_deb_timing(struct ata_eh_context *ehc) { @@ -661,6 +664,11 @@ sata_ehc_deb_timing(struct ata_eh_context *ehc) return sata_deb_timing_normal; } +static inline int ata_port_is_dummy(struct ata_port *ap) +{ + return ap->ops == &ata_dummy_port_ops; +} + extern void ata_port_probe(struct ata_port *); extern void __sata_phy_reset(struct ata_port *ap); extern void sata_phy_reset(struct ata_port *ap); -- cgit v1.2.3-71-gd317 From 4852ba24f647199be797545226c6d325db231937 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 10 Aug 2006 16:59:18 +0900 Subject: [PATCH] libata: kill unused hard_port_no and legacy_mode Kill unused probe_ent/ap->hard_port_no and probe_ent->legacy_mode. Signed-off-by: Tejun Heo --- drivers/scsi/libata-core.c | 1 - include/linux/libata.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 2ef86b4e7f6d..3f963f206d4a 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5223,7 +5223,6 @@ void ata_port_init(struct ata_port *ap, struct ata_host_set *host_set, ap->host_set = host_set; ap->dev = ent->dev; ap->port_no = port_no; - ap->hard_port_no = port_no; ap->pio_mask = ent->pio_mask; ap->mwdma_mask = ent->mwdma_mask; ap->udma_mask = ent->udma_mask; diff --git a/include/linux/libata.h b/include/linux/libata.h index 30bfe8f1666e..ed749f778697 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -352,12 +352,10 @@ struct ata_probe_ent { struct scsi_host_template *sht; struct ata_ioports port[ATA_MAX_PORTS]; unsigned int n_ports; - unsigned int hard_port_no; unsigned int dummy_port_mask; unsigned int pio_mask; unsigned int mwdma_mask; unsigned int udma_mask; - unsigned int legacy_mode; unsigned long irq; unsigned long irq2; unsigned int irq_flags; @@ -509,7 +507,6 @@ struct ata_port { unsigned int pflags; /* ATA_PFLAG_xxx */ unsigned int id; /* unique id req'd by scsi midlyr */ unsigned int port_no; /* unique port #; from zero */ - unsigned int hard_port_no; /* hardware port #; from zero */ struct ata_prd *prd; /* our SG list */ dma_addr_t prd_dma; /* and its DMA mapping */ -- cgit v1.2.3-71-gd317 From 8b881b0410de0f72a43e814393abf3a4cb29ebb4 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 11 Jun 2006 09:59:27 -0400 Subject: [ATA] Increase lba48 max-sectors from 200 to 256. Also, moved ATA_MAX_SECTORS and ATA_MAX_SECTORS_LBA48 from linux/libata.h to linux/ata.h, now that they truly reflect the standard (well... mostly; note TODO comment). This changes the performance profile (and potential bug profile) for a bunch of drivers, so be wary. --- include/linux/ata.h | 2 ++ include/linux/libata.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 3671af869696..8d708a3d505b 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -40,6 +40,8 @@ enum { ATA_MAX_DEVICES = 2, /* per bus/port */ ATA_MAX_PRD = 256, /* we could make these 256/256 */ ATA_SECT_SIZE = 512, + ATA_MAX_SECTORS = 256, + ATA_MAX_SECTORS_LBA48 = 65535,/* TODO: 65536? */ ATA_ID_WORDS = 256, ATA_ID_SERNO_OFS = 10, diff --git a/include/linux/libata.h b/include/linux/libata.h index ed749f778697..060da736b3a8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -114,8 +114,6 @@ enum { /* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */ ATA_MAX_QUEUE = 32, ATA_TAG_INTERNAL = ATA_MAX_QUEUE - 1, - ATA_MAX_SECTORS = 200, /* FIXME */ - ATA_MAX_SECTORS_LBA48 = 65535, ATA_MAX_BUS = 2, ATA_DEF_BUSY_WAIT = 10000, ATA_SHORT_PAUSE = (HZ >> 6) + 1, -- cgit v1.2.3-71-gd317 From b352e57dc3bb5033996adaa67c2f69b795eddd39 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 10 Aug 2006 18:52:12 +0100 Subject: [PATCH] libata: Add CompactFlash support The CFA world has some additional rules and drive modes we need to support for newer expansion cards and on embedded boxes Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 62 +++++++++++++++++++++++++++++++++++++++++------ include/linux/ata.h | 20 ++++++++++++++- include/linux/libata.h | 4 +-- 3 files changed, 75 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0ac0b519cf2d..9092416a6301 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -386,9 +386,13 @@ static const char *ata_mode_string(unsigned int xfer_mask) "PIO2", "PIO3", "PIO4", + "PIO5", + "PIO6", "MWDMA0", "MWDMA1", "MWDMA2", + "MWDMA3", + "MWDMA4", "UDMA/16", "UDMA/25", "UDMA/33", @@ -875,6 +879,23 @@ static unsigned int ata_id_xfermask(const u16 *id) mwdma_mask = id[ATA_ID_MWDMA_MODES] & 0x07; + if (ata_id_is_cfa(id)) { + /* + * Process compact flash extended modes + */ + int pio = id[163] & 0x7; + int dma = (id[163] >> 3) & 7; + + if (pio) + pio_mask |= (1 << 5); + if (pio > 1) + pio_mask |= (1 << 6); + if (dma) + mwdma_mask |= (1 << 3); + if (dma > 1) + mwdma_mask |= (1 << 4); + } + udma_mask = 0; if (id[ATA_ID_FIELD_VALID] & (1 << 2)) udma_mask = id[ATA_ID_UDMA_MODES] & 0xff; @@ -1356,6 +1377,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) struct ata_port *ap = dev->ap; const u16 *id = dev->id; unsigned int xfer_mask; + char revbuf[7]; /* XYZ-99\0 */ int rc; if (!ata_dev_enabled(dev) && ata_msg_info(ap)) { @@ -1399,6 +1421,15 @@ int ata_dev_configure(struct ata_device *dev, int print_info) /* ATA-specific feature tests */ if (dev->class == ATA_DEV_ATA) { + if (ata_id_is_cfa(id)) { + if (id[162] & 1) /* CPRM may make this media unusable */ + ata_dev_printk(dev, KERN_WARNING, "ata%u: device %u supports DRM functions and may not be fully accessable.\n", + ap->id, dev->devno); + snprintf(revbuf, 7, "CFA"); + } + else + snprintf(revbuf, 7, "ATA-%d", ata_id_major_version(id)); + dev->n_sectors = ata_id_n_sectors(id); if (ata_id_has_lba(id)) { @@ -1417,9 +1448,9 @@ int ata_dev_configure(struct ata_device *dev, int print_info) /* print device info to dmesg */ if (ata_msg_drv(ap) && print_info) - ata_dev_printk(dev, KERN_INFO, "ATA-%d, " + ata_dev_printk(dev, KERN_INFO, "%s, " "max %s, %Lu sectors: %s %s\n", - ata_id_major_version(id), + revbuf, ata_mode_string(xfer_mask), (unsigned long long)dev->n_sectors, lba_desc, ncq_desc); @@ -1440,9 +1471,9 @@ int ata_dev_configure(struct ata_device *dev, int print_info) /* print device info to dmesg */ if (ata_msg_drv(ap) && print_info) - ata_dev_printk(dev, KERN_INFO, "ATA-%d, " + ata_dev_printk(dev, KERN_INFO, "%s, " "max %s, %Lu sectors: CHS %u/%u/%u\n", - ata_id_major_version(id), + revbuf, ata_mode_string(xfer_mask), (unsigned long long)dev->n_sectors, dev->cylinders, dev->heads, @@ -1900,10 +1931,11 @@ int sata_set_spd(struct ata_port *ap) * drivers/ide/ide-timing.h and was originally written by Vojtech Pavlik */ /* - * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). + * PIO 0-4, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). * These were taken from ATA/ATAPI-6 standard, rev 0a, except - * for PIO 5, which is a nonstandard extension and UDMA6, which - * is currently supported only by Maxtor drives. + * for UDMA6, which is currently supported only by Maxtor drives. + * + * For PIO 5/6 MWDMA 3/4 see the CFA specification 3.0. */ static const struct ata_timing ata_timing[] = { @@ -1913,6 +1945,8 @@ static const struct ata_timing ata_timing[] = { { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, + { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 80, 0 }, + { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 100, 0 }, { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, @@ -1927,7 +1961,8 @@ static const struct ata_timing ata_timing[] = { { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, -/* { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, */ + { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 80, 0 }, + { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 100, 0 }, { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, @@ -3062,6 +3097,17 @@ static void ata_dev_xfermask(struct ata_device *dev) dev->mwdma_mask, dev->udma_mask); xfer_mask &= ata_id_xfermask(dev->id); + /* + * CFA Advanced TrueIDE timings are not allowed on a shared + * cable + */ + if (ata_dev_pair(dev)) { + /* No PIO5 or PIO6 */ + xfer_mask &= ~(0x03 << (ATA_SHIFT_PIO + 5)); + /* No MWDMA3 or MWDMA 4 */ + xfer_mask &= ~(0x03 << (ATA_SHIFT_MWDMA + 3)); + } + if (ata_dma_blacklisted(dev)) { xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA); ata_dev_printk(dev, KERN_WARNING, diff --git a/include/linux/ata.h b/include/linux/ata.h index 8d708a3d505b..991b858acc30 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -170,12 +170,16 @@ enum { XFER_UDMA_2 = 0x42, XFER_UDMA_1 = 0x41, XFER_UDMA_0 = 0x40, + XFER_MW_DMA_4 = 0x24, /* CFA only */ + XFER_MW_DMA_3 = 0x23, /* CFA only */ XFER_MW_DMA_2 = 0x22, XFER_MW_DMA_1 = 0x21, XFER_MW_DMA_0 = 0x20, XFER_SW_DMA_2 = 0x12, XFER_SW_DMA_1 = 0x11, XFER_SW_DMA_0 = 0x10, + XFER_PIO_6 = 0x0E, /* CFA only */ + XFER_PIO_5 = 0x0D, /* CFA only */ XFER_PIO_4 = 0x0C, XFER_PIO_3 = 0x0B, XFER_PIO_2 = 0x0A, @@ -274,7 +278,6 @@ struct ata_taskfile { }; #define ata_id_is_ata(id) (((id)[0] & (1 << 15)) == 0) -#define ata_id_is_cfa(id) ((id)[0] == 0x848A) #define ata_id_is_sata(id) ((id)[93] == 0) #define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6)) #define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5)) @@ -306,6 +309,9 @@ static inline unsigned int ata_id_major_version(const u16 *id) { unsigned int mver; + if (id[ATA_ID_MAJOR_VER] == 0xFFFF) + return 0; + for (mver = 14; mver >= 1; mver--) if (id[ATA_ID_MAJOR_VER] & (1 << mver)) break; @@ -324,6 +330,18 @@ static inline int ata_id_current_chs_valid(const u16 *id) id[56]; /* sectors in current translation */ } +static inline int ata_id_is_cfa(const u16 *id) +{ + u16 v = id[0]; + if (v == 0x848A) /* Standard CF */ + return 1; + /* Could be CF hiding as standard ATA */ + if (ata_id_major_version(id) >= 3 && id[82] != 0xFFFF && + (id[82] & ( 1 << 2))) + return 1; + return 0; +} + static inline int atapi_cdb_len(const u16 *dev_id) { u16 tmp = dev_id[0] & 0x3; diff --git a/include/linux/libata.h b/include/linux/libata.h index 060da736b3a8..806682603ac5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -225,8 +225,8 @@ enum { /* encoding various smaller bitmaps into a single * unsigned int bitmap */ - ATA_BITS_PIO = 5, - ATA_BITS_MWDMA = 3, + ATA_BITS_PIO = 7, + ATA_BITS_MWDMA = 5, ATA_BITS_UDMA = 8, ATA_SHIFT_PIO = 0, -- cgit v1.2.3-71-gd317 From cca3974e48607c3775dc73b544a5700b2e37c21a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 24 Aug 2006 03:19:22 -0400 Subject: libata: Grand renaming. The biggest change is that ata_host_set is renamed to ata_host. * ata_host_set => ata_host * ata_probe_ent->host_flags => ata_probe_ent->port_flags * ata_probe_ent->host_set_flags => ata_probe_ent->_host_flags * ata_host_stats => ata_port_stats * ata_port->host => ata_port->scsi_host * ata_port->host_set => ata_port->host * ata_port_info->host_flags => ata_port_info->flags * ata_(.*)host_set(.*)\(\) => ata_\1host\2() The leading underscore in ata_probe_ent->_host_flags is to avoid reusing ->host_flags for different purpose. Currently, the only user of the field is libata-bmdma.c and probe_ent itself is scheduled to be removed. ata_port->host is reused for different purpose but this field is used inside libata core proper and of different type. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 98 +++++++-------- drivers/ata/ata_piix.c | 56 ++++----- drivers/ata/libata-bmdma.c | 34 +++--- drivers/ata/libata-core.c | 292 ++++++++++++++++++++++----------------------- drivers/ata/libata-eh.c | 16 +-- drivers/ata/libata-scsi.c | 78 ++++++------ drivers/ata/libata.h | 2 +- drivers/ata/pdc_adma.c | 46 +++---- drivers/ata/sata_mv.c | 98 ++++++++------- drivers/ata/sata_nv.c | 58 ++++----- drivers/ata/sata_promise.c | 54 ++++----- drivers/ata/sata_qstor.c | 44 +++---- drivers/ata/sata_sil.c | 47 ++++---- drivers/ata/sata_sil24.c | 59 +++++---- drivers/ata/sata_sis.c | 18 +-- drivers/ata/sata_svw.c | 10 +- drivers/ata/sata_sx4.c | 64 +++++----- drivers/ata/sata_uli.c | 8 +- drivers/ata/sata_via.c | 4 +- drivers/ata/sata_vsc.c | 16 +-- include/linux/libata.h | 39 +++--- 21 files changed, 568 insertions(+), 573 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 813031c01fba..3f1106fdaed1 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -277,7 +277,7 @@ static const struct ata_port_info ahci_port_info[] = { /* board_ahci */ { .sht = &ahci_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_SKIP_D2H_BSY, .pio_mask = 0x1f, /* pio0-4 */ @@ -287,7 +287,7 @@ static const struct ata_port_info ahci_port_info[] = { /* board_ahci_vt8251 */ { .sht = &ahci_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_SKIP_D2H_BSY | AHCI_FLAG_RESET_NEEDS_CLO | AHCI_FLAG_NO_NCQ, @@ -709,7 +709,7 @@ static void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, static int ahci_clo(struct ata_port *ap) { void __iomem *port_mmio = (void __iomem *) ap->ioaddr.cmd_addr; - struct ahci_host_priv *hpriv = ap->host_set->private_data; + struct ahci_host_priv *hpriv = ap->host->private_data; u32 tmp; if (!(hpriv->cap & HOST_CAP_CLO)) @@ -741,7 +741,7 @@ static int ahci_prereset(struct ata_port *ap) static int ahci_softreset(struct ata_port *ap, unsigned int *class) { struct ahci_port_priv *pp = ap->private_data; - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); const u32 cmd_fis_len = 5; /* five dwords */ const char *reason = NULL; @@ -850,7 +850,7 @@ static int ahci_hardreset(struct ata_port *ap, unsigned int *class) struct ahci_port_priv *pp = ap->private_data; u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; struct ata_taskfile tf; - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); int rc; @@ -1039,7 +1039,7 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat) static void ahci_host_intr(struct ata_port *ap) { - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); struct ata_eh_info *ehi = &ap->eh_info; u32 status, qc_active; @@ -1091,7 +1091,7 @@ static void ahci_irq_clear(struct ata_port *ap) static irqreturn_t ahci_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; struct ahci_host_priv *hpriv; unsigned int i, handled = 0; void __iomem *mmio; @@ -1099,8 +1099,8 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance, struct pt_regs *r VPRINTK("ENTER\n"); - hpriv = host_set->private_data; - mmio = host_set->mmio_base; + hpriv = host->private_data; + mmio = host->mmio_base; /* sigh. 0xffffffff is a valid return from h/w */ irq_stat = readl(mmio + HOST_IRQ_STAT); @@ -1108,22 +1108,22 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance, struct pt_regs *r if (!irq_stat) return IRQ_NONE; - spin_lock(&host_set->lock); + spin_lock(&host->lock); - for (i = 0; i < host_set->n_ports; i++) { + for (i = 0; i < host->n_ports; i++) { struct ata_port *ap; if (!(irq_stat & (1 << i))) continue; - ap = host_set->ports[i]; + ap = host->ports[i]; if (ap) { ahci_host_intr(ap); VPRINTK("port %u\n", i); } else { VPRINTK("port %u (no irq)\n", i); if (ata_ratelimit()) - dev_printk(KERN_WARNING, host_set->dev, + dev_printk(KERN_WARNING, host->dev, "interrupt on disabled port %u\n", i); } @@ -1135,7 +1135,7 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance, struct pt_regs *r handled = 1; } - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); VPRINTK("EXIT\n"); @@ -1157,7 +1157,7 @@ static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc) static void ahci_freeze(struct ata_port *ap) { - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); /* turn IRQ off */ @@ -1166,7 +1166,7 @@ static void ahci_freeze(struct ata_port *ap) static void ahci_thaw(struct ata_port *ap) { - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); u32 tmp; @@ -1181,7 +1181,7 @@ static void ahci_thaw(struct ata_port *ap) static void ahci_error_handler(struct ata_port *ap) { - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); if (!(ap->pflags & ATA_PFLAG_FROZEN)) { @@ -1198,7 +1198,7 @@ static void ahci_error_handler(struct ata_port *ap) static void ahci_post_internal_cmd(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); if (qc->flags & ATA_QCFLAG_FAILED) @@ -1213,9 +1213,9 @@ static void ahci_post_internal_cmd(struct ata_queued_cmd *qc) static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg) { - struct ahci_host_priv *hpriv = ap->host_set->private_data; + struct ahci_host_priv *hpriv = ap->host->private_data; struct ahci_port_priv *pp = ap->private_data; - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); const char *emsg = NULL; int rc; @@ -1233,8 +1233,8 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg) static int ahci_port_resume(struct ata_port *ap) { struct ahci_port_priv *pp = ap->private_data; - struct ahci_host_priv *hpriv = ap->host_set->private_data; - void __iomem *mmio = ap->host_set->mmio_base; + struct ahci_host_priv *hpriv = ap->host->private_data; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); ahci_init_port(port_mmio, hpriv->cap, pp->cmd_slot_dma, pp->rx_fis_dma); @@ -1244,8 +1244,8 @@ static int ahci_port_resume(struct ata_port *ap) static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) { - struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); - void __iomem *mmio = host_set->mmio_base; + struct ata_host *host = dev_get_drvdata(&pdev->dev); + void __iomem *mmio = host->mmio_base; u32 ctl; if (mesg.event == PM_EVENT_SUSPEND) { @@ -1264,9 +1264,9 @@ static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) static int ahci_pci_device_resume(struct pci_dev *pdev) { - struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); - struct ahci_host_priv *hpriv = host_set->private_data; - void __iomem *mmio = host_set->mmio_base; + struct ata_host *host = dev_get_drvdata(&pdev->dev); + struct ahci_host_priv *hpriv = host->private_data; + void __iomem *mmio = host->mmio_base; int rc; ata_pci_device_do_resume(pdev); @@ -1276,20 +1276,20 @@ static int ahci_pci_device_resume(struct pci_dev *pdev) if (rc) return rc; - ahci_init_controller(mmio, pdev, host_set->n_ports, hpriv->cap); + ahci_init_controller(mmio, pdev, host->n_ports, hpriv->cap); } - ata_host_set_resume(host_set); + ata_host_resume(host); return 0; } static int ahci_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; - struct ahci_host_priv *hpriv = ap->host_set->private_data; + struct device *dev = ap->host->dev; + struct ahci_host_priv *hpriv = ap->host->private_data; struct ahci_port_priv *pp; - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); void *mem; dma_addr_t mem_dma; @@ -1350,10 +1350,10 @@ static int ahci_port_start(struct ata_port *ap) static void ahci_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; - struct ahci_host_priv *hpriv = ap->host_set->private_data; + struct device *dev = ap->host->dev; + struct ahci_host_priv *hpriv = ap->host->private_data; struct ahci_port_priv *pp = ap->private_data; - void __iomem *mmio = ap->host_set->mmio_base; + void __iomem *mmio = ap->host->mmio_base; void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no); const char *emsg = NULL; int rc; @@ -1581,7 +1581,7 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) memset(hpriv, 0, sizeof(*hpriv)); probe_ent->sht = ahci_port_info[board_idx].sht; - probe_ent->host_flags = ahci_port_info[board_idx].host_flags; + probe_ent->port_flags = ahci_port_info[board_idx].flags; probe_ent->pio_mask = ahci_port_info[board_idx].pio_mask; probe_ent->udma_mask = ahci_port_info[board_idx].udma_mask; probe_ent->port_ops = ahci_port_info[board_idx].port_ops; @@ -1599,9 +1599,9 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out_hpriv; - if (!(probe_ent->host_flags & AHCI_FLAG_NO_NCQ) && + if (!(probe_ent->port_flags & AHCI_FLAG_NO_NCQ) && (hpriv->cap & HOST_CAP_NCQ)) - probe_ent->host_flags |= ATA_FLAG_NCQ; + probe_ent->port_flags |= ATA_FLAG_NCQ; ahci_print_info(probe_ent); @@ -1632,27 +1632,27 @@ err_out: static void ahci_remove_one (struct pci_dev *pdev) { struct device *dev = pci_dev_to_dev(pdev); - struct ata_host_set *host_set = dev_get_drvdata(dev); - struct ahci_host_priv *hpriv = host_set->private_data; + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; unsigned int i; int have_msi; - for (i = 0; i < host_set->n_ports; i++) - ata_port_detach(host_set->ports[i]); + for (i = 0; i < host->n_ports; i++) + ata_port_detach(host->ports[i]); have_msi = hpriv->flags & AHCI_FLAG_MSI; - free_irq(host_set->irq, host_set); + free_irq(host->irq, host); - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; - ata_scsi_release(ap->host); - scsi_host_put(ap->host); + ata_scsi_release(ap->scsi_host); + scsi_host_put(ap->scsi_host); } kfree(hpriv); - pci_iounmap(pdev, host_set->mmio_base); - kfree(host_set); + pci_iounmap(pdev, host->mmio_base); + kfree(host); if (have_msi) pci_disable_msi(pdev); diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 0ca4c3b78dc5..22b2dba90b9a 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -151,7 +151,7 @@ struct piix_host_priv { static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); -static void piix_host_stop(struct ata_host_set *host_set); +static void piix_host_stop(struct ata_host *host); static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev); static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev); static void piix_pata_error_handler(struct ata_port *ap); @@ -362,7 +362,7 @@ static struct ata_port_info piix_port_info[] = { /* piix4_pata */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SLAVE_POSS, + .flags = ATA_FLAG_SLAVE_POSS, .pio_mask = 0x1f, /* pio0-4 */ #if 0 .mwdma_mask = 0x06, /* mwdma1-2 */ @@ -376,7 +376,7 @@ static struct ata_port_info piix_port_info[] = { /* ich5_pata */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SLAVE_POSS | PIIX_FLAG_CHECKINTR, + .flags = ATA_FLAG_SLAVE_POSS | PIIX_FLAG_CHECKINTR, .pio_mask = 0x1f, /* pio0-4 */ #if 0 .mwdma_mask = 0x06, /* mwdma1-2 */ @@ -390,7 +390,7 @@ static struct ata_port_info piix_port_info[] = { /* ich5_sata */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | + .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | PIIX_FLAG_IGNORE_PCS, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ @@ -401,7 +401,7 @@ static struct ata_port_info piix_port_info[] = { /* i6300esb_sata */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SATA | + .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | PIIX_FLAG_IGNORE_PCS, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ @@ -412,7 +412,7 @@ static struct ata_port_info piix_port_info[] = { /* ich6_sata */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SATA | + .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | PIIX_FLAG_SCR, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ @@ -423,7 +423,7 @@ static struct ata_port_info piix_port_info[] = { /* ich6_sata_ahci */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SATA | + .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | PIIX_FLAG_SCR | PIIX_FLAG_AHCI, .pio_mask = 0x1f, /* pio0-4 */ @@ -435,7 +435,7 @@ static struct ata_port_info piix_port_info[] = { /* ich6m_sata_ahci */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SATA | + .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | PIIX_FLAG_SCR | PIIX_FLAG_AHCI, .pio_mask = 0x1f, /* pio0-4 */ @@ -447,7 +447,7 @@ static struct ata_port_info piix_port_info[] = { /* ich8_sata_ahci */ { .sht = &piix_sht, - .host_flags = ATA_FLAG_SATA | + .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | PIIX_FLAG_SCR | PIIX_FLAG_AHCI, .pio_mask = 0x1f, /* pio0-4 */ @@ -485,7 +485,7 @@ MODULE_PARM_DESC(force_pcs, "force honoring or ignoring PCS to work around " */ static void piix_pata_cbl_detect(struct ata_port *ap) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 tmp, mask; /* no 80c support in host controller? */ @@ -517,7 +517,7 @@ cbl40: */ static int piix_pata_prereset(struct ata_port *ap) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); if (!pci_test_config_bits(pdev, &piix_enable_bits[ap->port_no])) { ata_port_printk(ap, KERN_INFO, "port disabled. ignoring.\n"); @@ -551,8 +551,8 @@ static void piix_pata_error_handler(struct ata_port *ap) */ static unsigned int piix_sata_present_mask(struct ata_port *ap) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); - struct piix_host_priv *hpriv = ap->host_set->private_data; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct piix_host_priv *hpriv = ap->host->private_data; const unsigned int *map = hpriv->map; int base = 2 * ap->port_no; unsigned int present_mask = 0; @@ -631,7 +631,7 @@ static void piix_sata_error_handler(struct ata_port *ap) static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) { unsigned int pio = adev->pio_mode - XFER_PIO_0; - struct pci_dev *dev = to_pci_dev(ap->host_set->dev); + struct pci_dev *dev = to_pci_dev(ap->host->dev); unsigned int is_slave = (adev->devno != 0); unsigned int master_port= ap->port_no ? 0x42 : 0x40; unsigned int slave_port = 0x44; @@ -683,7 +683,7 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev) { unsigned int udma = adev->dma_mode; /* FIXME: MWDMA too */ - struct pci_dev *dev = to_pci_dev(ap->host_set->dev); + struct pci_dev *dev = to_pci_dev(ap->host->dev); u8 maslave = ap->port_no ? 0x42 : 0x40; u8 speed = udma; unsigned int drive_dn = (ap->port_no ? 2 : 0) + adev->devno; @@ -835,13 +835,13 @@ static void __devinit piix_init_pcs(struct pci_dev *pdev, if (force_pcs == 1) { dev_printk(KERN_INFO, &pdev->dev, "force ignoring PCS (0x%x)\n", new_pcs); - pinfo[0].host_flags |= PIIX_FLAG_IGNORE_PCS; - pinfo[1].host_flags |= PIIX_FLAG_IGNORE_PCS; + pinfo[0].flags |= PIIX_FLAG_IGNORE_PCS; + pinfo[1].flags |= PIIX_FLAG_IGNORE_PCS; } else if (force_pcs == 2) { dev_printk(KERN_INFO, &pdev->dev, "force honoring PCS (0x%x)\n", new_pcs); - pinfo[0].host_flags &= ~PIIX_FLAG_IGNORE_PCS; - pinfo[1].host_flags &= ~PIIX_FLAG_IGNORE_PCS; + pinfo[0].flags &= ~PIIX_FLAG_IGNORE_PCS; + pinfo[1].flags &= ~PIIX_FLAG_IGNORE_PCS; } } @@ -881,7 +881,7 @@ static void __devinit piix_init_sata_map(struct pci_dev *pdev, default: printk(" P%d", map[i]); if (i & 1) - pinfo[i / 2].host_flags |= ATA_FLAG_SLAVE_POSS; + pinfo[i / 2].flags |= ATA_FLAG_SLAVE_POSS; break; } } @@ -916,7 +916,7 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) struct ata_port_info port_info[2]; struct ata_port_info *ppinfo[2] = { &port_info[0], &port_info[1] }; struct piix_host_priv *hpriv; - unsigned long host_flags; + unsigned long port_flags; if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, @@ -935,9 +935,9 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) port_info[0].private_data = hpriv; port_info[1].private_data = hpriv; - host_flags = port_info[0].host_flags; + port_flags = port_info[0].flags; - if (host_flags & PIIX_FLAG_AHCI) { + if (port_flags & PIIX_FLAG_AHCI) { u8 tmp; pci_read_config_byte(pdev, PIIX_SCC, &tmp); if (tmp == PIIX_AHCI_DEVICE) { @@ -948,7 +948,7 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } /* Initialize SATA map */ - if (host_flags & ATA_FLAG_SATA) { + if (port_flags & ATA_FLAG_SATA) { piix_init_sata_map(pdev, port_info, piix_map_db_table[ent->driver_data]); piix_init_pcs(pdev, port_info, @@ -961,7 +961,7 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) * MSI is disabled (and it is disabled, as we don't use * message-signalled interrupts currently). */ - if (host_flags & PIIX_FLAG_CHECKINTR) + if (port_flags & PIIX_FLAG_CHECKINTR) pci_intx(pdev, 1); if (piix_check_450nx_errata(pdev)) { @@ -976,11 +976,11 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) return ata_pci_init_one(pdev, ppinfo, 2); } -static void piix_host_stop(struct ata_host_set *host_set) +static void piix_host_stop(struct ata_host *host) { - struct piix_host_priv *hpriv = host_set->private_data; + struct piix_host_priv *hpriv = host->private_data; - ata_host_stop(host_set); + ata_host_stop(host); kfree(hpriv); } diff --git a/drivers/ata/libata-bmdma.c b/drivers/ata/libata-bmdma.c index 158f62dbf21b..760502859821 100644 --- a/drivers/ata/libata-bmdma.c +++ b/drivers/ata/libata-bmdma.c @@ -193,7 +193,7 @@ void ata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) * synchronization with interrupt handler / other threads. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_exec_command_pio(struct ata_port *ap, const struct ata_taskfile *tf) @@ -216,7 +216,7 @@ static void ata_exec_command_pio(struct ata_port *ap, const struct ata_taskfile * FIXME: missing write posting for 400nS delay enforcement * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf) @@ -237,7 +237,7 @@ static void ata_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile * synchronization with interrupt handler / other threads. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { @@ -422,7 +422,7 @@ u8 ata_altstatus(struct ata_port *ap) * @qc: Info associated with this ATA transaction. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc) @@ -452,7 +452,7 @@ static void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc) * @qc: Info associated with this ATA transaction. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_bmdma_start_mmio (struct ata_queued_cmd *qc) @@ -483,7 +483,7 @@ static void ata_bmdma_start_mmio (struct ata_queued_cmd *qc) * @qc: Info associated with this ATA transaction. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_bmdma_setup_pio (struct ata_queued_cmd *qc) @@ -511,7 +511,7 @@ static void ata_bmdma_setup_pio (struct ata_queued_cmd *qc) * @qc: Info associated with this ATA transaction. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_bmdma_start_pio (struct ata_queued_cmd *qc) @@ -535,7 +535,7 @@ static void ata_bmdma_start_pio (struct ata_queued_cmd *qc) * May be used as the bmdma_start() entry in ata_port_operations. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_bmdma_start(struct ata_queued_cmd *qc) { @@ -557,7 +557,7 @@ void ata_bmdma_start(struct ata_queued_cmd *qc) * May be used as the bmdma_setup() entry in ata_port_operations. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_bmdma_setup(struct ata_queued_cmd *qc) { @@ -577,7 +577,7 @@ void ata_bmdma_setup(struct ata_queued_cmd *qc) * May be used as the irq_clear() entry in ata_port_operations. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_bmdma_irq_clear(struct ata_port *ap) @@ -605,7 +605,7 @@ void ata_bmdma_irq_clear(struct ata_port *ap) * May be used as the bmdma_status() entry in ata_port_operations. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ u8 ata_bmdma_status(struct ata_port *ap) @@ -629,7 +629,7 @@ u8 ata_bmdma_status(struct ata_port *ap) * May be used as the bmdma_stop() entry in ata_port_operations. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_bmdma_stop(struct ata_queued_cmd *qc) @@ -838,7 +838,7 @@ ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int bmdma = pci_resource_start(pdev, 4); if (bmdma) { if (inb(bmdma + 2) & 0x80) - probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + probe_ent->_host_flags |= ATA_HOST_SIMPLEX; probe_ent->port[p].bmdma_addr = bmdma; } ata_std_ports(&probe_ent->port[p]); @@ -854,7 +854,7 @@ ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int if (bmdma) { bmdma += 8; if(inb(bmdma + 2) & 0x80) - probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + probe_ent->_host_flags |= ATA_HOST_SIMPLEX; probe_ent->port[p].bmdma_addr = bmdma; } ata_std_ports(&probe_ent->port[p]); @@ -887,7 +887,7 @@ static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, if (bmdma) { probe_ent->port[0].bmdma_addr = bmdma; if (inb(bmdma + 2) & 0x80) - probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + probe_ent->_host_flags |= ATA_HOST_SIMPLEX; } ata_std_ports(&probe_ent->port[0]); } else @@ -904,7 +904,7 @@ static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, if (bmdma) { probe_ent->port[1].bmdma_addr = bmdma + 8; if (inb(bmdma + 10) & 0x80) - probe_ent->host_set_flags |= ATA_HOST_SIMPLEX; + probe_ent->_host_flags |= ATA_HOST_SIMPLEX; } ata_std_ports(&probe_ent->port[1]); } else @@ -957,7 +957,7 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, else port[1] = port[0]; - if ((port[0]->host_flags & ATA_FLAG_NO_LEGACY) == 0 + if ((port[0]->flags & ATA_FLAG_NO_LEGACY) == 0 && (pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) { /* TODO: What if one channel is in native mode ... */ pci_read_config_byte(pdev, PCI_CLASS_PROG, &tmp8); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 9092416a6301..1c9315401f7a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1335,7 +1335,7 @@ static void ata_dev_config_ncq(struct ata_device *dev, } if (ap->flags & ATA_FLAG_NCQ) { - hdepth = min(ap->host->can_queue, ATA_MAX_QUEUE - 1); + hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE - 1); dev->flags |= ATA_DFLAG_NCQ; } @@ -1349,12 +1349,13 @@ static void ata_set_port_max_cmd_len(struct ata_port *ap) { int i; - if (ap->host) { - ap->host->max_cmd_len = 0; + if (ap->scsi_host) { + unsigned int len = 0; + for (i = 0; i < ATA_MAX_DEVICES; i++) - ap->host->max_cmd_len = max_t(unsigned int, - ap->host->max_cmd_len, - ap->device[i].cdb_len); + len = max(len, ap->device[i].cdb_len); + + ap->scsi_host->max_cmd_len = len; } } @@ -1662,7 +1663,7 @@ int ata_bus_probe(struct ata_port *ap) * Modify @ap data structure such that the system * thinks that the entire port is enabled. * - * LOCKING: host_set lock, or some other form of + * LOCKING: host lock, or some other form of * serialization. */ @@ -1800,7 +1801,7 @@ struct ata_device *ata_dev_pair(struct ata_device *adev) * never attempt to probe or communicate with devices * on this port. * - * LOCKING: host_set lock, or some other form of + * LOCKING: host lock, or some other form of * serialization. */ @@ -2258,8 +2259,8 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev) /* Record simplex status. If we selected DMA then the other * host channels are not permitted to do so. */ - if (used_dma && (ap->host_set->flags & ATA_HOST_SIMPLEX)) - ap->host_set->simplex_claimed = 1; + if (used_dma && (ap->host->flags & ATA_HOST_SIMPLEX)) + ap->host->simplex_claimed = 1; /* step5: chip specific finalisation */ if (ap->ops->post_set_mode) @@ -2281,7 +2282,7 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev) * other threads. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static inline void ata_tf_to_host(struct ata_port *ap, @@ -2445,7 +2446,7 @@ static unsigned int ata_bus_softreset(struct ata_port *ap, * * LOCKING: * PCI/etc. bus probe sem. - * Obtains host_set lock. + * Obtains host lock. * * SIDE EFFECTS: * Sets ATA_FLAG_DISABLED if bus reset fails. @@ -3080,7 +3081,7 @@ static int ata_dma_blacklisted(const struct ata_device *dev) static void ata_dev_xfermask(struct ata_device *dev) { struct ata_port *ap = dev->ap; - struct ata_host_set *hs = ap->host_set; + struct ata_host *host = ap->host; unsigned long xfer_mask; /* controller modes available */ @@ -3114,7 +3115,7 @@ static void ata_dev_xfermask(struct ata_device *dev) "device is on DMA blacklist, disabling DMA\n"); } - if ((hs->flags & ATA_HOST_SIMPLEX) && hs->simplex_claimed) { + if ((host->flags & ATA_HOST_SIMPLEX) && host->simplex_claimed) { xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA); ata_dev_printk(dev, KERN_WARNING, "simplex DMA is claimed by " "other device, disabling DMA\n"); @@ -3207,7 +3208,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev, * Unmap all mapped DMA memory associated with this command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void ata_sg_clean(struct ata_queued_cmd *qc) @@ -3267,7 +3268,7 @@ static void ata_sg_clean(struct ata_queued_cmd *qc) * associated with the current disk command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * */ static void ata_fill_sg(struct ata_queued_cmd *qc) @@ -3319,7 +3320,7 @@ static void ata_fill_sg(struct ata_queued_cmd *qc) * supplied PACKET command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: 0 when ATAPI DMA can be used * nonzero otherwise @@ -3341,7 +3342,7 @@ int ata_check_atapi_dma(struct ata_queued_cmd *qc) * Prepare ATA taskfile for submission. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_qc_prep(struct ata_queued_cmd *qc) { @@ -3363,7 +3364,7 @@ void ata_noop_qc_prep(struct ata_queued_cmd *qc) { } * to point to a single memory buffer, @buf of byte length @buflen. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen) @@ -3394,7 +3395,7 @@ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen) * elements. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, @@ -3413,7 +3414,7 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, * DMA-map the memory buffer associated with queued_cmd @qc. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, negative on error. @@ -3482,7 +3483,7 @@ skip_map: * DMA-map the scatter-gather table associated with queued_cmd @qc. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, negative on error. @@ -3991,7 +3992,7 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap, struct ata_queued_cmd *q * Finish @qc which is running on standard HSM. * * LOCKING: - * If @in_wq is zero, spin_lock_irqsave(host_set lock). + * If @in_wq is zero, spin_lock_irqsave(host lock). * Otherwise, none on entry and grabs host lock. */ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) @@ -4003,8 +4004,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) if (in_wq) { spin_lock_irqsave(ap->lock, flags); - /* EH might have kicked in while host_set lock - * is released. + /* EH might have kicked in while host lock is + * released. */ qc = ata_qc_from_tag(ap, qc->tag); if (qc) { @@ -4369,7 +4370,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev) * in case something prevents using it. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_qc_free(struct ata_queued_cmd *qc) { @@ -4422,7 +4423,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) * command has completed, with either an ok or not-ok status. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_qc_complete(struct ata_queued_cmd *qc) { @@ -4485,7 +4486,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc) * and commands are completed accordingly. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Number of completed commands on success, -errno otherwise. @@ -4556,7 +4557,7 @@ static inline int ata_should_dma_map(struct ata_queued_cmd *qc) * writing the taskfile to hardware, starting the command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_qc_issue(struct ata_queued_cmd *qc) { @@ -4617,7 +4618,7 @@ err: * May be used as the qc_issue() entry in ata_port_operations. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, AC_ERR_* mask on failure @@ -4746,7 +4747,7 @@ unsigned int ata_qc_issue_prot(struct ata_queued_cmd *qc) * handled via polling with interrupts disabled (nIEN bit). * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * One if interrupt was handled, zero if not (shared irq). @@ -4833,14 +4834,14 @@ idle_irq: /** * ata_interrupt - Default ATA host interrupt handler * @irq: irq line (unused) - * @dev_instance: pointer to our ata_host_set information structure + * @dev_instance: pointer to our ata_host information structure * @regs: unused * * Default interrupt handler for PCI IDE devices. Calls * ata_host_intr() for each port that is not disabled. * * LOCKING: - * Obtains host_set lock during operation. + * Obtains host lock during operation. * * RETURNS: * IRQ_NONE or IRQ_HANDLED. @@ -4848,18 +4849,18 @@ idle_irq: irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; unsigned int i; unsigned int handled = 0; unsigned long flags; /* TODO: make _irqsave conditional on x86 PCI IDE legacy mode */ - spin_lock_irqsave(&host_set->lock, flags); + spin_lock_irqsave(&host->lock, flags); - for (i = 0; i < host_set->n_ports; i++) { + for (i = 0; i < host->n_ports; i++) { struct ata_port *ap; - ap = host_set->ports[i]; + ap = host->ports[i]; if (ap && !(ap->flags & ATA_FLAG_DISABLED)) { struct ata_queued_cmd *qc; @@ -4871,7 +4872,7 @@ irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs) } } - spin_unlock_irqrestore(&host_set->lock, flags); + spin_unlock_irqrestore(&host->lock, flags); return IRQ_RETVAL(handled); } @@ -5036,15 +5037,15 @@ int ata_flush_cache(struct ata_device *dev) return 0; } -static int ata_host_set_request_pm(struct ata_host_set *host_set, - pm_message_t mesg, unsigned int action, - unsigned int ehi_flags, int wait) +static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg, + unsigned int action, unsigned int ehi_flags, + int wait) { unsigned long flags; int i, rc; - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; /* Previous resume operation might still be in * progress. Wait for PM_PENDING to clear. @@ -5084,11 +5085,11 @@ static int ata_host_set_request_pm(struct ata_host_set *host_set, } /** - * ata_host_set_suspend - suspend host_set - * @host_set: host_set to suspend + * ata_host_suspend - suspend host + * @host: host to suspend * @mesg: PM message * - * Suspend @host_set. Actual operation is performed by EH. This + * Suspend @host. Actual operation is performed by EH. This * function requests EH to perform PM operations and waits for EH * to finish. * @@ -5098,11 +5099,11 @@ static int ata_host_set_request_pm(struct ata_host_set *host_set, * RETURNS: * 0 on success, -errno on failure. */ -int ata_host_set_suspend(struct ata_host_set *host_set, pm_message_t mesg) +int ata_host_suspend(struct ata_host *host, pm_message_t mesg) { int i, j, rc; - rc = ata_host_set_request_pm(host_set, mesg, 0, ATA_EHI_QUIET, 1); + rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1); if (rc) goto fail; @@ -5110,8 +5111,8 @@ int ata_host_set_suspend(struct ata_host_set *host_set, pm_message_t mesg) * This happens if hotplug occurs between completion of device * suspension and here. */ - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; for (j = 0; j < ATA_MAX_DEVICES; j++) { struct ata_device *dev = &ap->device[j]; @@ -5126,30 +5127,30 @@ int ata_host_set_suspend(struct ata_host_set *host_set, pm_message_t mesg) } } - host_set->dev->power.power_state = mesg; + host->dev->power.power_state = mesg; return 0; fail: - ata_host_set_resume(host_set); + ata_host_resume(host); return rc; } /** - * ata_host_set_resume - resume host_set - * @host_set: host_set to resume + * ata_host_resume - resume host + * @host: host to resume * - * Resume @host_set. Actual operation is performed by EH. This + * Resume @host. Actual operation is performed by EH. This * function requests EH to perform PM operations and returns. * Note that all resume operations are performed parallely. * * LOCKING: * Kernel thread context (may sleep). */ -void ata_host_set_resume(struct ata_host_set *host_set) +void ata_host_resume(struct ata_host *host) { - ata_host_set_request_pm(host_set, PMSG_ON, ATA_EH_SOFTRESET, - ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 0); - host_set->dev->power.power_state = PMSG_ON; + ata_host_request_pm(host, PMSG_ON, ATA_EH_SOFTRESET, + ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 0); + host->dev->power.power_state = PMSG_ON; } /** @@ -5206,10 +5207,10 @@ void ata_port_stop (struct ata_port *ap) ata_pad_free(ap, dev); } -void ata_host_stop (struct ata_host_set *host_set) +void ata_host_stop (struct ata_host *host) { - if (host_set->mmio_base) - iounmap(host_set->mmio_base); + if (host->mmio_base) + iounmap(host->mmio_base); } /** @@ -5231,7 +5232,7 @@ void ata_dev_init(struct ata_device *dev) /* High bits of dev->flags are used to record warm plug * requests which occur asynchronously. Synchronize using - * host_set lock. + * host lock. */ spin_lock_irqsave(ap->lock, flags); dev->flags &= ~ATA_DFLAG_INIT_MASK; @@ -5247,7 +5248,7 @@ void ata_dev_init(struct ata_device *dev) /** * ata_port_init - Initialize an ata_port structure * @ap: Structure to initialize - * @host_set: Collection of hosts to which @ap belongs + * @host: Collection of hosts to which @ap belongs * @ent: Probe information provided by low-level driver * @port_no: Port number associated with this ata_port * @@ -5256,22 +5257,22 @@ void ata_dev_init(struct ata_device *dev) * LOCKING: * Inherited from caller. */ -void ata_port_init(struct ata_port *ap, struct ata_host_set *host_set, +void ata_port_init(struct ata_port *ap, struct ata_host *host, const struct ata_probe_ent *ent, unsigned int port_no) { unsigned int i; - ap->lock = &host_set->lock; + ap->lock = &host->lock; ap->flags = ATA_FLAG_DISABLED; ap->id = ata_unique_id++; ap->ctl = ATA_DEVCTL_OBS; - ap->host_set = host_set; + ap->host = host; ap->dev = ent->dev; ap->port_no = port_no; ap->pio_mask = ent->pio_mask; ap->mwdma_mask = ent->mwdma_mask; ap->udma_mask = ent->udma_mask; - ap->flags |= ent->host_flags; + ap->flags |= ent->port_flags; ap->ops = ent->port_ops; ap->hw_sata_spd_limit = UINT_MAX; ap->active_tag = ATA_TAG_POISON; @@ -5324,7 +5325,7 @@ void ata_port_init(struct ata_port *ap, struct ata_host_set *host_set, */ static void ata_port_init_shost(struct ata_port *ap, struct Scsi_Host *shost) { - ap->host = shost; + ap->scsi_host = shost; shost->unique_id = ap->id; shost->max_id = 16; @@ -5336,7 +5337,7 @@ static void ata_port_init_shost(struct ata_port *ap, struct Scsi_Host *shost) /** * ata_port_add - Attach low-level ATA driver to system * @ent: Information provided by low-level driver - * @host_set: Collections of ports to which we add + * @host: Collections of ports to which we add * @port_no: Port number associated with this host * * Attach low-level ATA driver to system. @@ -5348,7 +5349,7 @@ static void ata_port_init_shost(struct ata_port *ap, struct Scsi_Host *shost) * New ata_port on success, for NULL on error. */ static struct ata_port * ata_port_add(const struct ata_probe_ent *ent, - struct ata_host_set *host_set, + struct ata_host *host, unsigned int port_no) { struct Scsi_Host *shost; @@ -5357,7 +5358,7 @@ static struct ata_port * ata_port_add(const struct ata_probe_ent *ent, DPRINTK("ENTER\n"); if (!ent->port_ops->error_handler && - !(ent->host_flags & (ATA_FLAG_SATA_RESET | ATA_FLAG_SRST))) { + !(ent->port_flags & (ATA_FLAG_SATA_RESET | ATA_FLAG_SRST))) { printk(KERN_ERR "ata%u: no reset mechanism available\n", port_no); return NULL; @@ -5371,32 +5372,31 @@ static struct ata_port * ata_port_add(const struct ata_probe_ent *ent, ap = ata_shost_to_port(shost); - ata_port_init(ap, host_set, ent, port_no); + ata_port_init(ap, host, ent, port_no); ata_port_init_shost(ap, shost); return ap; } /** - * ata_sas_host_init - Initialize a host_set struct - * @host_set: host_set to initialize - * @dev: device host_set is attached to - * @flags: host_set flags - * @ops: port_ops + * ata_sas_host_init - Initialize a host struct + * @host: host to initialize + * @dev: device host is attached to + * @flags: host flags + * @ops: port_ops * * LOCKING: * PCI/etc. bus probe sem. * */ -void ata_host_set_init(struct ata_host_set *host_set, - struct device *dev, unsigned long flags, - const struct ata_port_operations *ops) +void ata_host_init(struct ata_host *host, struct device *dev, + unsigned long flags, const struct ata_port_operations *ops) { - spin_lock_init(&host_set->lock); - host_set->dev = dev; - host_set->flags = flags; - host_set->ops = ops; + spin_lock_init(&host->lock); + host->dev = dev; + host->flags = flags; + host->ops = ops; } /** @@ -5421,34 +5421,34 @@ int ata_device_add(const struct ata_probe_ent *ent) { unsigned int i; struct device *dev = ent->dev; - struct ata_host_set *host_set; + struct ata_host *host; int rc; DPRINTK("ENTER\n"); /* alloc a container for our list of ATA ports (buses) */ - host_set = kzalloc(sizeof(struct ata_host_set) + - (ent->n_ports * sizeof(void *)), GFP_KERNEL); - if (!host_set) + host = kzalloc(sizeof(struct ata_host) + + (ent->n_ports * sizeof(void *)), GFP_KERNEL); + if (!host) return 0; - ata_host_set_init(host_set, dev, ent->host_set_flags, ent->port_ops); - host_set->n_ports = ent->n_ports; - host_set->irq = ent->irq; - host_set->irq2 = ent->irq2; - host_set->mmio_base = ent->mmio_base; - host_set->private_data = ent->private_data; + ata_host_init(host, dev, ent->_host_flags, ent->port_ops); + host->n_ports = ent->n_ports; + host->irq = ent->irq; + host->irq2 = ent->irq2; + host->mmio_base = ent->mmio_base; + host->private_data = ent->private_data; /* register each port bound to this device */ - for (i = 0; i < host_set->n_ports; i++) { + for (i = 0; i < host->n_ports; i++) { struct ata_port *ap; unsigned long xfer_mode_mask; int irq_line = ent->irq; - ap = ata_port_add(ent, host_set, i); + ap = ata_port_add(ent, host, i); if (!ap) goto err_out; - host_set->ports[i] = ap; + host->ports[i] = ap; /* dummy? */ if (ent->dummy_port_mask & (1 << i)) { @@ -5460,8 +5460,8 @@ int ata_device_add(const struct ata_probe_ent *ent) /* start port */ rc = ap->ops->port_start(ap); if (rc) { - host_set->ports[i] = NULL; - scsi_host_put(ap->host); + host->ports[i] = NULL; + scsi_host_put(ap->scsi_host); goto err_out; } @@ -5484,13 +5484,13 @@ int ata_device_add(const struct ata_probe_ent *ent) irq_line); ata_chk_status(ap); - host_set->ops->irq_clear(ap); + host->ops->irq_clear(ap); ata_eh_freeze_port(ap); /* freeze port before requesting IRQ */ } /* obtain irq, that may be shared between channels */ rc = request_irq(ent->irq, ent->port_ops->irq_handler, ent->irq_flags, - DRV_NAME, host_set); + DRV_NAME, host); if (rc) { dev_printk(KERN_ERR, dev, "irq %lu request failed: %d\n", ent->irq, rc); @@ -5504,7 +5504,7 @@ int ata_device_add(const struct ata_probe_ent *ent) BUG_ON(ent->irq == ent->irq2); rc = request_irq(ent->irq2, ent->port_ops->irq_handler, ent->irq_flags, - DRV_NAME, host_set); + DRV_NAME, host); if (rc) { dev_printk(KERN_ERR, dev, "irq %lu request failed: %d\n", ent->irq2, rc); @@ -5514,8 +5514,8 @@ int ata_device_add(const struct ata_probe_ent *ent) /* perform each probe synchronously */ DPRINTK("probe begin\n"); - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; u32 scontrol; int rc; @@ -5526,7 +5526,7 @@ int ata_device_add(const struct ata_probe_ent *ent) } ap->sata_spd_limit = ap->hw_sata_spd_limit; - rc = scsi_add_host(ap->host, dev); + rc = scsi_add_host(ap->scsi_host, dev); if (rc) { ata_port_printk(ap, KERN_ERR, "scsi_add_host failed\n"); /* FIXME: do something useful here */ @@ -5574,29 +5574,29 @@ int ata_device_add(const struct ata_probe_ent *ent) /* probes are done, now scan each port's disk(s) */ DPRINTK("host probe begin\n"); - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; ata_scsi_scan_host(ap); } - dev_set_drvdata(dev, host_set); + dev_set_drvdata(dev, host); VPRINTK("EXIT, returning %u\n", ent->n_ports); return ent->n_ports; /* success */ err_out_free_irq: - free_irq(ent->irq, host_set); + free_irq(ent->irq, host); err_out: - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; if (ap) { ap->ops->port_stop(ap); - scsi_host_put(ap->host); + scsi_host_put(ap->scsi_host); } } - kfree(host_set); + kfree(host); VPRINTK("EXIT, returning 0\n"); return 0; } @@ -5656,12 +5656,12 @@ void ata_port_detach(struct ata_port *ap) skip_eh: /* remove the associated SCSI host */ - scsi_remove_host(ap->host); + scsi_remove_host(ap->scsi_host); } /** - * ata_host_set_remove - PCI layer callback for device removal - * @host_set: ATA host set that was removed + * ata_host_remove - PCI layer callback for device removal + * @host: ATA host set that was removed * * Unregister all objects associated with this host set. Free those * objects. @@ -5670,21 +5670,21 @@ void ata_port_detach(struct ata_port *ap) * Inherited from calling layer (may sleep). */ -void ata_host_set_remove(struct ata_host_set *host_set) +void ata_host_remove(struct ata_host *host) { unsigned int i; - for (i = 0; i < host_set->n_ports; i++) - ata_port_detach(host_set->ports[i]); + for (i = 0; i < host->n_ports; i++) + ata_port_detach(host->ports[i]); - free_irq(host_set->irq, host_set); - if (host_set->irq2) - free_irq(host_set->irq2, host_set); + free_irq(host->irq, host); + if (host->irq2) + free_irq(host->irq2, host); - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; - ata_scsi_release(ap->host); + ata_scsi_release(ap->scsi_host); if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { struct ata_ioports *ioaddr = &ap->ioaddr; @@ -5696,13 +5696,13 @@ void ata_host_set_remove(struct ata_host_set *host_set) release_region(ATA_SECONDARY_CMD, 8); } - scsi_host_put(ap->host); + scsi_host_put(ap->scsi_host); } - if (host_set->ops->host_stop) - host_set->ops->host_stop(host_set); + if (host->ops->host_stop) + host->ops->host_stop(host); - kfree(host_set); + kfree(host); } /** @@ -5719,9 +5719,9 @@ void ata_host_set_remove(struct ata_host_set *host_set) * One. */ -int ata_scsi_release(struct Scsi_Host *host) +int ata_scsi_release(struct Scsi_Host *shost) { - struct ata_port *ap = ata_shost_to_port(host); + struct ata_port *ap = ata_shost_to_port(shost); DPRINTK("ENTER\n"); @@ -5748,7 +5748,7 @@ ata_probe_ent_alloc(struct device *dev, const struct ata_port_info *port) probe_ent->dev = dev; probe_ent->sht = port->sht; - probe_ent->host_flags = port->host_flags; + probe_ent->port_flags = port->flags; probe_ent->pio_mask = port->pio_mask; probe_ent->mwdma_mask = port->mwdma_mask; probe_ent->udma_mask = port->udma_mask; @@ -5786,11 +5786,11 @@ void ata_std_ports(struct ata_ioports *ioaddr) #ifdef CONFIG_PCI -void ata_pci_host_stop (struct ata_host_set *host_set) +void ata_pci_host_stop (struct ata_host *host) { - struct pci_dev *pdev = to_pci_dev(host_set->dev); + struct pci_dev *pdev = to_pci_dev(host->dev); - pci_iounmap(pdev, host_set->mmio_base); + pci_iounmap(pdev, host->mmio_base); } /** @@ -5810,9 +5810,9 @@ void ata_pci_host_stop (struct ata_host_set *host_set) void ata_pci_remove_one (struct pci_dev *pdev) { struct device *dev = pci_dev_to_dev(pdev); - struct ata_host_set *host_set = dev_get_drvdata(dev); + struct ata_host *host = dev_get_drvdata(dev); - ata_host_set_remove(host_set); + ata_host_remove(host); pci_release_regions(pdev); pci_disable_device(pdev); @@ -5873,10 +5873,10 @@ void ata_pci_device_do_resume(struct pci_dev *pdev) int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) { - struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + struct ata_host *host = dev_get_drvdata(&pdev->dev); int rc = 0; - rc = ata_host_set_suspend(host_set, mesg); + rc = ata_host_suspend(host, mesg); if (rc) return rc; @@ -5887,10 +5887,10 @@ int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) int ata_pci_device_resume(struct pci_dev *pdev) { - struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + struct ata_host *host = dev_get_drvdata(&pdev->dev); ata_pci_device_do_resume(pdev); - ata_host_set_resume(host_set); + ata_host_resume(host); return 0; } #endif /* CONFIG_PCI */ @@ -6035,10 +6035,10 @@ EXPORT_SYMBOL_GPL(sata_deb_timing_long); EXPORT_SYMBOL_GPL(ata_dummy_port_ops); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); -EXPORT_SYMBOL_GPL(ata_host_set_init); +EXPORT_SYMBOL_GPL(ata_host_init); EXPORT_SYMBOL_GPL(ata_device_add); EXPORT_SYMBOL_GPL(ata_port_detach); -EXPORT_SYMBOL_GPL(ata_host_set_remove); +EXPORT_SYMBOL_GPL(ata_host_remove); EXPORT_SYMBOL_GPL(ata_sg_init); EXPORT_SYMBOL_GPL(ata_sg_init_one); EXPORT_SYMBOL_GPL(ata_hsm_move); @@ -6105,8 +6105,8 @@ EXPORT_SYMBOL_GPL(sata_scr_write); EXPORT_SYMBOL_GPL(sata_scr_write_flush); EXPORT_SYMBOL_GPL(ata_port_online); EXPORT_SYMBOL_GPL(ata_port_offline); -EXPORT_SYMBOL_GPL(ata_host_set_suspend); -EXPORT_SYMBOL_GPL(ata_host_set_resume); +EXPORT_SYMBOL_GPL(ata_host_suspend); +EXPORT_SYMBOL_GPL(ata_host_resume); EXPORT_SYMBOL_GPL(ata_id_string); EXPORT_SYMBOL_GPL(ata_id_c_string); EXPORT_SYMBOL_GPL(ata_scsi_simulate); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 2c476eee463f..b1b510493c2d 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -200,7 +200,7 @@ void ata_scsi_error(struct Scsi_Host *host) /* synchronize with port task */ ata_port_flush_task(ap); - /* synchronize with host_set lock and sort out timeouts */ + /* synchronize with host lock and sort out timeouts */ /* For new EH, all qcs are finished in one of three ways - * normal completion, error completion, and SCSI timeout. @@ -377,7 +377,7 @@ void ata_port_wait_eh(struct ata_port *ap) spin_unlock_irqrestore(ap->lock, flags); /* make sure SCSI EH is complete */ - if (scsi_host_in_recovery(ap->host)) { + if (scsi_host_in_recovery(ap->scsi_host)) { msleep(10); goto retry; } @@ -486,7 +486,7 @@ void ata_eng_timeout(struct ata_port *ap) * other commands are drained. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) { @@ -513,14 +513,14 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * all commands are drained. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_port_schedule_eh(struct ata_port *ap) { WARN_ON(!ap->ops->error_handler); ap->pflags |= ATA_PFLAG_EH_PENDING; - scsi_schedule_eh(ap->host); + scsi_schedule_eh(ap->scsi_host); DPRINTK("port EH scheduled\n"); } @@ -532,7 +532,7 @@ void ata_port_schedule_eh(struct ata_port *ap) * Abort all active qc's of @ap and schedule EH. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Number of aborted qc's. @@ -575,7 +575,7 @@ int ata_port_abort(struct ata_port *ap) * is frozen. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void __ata_port_freeze(struct ata_port *ap) { @@ -596,7 +596,7 @@ static void __ata_port_freeze(struct ata_port *ap) * Abort and freeze @ap. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Number of aborted commands. diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d168e3413661..3986ec8741b4 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -321,7 +321,7 @@ int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) * current command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Command allocated, or %NULL if none available. @@ -537,7 +537,7 @@ int ata_scsi_device_resume(struct scsi_device *sdev) * format sense blocks. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, u8 *asc, u8 *ascq, int verbose) @@ -649,7 +649,7 @@ void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, u8 *asc, * block. Clear sense key, ASC & ASCQ if there is no error. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_gen_ata_desc_sense(struct ata_queued_cmd *qc) { @@ -918,7 +918,7 @@ int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth) * [See SAT revision 5 at www.t10.org] * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. @@ -986,7 +986,7 @@ invalid_fld: * FLUSH CACHE EXT. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. @@ -1109,7 +1109,7 @@ static void scsi_16_lba_len(const u8 *scsicmd, u64 *plba, u32 *plen) * Converts SCSI VERIFY command to an ATA READ VERIFY command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. @@ -1233,7 +1233,7 @@ nothing_to_do: * %WRITE_16 are currently supported. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. @@ -1467,7 +1467,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) * issued to @dev. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * 1 if deferring is needed, 0 otherwise. @@ -1510,7 +1510,7 @@ static int ata_scmd_need_defer(struct ata_device *dev, int is_io) * termination. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * 0 on success, SCSI_ML_QUEUE_DEVICE_BUSY if the command @@ -1589,7 +1589,7 @@ defer: * Maps buffer contained within SCSI command @cmd. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Length of response buffer. @@ -1623,7 +1623,7 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out) * Unmaps response buffer contained within @cmd. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf) @@ -1649,7 +1649,7 @@ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf) * and sense buffer are assumed to be set). * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_scsi_rbuf_fill(struct ata_scsi_args *args, @@ -1680,7 +1680,7 @@ void ata_scsi_rbuf_fill(struct ata_scsi_args *args, * with non-VPD INQUIRY command output. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, @@ -1736,7 +1736,7 @@ unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, * Returns list of inquiry VPD pages available. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf, @@ -1764,7 +1764,7 @@ unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf, * Returns ATA device serial number. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, @@ -1797,7 +1797,7 @@ unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, * name ("ATA "), model and serial numbers. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf, @@ -1849,7 +1849,7 @@ unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf, * that the caller should successfully complete this SCSI command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_noop(struct ata_scsi_args *args, u8 *rbuf, @@ -1990,7 +1990,7 @@ static int ata_dev_supports_fua(u16 *id) * descriptor for other device types. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, @@ -2129,7 +2129,7 @@ saving_not_supp: * Simulate READ CAPACITY commands. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf, @@ -2204,7 +2204,7 @@ unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf, * Simulate REPORT LUNS command. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf, @@ -2256,7 +2256,7 @@ void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) * and the specified additional sense codes. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq) @@ -2421,7 +2421,7 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc) * @scsicmd: SCSI CDB associated with this PACKET command * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on failure. @@ -2500,7 +2500,7 @@ static struct ata_device * __ata_scsi_find_dev(struct ata_port *ap, * Determine if commands should be sent to the specified device. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * 0 if commands are not allowed / 1 if commands are allowed @@ -2534,7 +2534,7 @@ static int ata_scsi_dev_enabled(struct ata_device *dev) * SCSI command to be sent. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * Associated ATA device, or %NULL if not found. @@ -2808,7 +2808,7 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *cmd, * ATA and ATAPI devices appearing as SCSI devices. * * LOCKING: - * Releases scsi-layer-held lock, and obtains host_set lock. + * Releases scsi-layer-held lock, and obtains host lock. * * RETURNS: * Return value from __ata_scsi_queuecmd() if @cmd can be queued, @@ -2852,7 +2852,7 @@ int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) * that can be handled internally. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd, @@ -2944,7 +2944,7 @@ void ata_scsi_scan_host(struct ata_port *ap) if (!ata_dev_enabled(dev) || dev->sdev) continue; - sdev = __scsi_add_device(ap->host, 0, i, 0, NULL); + sdev = __scsi_add_device(ap->scsi_host, 0, i, 0, NULL); if (!IS_ERR(sdev)) { dev->sdev = sdev; scsi_device_put(sdev); @@ -2958,11 +2958,11 @@ void ata_scsi_scan_host(struct ata_port *ap) * * This function is called from ata_eh_hotplug() and responsible * for taking the SCSI device attached to @dev offline. This - * function is called with host_set lock which protects dev->sdev + * function is called with host lock which protects dev->sdev * against clearing. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) * * RETURNS: * 1 if attached SCSI device exists, 0 otherwise. @@ -2998,16 +2998,16 @@ static void ata_scsi_remove_dev(struct ata_device *dev) * be removed if there is __scsi_device_get() interface which * increments reference counts regardless of device state. */ - mutex_lock(&ap->host->scan_mutex); + mutex_lock(&ap->scsi_host->scan_mutex); spin_lock_irqsave(ap->lock, flags); - /* clearing dev->sdev is protected by host_set lock */ + /* clearing dev->sdev is protected by host lock */ sdev = dev->sdev; dev->sdev = NULL; if (sdev) { /* If user initiated unplug races with us, sdev can go - * away underneath us after the host_set lock and + * away underneath us after the host lock and * scan_mutex are released. Hold onto it. */ if (scsi_device_get(sdev) == 0) { @@ -3024,7 +3024,7 @@ static void ata_scsi_remove_dev(struct ata_device *dev) } spin_unlock_irqrestore(ap->lock, flags); - mutex_unlock(&ap->host->scan_mutex); + mutex_unlock(&ap->scsi_host->scan_mutex); if (sdev) { ata_dev_printk(dev, KERN_INFO, "detaching (SCSI %s)\n", @@ -3176,7 +3176,7 @@ void ata_scsi_dev_rescan(void *data) * ata_sas_port_alloc - Allocate port for a SAS attached SATA device * @pdev: PCI device that the scsi device is attached to * @port_info: Information from low-level host driver - * @host: SCSI host that the scsi device is attached to + * @shost: SCSI host that the scsi device is attached to * * LOCKING: * PCI/etc. bus probe sem. @@ -3185,9 +3185,9 @@ void ata_scsi_dev_rescan(void *data) * ata_port pointer on success / NULL on failure. */ -struct ata_port *ata_sas_port_alloc(struct ata_host_set *host_set, +struct ata_port *ata_sas_port_alloc(struct ata_host *host, struct ata_port_info *port_info, - struct Scsi_Host *host) + struct Scsi_Host *shost) { struct ata_port *ap = kzalloc(sizeof(*ap), GFP_KERNEL); struct ata_probe_ent *ent; @@ -3195,14 +3195,14 @@ struct ata_port *ata_sas_port_alloc(struct ata_host_set *host_set, if (!ap) return NULL; - ent = ata_probe_ent_alloc(host_set->dev, port_info); + ent = ata_probe_ent_alloc(host->dev, port_info); if (!ent) { kfree(ap); return NULL; } - ata_port_init(ap, host_set, ent, 0); - ap->lock = host->host_lock; + ata_port_init(ap, host, ent, 0); + ap->lock = shost->host_lock; kfree(ent); return ap; } diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index d4a4f82360ec..a5ecb71390a9 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -69,7 +69,7 @@ extern int ata_flush_cache(struct ata_device *dev); extern void ata_dev_init(struct ata_device *dev); extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg); extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg); -extern void ata_port_init(struct ata_port *ap, struct ata_host_set *host_set, +extern void ata_port_init(struct ata_port *ap, struct ata_host *host, const struct ata_probe_ent *ent, unsigned int port_no); extern struct ata_probe_ent *ata_probe_ent_alloc(struct device *dev, const struct ata_port_info *port); diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c index 61d2aa697b4d..912211ada816 100644 --- a/drivers/ata/pdc_adma.c +++ b/drivers/ata/pdc_adma.c @@ -127,7 +127,7 @@ static int adma_ata_init_one (struct pci_dev *pdev, static irqreturn_t adma_intr (int irq, void *dev_instance, struct pt_regs *regs); static int adma_port_start(struct ata_port *ap); -static void adma_host_stop(struct ata_host_set *host_set); +static void adma_host_stop(struct ata_host *host); static void adma_port_stop(struct ata_port *ap); static void adma_phy_reset(struct ata_port *ap); static void adma_qc_prep(struct ata_queued_cmd *qc); @@ -182,7 +182,7 @@ static struct ata_port_info adma_port_info[] = { /* board_1841_idx */ { .sht = &adma_ata_sht, - .host_flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING, .pio_mask = 0x10, /* pio4 */ @@ -237,7 +237,7 @@ static void adma_reset_engine(void __iomem *chan) static void adma_reinit_engine(struct ata_port *ap) { struct adma_port_priv *pp = ap->private_data; - void __iomem *mmio_base = ap->host_set->mmio_base; + void __iomem *mmio_base = ap->host->mmio_base; void __iomem *chan = ADMA_REGS(mmio_base, ap->port_no); /* mask/clear ATA interrupts */ @@ -265,7 +265,7 @@ static void adma_reinit_engine(struct ata_port *ap) static inline void adma_enter_reg_mode(struct ata_port *ap) { - void __iomem *chan = ADMA_REGS(ap->host_set->mmio_base, ap->port_no); + void __iomem *chan = ADMA_REGS(ap->host->mmio_base, ap->port_no); writew(aPIOMD4, chan + ADMA_CONTROL); readb(chan + ADMA_STATUS); /* flush */ @@ -412,7 +412,7 @@ static void adma_qc_prep(struct ata_queued_cmd *qc) static inline void adma_packet_start(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - void __iomem *chan = ADMA_REGS(ap->host_set->mmio_base, ap->port_no); + void __iomem *chan = ADMA_REGS(ap->host->mmio_base, ap->port_no); VPRINTK("ENTER, ap %p\n", ap); @@ -442,13 +442,13 @@ static unsigned int adma_qc_issue(struct ata_queued_cmd *qc) return ata_qc_issue_prot(qc); } -static inline unsigned int adma_intr_pkt(struct ata_host_set *host_set) +static inline unsigned int adma_intr_pkt(struct ata_host *host) { unsigned int handled = 0, port_no; - u8 __iomem *mmio_base = host_set->mmio_base; + u8 __iomem *mmio_base = host->mmio_base; - for (port_no = 0; port_no < host_set->n_ports; ++port_no) { - struct ata_port *ap = host_set->ports[port_no]; + for (port_no = 0; port_no < host->n_ports; ++port_no) { + struct ata_port *ap = host->ports[port_no]; struct adma_port_priv *pp; struct ata_queued_cmd *qc; void __iomem *chan = ADMA_REGS(mmio_base, port_no); @@ -476,13 +476,13 @@ static inline unsigned int adma_intr_pkt(struct ata_host_set *host_set) return handled; } -static inline unsigned int adma_intr_mmio(struct ata_host_set *host_set) +static inline unsigned int adma_intr_mmio(struct ata_host *host) { unsigned int handled = 0, port_no; - for (port_no = 0; port_no < host_set->n_ports; ++port_no) { + for (port_no = 0; port_no < host->n_ports; ++port_no) { struct ata_port *ap; - ap = host_set->ports[port_no]; + ap = host->ports[port_no]; if (ap && (!(ap->flags & ATA_FLAG_DISABLED))) { struct ata_queued_cmd *qc; struct adma_port_priv *pp = ap->private_data; @@ -511,14 +511,14 @@ static inline unsigned int adma_intr_mmio(struct ata_host_set *host_set) static irqreturn_t adma_intr(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; unsigned int handled = 0; VPRINTK("ENTER\n"); - spin_lock(&host_set->lock); - handled = adma_intr_pkt(host_set) | adma_intr_mmio(host_set); - spin_unlock(&host_set->lock); + spin_lock(&host->lock); + handled = adma_intr_pkt(host) | adma_intr_mmio(host); + spin_unlock(&host->lock); VPRINTK("EXIT\n"); @@ -544,7 +544,7 @@ static void adma_ata_setup_port(struct ata_ioports *port, unsigned long base) static int adma_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct adma_port_priv *pp; int rc; @@ -582,10 +582,10 @@ err_out: static void adma_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct adma_port_priv *pp = ap->private_data; - adma_reset_engine(ADMA_REGS(ap->host_set->mmio_base, ap->port_no)); + adma_reset_engine(ADMA_REGS(ap->host->mmio_base, ap->port_no)); if (pp != NULL) { ap->private_data = NULL; if (pp->pkt != NULL) @@ -596,14 +596,14 @@ static void adma_port_stop(struct ata_port *ap) ata_port_stop(ap); } -static void adma_host_stop(struct ata_host_set *host_set) +static void adma_host_stop(struct ata_host *host) { unsigned int port_no; for (port_no = 0; port_no < ADMA_PORTS; ++port_no) - adma_reset_engine(ADMA_REGS(host_set->mmio_base, port_no)); + adma_reset_engine(ADMA_REGS(host->mmio_base, port_no)); - ata_pci_host_stop(host_set); + ata_pci_host_stop(host); } static void adma_host_init(unsigned int chip_id, @@ -684,7 +684,7 @@ static int adma_ata_init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&probe_ent->node); probe_ent->sht = adma_port_info[board_idx].sht; - probe_ent->host_flags = adma_port_info[board_idx].host_flags; + probe_ent->port_flags = adma_port_info[board_idx].flags; probe_ent->pio_mask = adma_port_info[board_idx].pio_mask; probe_ent->mwdma_mask = adma_port_info[board_idx].mwdma_mask; probe_ent->udma_mask = adma_port_info[board_idx].udma_mask; diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index a2915a56accd..34f1939b44c9 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -342,7 +342,7 @@ static u32 mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in); static void mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val); static void mv_phy_reset(struct ata_port *ap); static void __mv_phy_reset(struct ata_port *ap, int can_sleep); -static void mv_host_stop(struct ata_host_set *host_set); +static void mv_host_stop(struct ata_host *host); static int mv_port_start(struct ata_port *ap); static void mv_port_stop(struct ata_port *ap); static void mv_qc_prep(struct ata_queued_cmd *qc); @@ -480,35 +480,35 @@ static const struct ata_port_operations mv_iie_ops = { static const struct ata_port_info mv_port_info[] = { { /* chip_504x */ .sht = &mv_sht, - .host_flags = MV_COMMON_FLAGS, + .flags = MV_COMMON_FLAGS, .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &mv5_ops, }, { /* chip_508x */ .sht = &mv_sht, - .host_flags = (MV_COMMON_FLAGS | MV_FLAG_DUAL_HC), + .flags = (MV_COMMON_FLAGS | MV_FLAG_DUAL_HC), .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &mv5_ops, }, { /* chip_5080 */ .sht = &mv_sht, - .host_flags = (MV_COMMON_FLAGS | MV_FLAG_DUAL_HC), + .flags = (MV_COMMON_FLAGS | MV_FLAG_DUAL_HC), .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &mv5_ops, }, { /* chip_604x */ .sht = &mv_sht, - .host_flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS), + .flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS), .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &mv6_ops, }, { /* chip_608x */ .sht = &mv_sht, - .host_flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS | + .flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS | MV_FLAG_DUAL_HC), .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ @@ -516,14 +516,14 @@ static const struct ata_port_info mv_port_info[] = { }, { /* chip_6042 */ .sht = &mv_sht, - .host_flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS), + .flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS), .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &mv_iie_ops, }, { /* chip_7042 */ .sht = &mv_sht, - .host_flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS | + .flags = (MV_COMMON_FLAGS | MV_6XXX_FLAGS | MV_FLAG_DUAL_HC), .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ @@ -618,12 +618,12 @@ static inline void __iomem *mv_port_base(void __iomem *base, unsigned int port) static inline void __iomem *mv_ap_base(struct ata_port *ap) { - return mv_port_base(ap->host_set->mmio_base, ap->port_no); + return mv_port_base(ap->host->mmio_base, ap->port_no); } -static inline int mv_get_hc_count(unsigned long host_flags) +static inline int mv_get_hc_count(unsigned long port_flags) { - return ((host_flags & MV_FLAG_DUAL_HC) ? 2 : 1); + return ((port_flags & MV_FLAG_DUAL_HC) ? 2 : 1); } static void mv_irq_clear(struct ata_port *ap) @@ -809,7 +809,7 @@ static void mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val) /** * mv_host_stop - Host specific cleanup/stop routine. - * @host_set: host data structure + * @host: host data structure * * Disable ints, cleanup host memory, call general purpose * host_stop. @@ -817,10 +817,10 @@ static void mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val) * LOCKING: * Inherited from caller. */ -static void mv_host_stop(struct ata_host_set *host_set) +static void mv_host_stop(struct ata_host *host) { - struct mv_host_priv *hpriv = host_set->private_data; - struct pci_dev *pdev = to_pci_dev(host_set->dev); + struct mv_host_priv *hpriv = host->private_data; + struct pci_dev *pdev = to_pci_dev(host->dev); if (hpriv->hp_flags & MV_HP_FLAG_MSI) { pci_disable_msi(pdev); @@ -828,7 +828,7 @@ static void mv_host_stop(struct ata_host_set *host_set) pci_intx(pdev, 0); } kfree(hpriv); - ata_host_stop(host_set); + ata_host_stop(host); } static inline void mv_priv_free(struct mv_port_priv *pp, struct device *dev) @@ -875,8 +875,8 @@ static void mv_edma_cfg(struct mv_host_priv *hpriv, void __iomem *port_mmio) */ static int mv_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; - struct mv_host_priv *hpriv = ap->host_set->private_data; + struct device *dev = ap->host->dev; + struct mv_host_priv *hpriv = ap->host->private_data; struct mv_port_priv *pp; void __iomem *port_mmio = mv_ap_base(ap); void *mem; @@ -965,17 +965,17 @@ err_out: * Stop DMA, cleanup port memory. * * LOCKING: - * This routine uses the host_set lock to protect the DMA stop. + * This routine uses the host lock to protect the DMA stop. */ static void mv_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct mv_port_priv *pp = ap->private_data; unsigned long flags; - spin_lock_irqsave(&ap->host_set->lock, flags); + spin_lock_irqsave(&ap->host->lock, flags); mv_stop_dma(ap); - spin_unlock_irqrestore(&ap->host_set->lock, flags); + spin_unlock_irqrestore(&ap->host->lock, flags); ap->private_data = NULL; ata_pad_free(ap, dev); @@ -1330,7 +1330,7 @@ static void mv_err_intr(struct ata_port *ap, int reset_allowed) /** * mv_host_intr - Handle all interrupts on the given host controller - * @host_set: host specific structure + * @host: host specific structure * @relevant: port error bits relevant to this host controller * @hc: which host controller we're to look at * @@ -1344,10 +1344,9 @@ static void mv_err_intr(struct ata_port *ap, int reset_allowed) * LOCKING: * Inherited from caller. */ -static void mv_host_intr(struct ata_host_set *host_set, u32 relevant, - unsigned int hc) +static void mv_host_intr(struct ata_host *host, u32 relevant, unsigned int hc) { - void __iomem *mmio = host_set->mmio_base; + void __iomem *mmio = host->mmio_base; void __iomem *hc_mmio = mv_hc_base(mmio, hc); struct ata_queued_cmd *qc; u32 hc_irq_cause; @@ -1371,7 +1370,7 @@ static void mv_host_intr(struct ata_host_set *host_set, u32 relevant, for (port = port0; port < port0 + MV_PORTS_PER_HC; port++) { u8 ata_status = 0; - struct ata_port *ap = host_set->ports[port]; + struct ata_port *ap = host->ports[port]; struct mv_port_priv *pp = ap->private_data; hard_port = mv_hardport_from_port(port); /* range 0..3 */ @@ -1444,15 +1443,15 @@ static void mv_host_intr(struct ata_host_set *host_set, u32 relevant, * reported here. * * LOCKING: - * This routine holds the host_set lock while processing pending + * This routine holds the host lock while processing pending * interrupts. */ static irqreturn_t mv_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; unsigned int hc, handled = 0, n_hcs; - void __iomem *mmio = host_set->mmio_base; + void __iomem *mmio = host->mmio_base; struct mv_host_priv *hpriv; u32 irq_stat; @@ -1465,18 +1464,18 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance, return IRQ_NONE; } - n_hcs = mv_get_hc_count(host_set->ports[0]->flags); - spin_lock(&host_set->lock); + n_hcs = mv_get_hc_count(host->ports[0]->flags); + spin_lock(&host->lock); for (hc = 0; hc < n_hcs; hc++) { u32 relevant = irq_stat & (HC0_IRQ_PEND << (hc * HC_SHIFT)); if (relevant) { - mv_host_intr(host_set, relevant, hc); + mv_host_intr(host, relevant, hc); handled++; } } - hpriv = host_set->private_data; + hpriv = host->private_data; if (IS_60XX(hpriv)) { /* deal with the interrupt coalescing bits */ if (irq_stat & (TRAN_LO_DONE | TRAN_HI_DONE | PORTS_0_7_COAL_DONE)) { @@ -1491,12 +1490,12 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance, readl(mmio + PCI_IRQ_CAUSE_OFS)); DPRINTK("All regs @ PCI error\n"); - mv_dump_all_regs(mmio, -1, to_pci_dev(host_set->dev)); + mv_dump_all_regs(mmio, -1, to_pci_dev(host->dev)); writelfl(0, mmio + PCI_IRQ_CAUSE_OFS); handled++; } - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); return IRQ_RETVAL(handled); } @@ -1528,7 +1527,7 @@ static unsigned int mv5_scr_offset(unsigned int sc_reg_in) static u32 mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in) { - void __iomem *mmio = mv5_phy_base(ap->host_set->mmio_base, ap->port_no); + void __iomem *mmio = mv5_phy_base(ap->host->mmio_base, ap->port_no); unsigned int ofs = mv5_scr_offset(sc_reg_in); if (ofs != 0xffffffffU) @@ -1539,7 +1538,7 @@ static u32 mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in) static void mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val) { - void __iomem *mmio = mv5_phy_base(ap->host_set->mmio_base, ap->port_no); + void __iomem *mmio = mv5_phy_base(ap->host->mmio_base, ap->port_no); unsigned int ofs = mv5_scr_offset(sc_reg_in); if (ofs != 0xffffffffU) @@ -1904,8 +1903,8 @@ static void mv_channel_reset(struct mv_host_priv *hpriv, void __iomem *mmio, static void mv_stop_and_reset(struct ata_port *ap) { - struct mv_host_priv *hpriv = ap->host_set->private_data; - void __iomem *mmio = ap->host_set->mmio_base; + struct mv_host_priv *hpriv = ap->host->private_data; + void __iomem *mmio = ap->host->mmio_base; mv_stop_dma(ap); @@ -1936,7 +1935,7 @@ static inline void __msleep(unsigned int msec, int can_sleep) static void __mv_phy_reset(struct ata_port *ap, int can_sleep) { struct mv_port_priv *pp = ap->private_data; - struct mv_host_priv *hpriv = ap->host_set->private_data; + struct mv_host_priv *hpriv = ap->host->private_data; void __iomem *port_mmio = mv_ap_base(ap); struct ata_taskfile tf; struct ata_device *dev = &ap->device[0]; @@ -2035,7 +2034,7 @@ static void mv_phy_reset(struct ata_port *ap) * chip/bus, fail the command, and move on. * * LOCKING: - * This routine holds the host_set lock while failing the command. + * This routine holds the host lock while failing the command. */ static void mv_eng_timeout(struct ata_port *ap) { @@ -2044,18 +2043,17 @@ static void mv_eng_timeout(struct ata_port *ap) ata_port_printk(ap, KERN_ERR, "Entering mv_eng_timeout\n"); DPRINTK("All regs @ start of eng_timeout\n"); - mv_dump_all_regs(ap->host_set->mmio_base, ap->port_no, - to_pci_dev(ap->host_set->dev)); + mv_dump_all_regs(ap->host->mmio_base, ap->port_no, + to_pci_dev(ap->host->dev)); qc = ata_qc_from_tag(ap, ap->active_tag); printk(KERN_ERR "mmio_base %p ap %p qc %p scsi_cmnd %p &cmnd %p\n", - ap->host_set->mmio_base, ap, qc, qc->scsicmd, - &qc->scsicmd->cmnd); + ap->host->mmio_base, ap, qc, qc->scsicmd, &qc->scsicmd->cmnd); - spin_lock_irqsave(&ap->host_set->lock, flags); + spin_lock_irqsave(&ap->host->lock, flags); mv_err_intr(ap, 0); mv_stop_and_reset(ap); - spin_unlock_irqrestore(&ap->host_set->lock, flags); + spin_unlock_irqrestore(&ap->host->lock, flags); WARN_ON(!(qc->flags & ATA_QCFLAG_ACTIVE)); if (qc->flags & ATA_QCFLAG_ACTIVE) { @@ -2236,7 +2234,7 @@ static int mv_init_host(struct pci_dev *pdev, struct ata_probe_ent *probe_ent, if (rc) goto done; - n_hc = mv_get_hc_count(probe_ent->host_flags); + n_hc = mv_get_hc_count(probe_ent->port_flags); probe_ent->n_ports = MV_PORTS_PER_HC * n_hc; for (port = 0; port < probe_ent->n_ports; port++) @@ -2389,7 +2387,7 @@ static int mv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) memset(hpriv, 0, sizeof(*hpriv)); probe_ent->sht = mv_port_info[board_idx].sht; - probe_ent->host_flags = mv_port_info[board_idx].host_flags; + probe_ent->port_flags = mv_port_info[board_idx].flags; probe_ent->pio_mask = mv_port_info[board_idx].pio_mask; probe_ent->udma_mask = mv_port_info[board_idx].udma_mask; probe_ent->port_ops = mv_port_info[board_idx].port_ops; diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index be46df75ab5a..27c22feebf30 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -81,7 +81,7 @@ enum { }; static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); -static void nv_ck804_host_stop(struct ata_host_set *host_set); +static void nv_ck804_host_stop(struct ata_host *host); static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance, struct pt_regs *regs); static irqreturn_t nv_nf2_interrupt(int irq, void *dev_instance, @@ -257,7 +257,7 @@ static struct ata_port_info nv_port_info[] = { /* generic */ { .sht = &nv_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, .pio_mask = NV_PIO_MASK, .mwdma_mask = NV_MWDMA_MASK, .udma_mask = NV_UDMA_MASK, @@ -266,7 +266,7 @@ static struct ata_port_info nv_port_info[] = { /* nforce2/3 */ { .sht = &nv_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, .pio_mask = NV_PIO_MASK, .mwdma_mask = NV_MWDMA_MASK, .udma_mask = NV_UDMA_MASK, @@ -275,7 +275,7 @@ static struct ata_port_info nv_port_info[] = { /* ck804 */ { .sht = &nv_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, .pio_mask = NV_PIO_MASK, .mwdma_mask = NV_MWDMA_MASK, .udma_mask = NV_UDMA_MASK, @@ -292,17 +292,17 @@ MODULE_VERSION(DRV_VERSION); static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; unsigned int i; unsigned int handled = 0; unsigned long flags; - spin_lock_irqsave(&host_set->lock, flags); + spin_lock_irqsave(&host->lock, flags); - for (i = 0; i < host_set->n_ports; i++) { + for (i = 0; i < host->n_ports; i++) { struct ata_port *ap; - ap = host_set->ports[i]; + ap = host->ports[i]; if (ap && !(ap->flags & ATA_FLAG_DISABLED)) { struct ata_queued_cmd *qc; @@ -318,7 +318,7 @@ static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance, } - spin_unlock_irqrestore(&host_set->lock, flags); + spin_unlock_irqrestore(&host->lock, flags); return IRQ_RETVAL(handled); } @@ -354,12 +354,12 @@ static int nv_host_intr(struct ata_port *ap, u8 irq_stat) return 1; } -static irqreturn_t nv_do_interrupt(struct ata_host_set *host_set, u8 irq_stat) +static irqreturn_t nv_do_interrupt(struct ata_host *host, u8 irq_stat) { int i, handled = 0; - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; if (ap && !(ap->flags & ATA_FLAG_DISABLED)) handled += nv_host_intr(ap, irq_stat); @@ -373,14 +373,14 @@ static irqreturn_t nv_do_interrupt(struct ata_host_set *host_set, u8 irq_stat) static irqreturn_t nv_nf2_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; u8 irq_stat; irqreturn_t ret; - spin_lock(&host_set->lock); - irq_stat = inb(host_set->ports[0]->ioaddr.scr_addr + NV_INT_STATUS); - ret = nv_do_interrupt(host_set, irq_stat); - spin_unlock(&host_set->lock); + spin_lock(&host->lock); + irq_stat = inb(host->ports[0]->ioaddr.scr_addr + NV_INT_STATUS); + ret = nv_do_interrupt(host, irq_stat); + spin_unlock(&host->lock); return ret; } @@ -388,14 +388,14 @@ static irqreturn_t nv_nf2_interrupt(int irq, void *dev_instance, static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; u8 irq_stat; irqreturn_t ret; - spin_lock(&host_set->lock); - irq_stat = readb(host_set->mmio_base + NV_INT_STATUS_CK804); - ret = nv_do_interrupt(host_set, irq_stat); - spin_unlock(&host_set->lock); + spin_lock(&host->lock); + irq_stat = readb(host->mmio_base + NV_INT_STATUS_CK804); + ret = nv_do_interrupt(host, irq_stat); + spin_unlock(&host->lock); return ret; } @@ -418,7 +418,7 @@ static void nv_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val) static void nv_nf2_freeze(struct ata_port *ap) { - unsigned long scr_addr = ap->host_set->ports[0]->ioaddr.scr_addr; + unsigned long scr_addr = ap->host->ports[0]->ioaddr.scr_addr; int shift = ap->port_no * NV_INT_PORT_SHIFT; u8 mask; @@ -429,7 +429,7 @@ static void nv_nf2_freeze(struct ata_port *ap) static void nv_nf2_thaw(struct ata_port *ap) { - unsigned long scr_addr = ap->host_set->ports[0]->ioaddr.scr_addr; + unsigned long scr_addr = ap->host->ports[0]->ioaddr.scr_addr; int shift = ap->port_no * NV_INT_PORT_SHIFT; u8 mask; @@ -442,7 +442,7 @@ static void nv_nf2_thaw(struct ata_port *ap) static void nv_ck804_freeze(struct ata_port *ap) { - void __iomem *mmio_base = ap->host_set->mmio_base; + void __iomem *mmio_base = ap->host->mmio_base; int shift = ap->port_no * NV_INT_PORT_SHIFT; u8 mask; @@ -453,7 +453,7 @@ static void nv_ck804_freeze(struct ata_port *ap) static void nv_ck804_thaw(struct ata_port *ap) { - void __iomem *mmio_base = ap->host_set->mmio_base; + void __iomem *mmio_base = ap->host->mmio_base; int shift = ap->port_no * NV_INT_PORT_SHIFT; u8 mask; @@ -568,9 +568,9 @@ err_out: return rc; } -static void nv_ck804_host_stop(struct ata_host_set *host_set) +static void nv_ck804_host_stop(struct ata_host *host) { - struct pci_dev *pdev = to_pci_dev(host_set->dev); + struct pci_dev *pdev = to_pci_dev(host->dev); u8 regval; /* disable SATA space for CK804 */ @@ -578,7 +578,7 @@ static void nv_ck804_host_stop(struct ata_host_set *host_set) regval &= ~NV_MCP_SATA_CFG_20_SATA_SPACE_EN; pci_write_config_byte(pdev, NV_MCP_SATA_CFG_20, regval); - ata_pci_host_stop(host_set); + ata_pci_host_stop(host); } static int __init nv_init(void) diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index a5b3a7db7a9f..d627812ea73d 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -104,7 +104,7 @@ static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf) static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf); static void pdc_irq_clear(struct ata_port *ap); static unsigned int pdc_qc_issue_prot(struct ata_queued_cmd *qc); -static void pdc_host_stop(struct ata_host_set *host_set); +static void pdc_host_stop(struct ata_host *host); static struct scsi_host_template pdc_ata_sht = { @@ -175,7 +175,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_2037x */ { .sht = &pdc_ata_sht, - .host_flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ @@ -185,7 +185,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_20319 */ { .sht = &pdc_ata_sht, - .host_flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ @@ -195,7 +195,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_20619 */ { .sht = &pdc_ata_sht, - .host_flags = PDC_COMMON_FLAGS | ATA_FLAG_SLAVE_POSS, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SLAVE_POSS, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ @@ -205,7 +205,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_20771 */ { .sht = &pdc_ata_sht, - .host_flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ @@ -215,7 +215,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_2057x */ { .sht = &pdc_ata_sht, - .host_flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ @@ -225,7 +225,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_40518 */ { .sht = &pdc_ata_sht, - .host_flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ @@ -292,7 +292,7 @@ static struct pci_driver pdc_ata_pci_driver = { static int pdc_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct pdc_port_priv *pp; int rc; @@ -326,7 +326,7 @@ err_out: static void pdc_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct pdc_port_priv *pp = ap->private_data; ap->private_data = NULL; @@ -336,11 +336,11 @@ static void pdc_port_stop(struct ata_port *ap) } -static void pdc_host_stop(struct ata_host_set *host_set) +static void pdc_host_stop(struct ata_host *host) { - struct pdc_host_priv *hp = host_set->private_data; + struct pdc_host_priv *hp = host->private_data; - ata_pci_host_stop(host_set); + ata_pci_host_stop(host); kfree(hp); } @@ -443,14 +443,14 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc) static void pdc_eng_timeout(struct ata_port *ap) { - struct ata_host_set *host_set = ap->host_set; + struct ata_host *host = ap->host; u8 drv_stat; struct ata_queued_cmd *qc; unsigned long flags; DPRINTK("ENTER\n"); - spin_lock_irqsave(&host_set->lock, flags); + spin_lock_irqsave(&host->lock, flags); qc = ata_qc_from_tag(ap, ap->active_tag); @@ -473,7 +473,7 @@ static void pdc_eng_timeout(struct ata_port *ap) break; } - spin_unlock_irqrestore(&host_set->lock, flags); + spin_unlock_irqrestore(&host->lock, flags); ata_eh_qc_complete(qc); DPRINTK("EXIT\n"); } @@ -509,15 +509,15 @@ static inline unsigned int pdc_host_intr( struct ata_port *ap, static void pdc_irq_clear(struct ata_port *ap) { - struct ata_host_set *host_set = ap->host_set; - void __iomem *mmio = host_set->mmio_base; + struct ata_host *host = ap->host; + void __iomem *mmio = host->mmio_base; readl(mmio + PDC_INT_SEQMASK); } static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; struct ata_port *ap; u32 mask = 0; unsigned int i, tmp; @@ -526,12 +526,12 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r VPRINTK("ENTER\n"); - if (!host_set || !host_set->mmio_base) { + if (!host || !host->mmio_base) { VPRINTK("QUICK EXIT\n"); return IRQ_NONE; } - mmio_base = host_set->mmio_base; + mmio_base = host->mmio_base; /* reading should also clear interrupts */ mask = readl(mmio_base + PDC_INT_SEQMASK); @@ -541,7 +541,7 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r return IRQ_NONE; } - spin_lock(&host_set->lock); + spin_lock(&host->lock); mask &= 0xffff; /* only 16 tags possible */ if (!mask) { @@ -551,9 +551,9 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r writel(mask, mmio_base + PDC_INT_SEQMASK); - for (i = 0; i < host_set->n_ports; i++) { + for (i = 0; i < host->n_ports; i++) { VPRINTK("port %u\n", i); - ap = host_set->ports[i]; + ap = host->ports[i]; tmp = mask & (1 << (i + 1)); if (tmp && ap && !(ap->flags & ATA_FLAG_DISABLED)) { @@ -568,7 +568,7 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r VPRINTK("EXIT\n"); done_irq: - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); return IRQ_RETVAL(handled); } @@ -581,8 +581,8 @@ static inline void pdc_packet_start(struct ata_queued_cmd *qc) VPRINTK("ENTER, ap %p\n", ap); - writel(0x00000001, ap->host_set->mmio_base + (seq * 4)); - readl(ap->host_set->mmio_base + (seq * 4)); /* flush */ + writel(0x00000001, ap->host->mmio_base + (seq * 4)); + readl(ap->host->mmio_base + (seq * 4)); /* flush */ pp->pkt[2] = seq; wmb(); /* flush PRD, pkt writes */ @@ -743,7 +743,7 @@ static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *e probe_ent->private_data = hp; probe_ent->sht = pdc_port_info[board_idx].sht; - probe_ent->host_flags = pdc_port_info[board_idx].host_flags; + probe_ent->port_flags = pdc_port_info[board_idx].flags; probe_ent->pio_mask = pdc_port_info[board_idx].pio_mask; probe_ent->mwdma_mask = pdc_port_info[board_idx].mwdma_mask; probe_ent->udma_mask = pdc_port_info[board_idx].udma_mask; diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c index 71bd6712b377..fa29dfe2a7b5 100644 --- a/drivers/ata/sata_qstor.c +++ b/drivers/ata/sata_qstor.c @@ -116,7 +116,7 @@ static void qs_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val); static int qs_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); static irqreturn_t qs_intr (int irq, void *dev_instance, struct pt_regs *regs); static int qs_port_start(struct ata_port *ap); -static void qs_host_stop(struct ata_host_set *host_set); +static void qs_host_stop(struct ata_host *host); static void qs_port_stop(struct ata_port *ap); static void qs_phy_reset(struct ata_port *ap); static void qs_qc_prep(struct ata_queued_cmd *qc); @@ -174,7 +174,7 @@ static const struct ata_port_info qs_port_info[] = { /* board_2068_idx */ { .sht = &qs_ata_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET | //FIXME ATA_FLAG_SRST | ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING, @@ -220,7 +220,7 @@ static void qs_irq_clear(struct ata_port *ap) static inline void qs_enter_reg_mode(struct ata_port *ap) { - u8 __iomem *chan = ap->host_set->mmio_base + (ap->port_no * 0x4000); + u8 __iomem *chan = ap->host->mmio_base + (ap->port_no * 0x4000); writeb(QS_CTR0_REG, chan + QS_CCT_CTR0); readb(chan + QS_CCT_CTR0); /* flush */ @@ -228,7 +228,7 @@ static inline void qs_enter_reg_mode(struct ata_port *ap) static inline void qs_reset_channel_logic(struct ata_port *ap) { - u8 __iomem *chan = ap->host_set->mmio_base + (ap->port_no * 0x4000); + u8 __iomem *chan = ap->host->mmio_base + (ap->port_no * 0x4000); writeb(QS_CTR1_RCHN, chan + QS_CCT_CTR1); readb(chan + QS_CCT_CTR0); /* flush */ @@ -342,7 +342,7 @@ static void qs_qc_prep(struct ata_queued_cmd *qc) static inline void qs_packet_start(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - u8 __iomem *chan = ap->host_set->mmio_base + (ap->port_no * 0x4000); + u8 __iomem *chan = ap->host->mmio_base + (ap->port_no * 0x4000); VPRINTK("ENTER, ap %p\n", ap); @@ -375,11 +375,11 @@ static unsigned int qs_qc_issue(struct ata_queued_cmd *qc) return ata_qc_issue_prot(qc); } -static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set) +static inline unsigned int qs_intr_pkt(struct ata_host *host) { unsigned int handled = 0; u8 sFFE; - u8 __iomem *mmio_base = host_set->mmio_base; + u8 __iomem *mmio_base = host->mmio_base; do { u32 sff0 = readl(mmio_base + QS_HST_SFF); @@ -391,7 +391,7 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set) u8 sDST = sff0 >> 16; /* dev status */ u8 sHST = sff1 & 0x3f; /* host status */ unsigned int port_no = (sff1 >> 8) & 0x03; - struct ata_port *ap = host_set->ports[port_no]; + struct ata_port *ap = host->ports[port_no]; DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n", sff1, sff0, port_no, sHST, sDST); @@ -421,13 +421,13 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set) return handled; } -static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) +static inline unsigned int qs_intr_mmio(struct ata_host *host) { unsigned int handled = 0, port_no; - for (port_no = 0; port_no < host_set->n_ports; ++port_no) { + for (port_no = 0; port_no < host->n_ports; ++port_no) { struct ata_port *ap; - ap = host_set->ports[port_no]; + ap = host->ports[port_no]; if (ap && !(ap->flags & ATA_FLAG_DISABLED)) { struct ata_queued_cmd *qc; @@ -457,14 +457,14 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) static irqreturn_t qs_intr(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; unsigned int handled = 0; VPRINTK("ENTER\n"); - spin_lock(&host_set->lock); - handled = qs_intr_pkt(host_set) | qs_intr_mmio(host_set); - spin_unlock(&host_set->lock); + spin_lock(&host->lock); + handled = qs_intr_pkt(host) | qs_intr_mmio(host); + spin_unlock(&host->lock); VPRINTK("EXIT\n"); @@ -491,9 +491,9 @@ static void qs_ata_setup_port(struct ata_ioports *port, unsigned long base) static int qs_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct qs_port_priv *pp; - void __iomem *mmio_base = ap->host_set->mmio_base; + void __iomem *mmio_base = ap->host->mmio_base; void __iomem *chan = mmio_base + (ap->port_no * 0x4000); u64 addr; int rc; @@ -530,7 +530,7 @@ err_out: static void qs_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct qs_port_priv *pp = ap->private_data; if (pp != NULL) { @@ -543,10 +543,10 @@ static void qs_port_stop(struct ata_port *ap) ata_port_stop(ap); } -static void qs_host_stop(struct ata_host_set *host_set) +static void qs_host_stop(struct ata_host *host) { - void __iomem *mmio_base = host_set->mmio_base; - struct pci_dev *pdev = to_pci_dev(host_set->dev); + void __iomem *mmio_base = host->mmio_base; + struct pci_dev *pdev = to_pci_dev(host->dev); writeb(0, mmio_base + QS_HCT_CTRL); /* disable host interrupts */ writeb(QS_CNFG3_GSRST, mmio_base + QS_HCF_CNFG3); /* global reset */ @@ -673,7 +673,7 @@ static int qs_ata_init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&probe_ent->node); probe_ent->sht = qs_port_info[board_idx].sht; - probe_ent->host_flags = qs_port_info[board_idx].host_flags; + probe_ent->port_flags = qs_port_info[board_idx].flags; probe_ent->pio_mask = qs_port_info[board_idx].pio_mask; probe_ent->mwdma_mask = qs_port_info[board_idx].mwdma_mask; probe_ent->udma_mask = qs_port_info[board_idx].udma_mask; diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index f17b3ae4dd37..c63dbabc0cd9 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -56,7 +56,7 @@ enum { SIL_FLAG_RERR_ON_DMA_ACT = (1 << 29), SIL_FLAG_MOD15WRITE = (1 << 30), - SIL_DFL_HOST_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + SIL_DFL_PORT_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_HRST_TO_RESUME, /* @@ -218,7 +218,7 @@ static const struct ata_port_info sil_port_info[] = { /* sil_3112 */ { .sht = &sil_sht, - .host_flags = SIL_DFL_HOST_FLAGS | SIL_FLAG_MOD15WRITE, + .flags = SIL_DFL_PORT_FLAGS | SIL_FLAG_MOD15WRITE, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x3f, /* udma0-5 */ @@ -227,7 +227,7 @@ static const struct ata_port_info sil_port_info[] = { /* sil_3112_no_sata_irq */ { .sht = &sil_sht, - .host_flags = SIL_DFL_HOST_FLAGS | SIL_FLAG_MOD15WRITE | + .flags = SIL_DFL_PORT_FLAGS | SIL_FLAG_MOD15WRITE | SIL_FLAG_NO_SATA_IRQ, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ @@ -237,7 +237,7 @@ static const struct ata_port_info sil_port_info[] = { /* sil_3512 */ { .sht = &sil_sht, - .host_flags = SIL_DFL_HOST_FLAGS | SIL_FLAG_RERR_ON_DMA_ACT, + .flags = SIL_DFL_PORT_FLAGS | SIL_FLAG_RERR_ON_DMA_ACT, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x3f, /* udma0-5 */ @@ -246,7 +246,7 @@ static const struct ata_port_info sil_port_info[] = { /* sil_3114 */ { .sht = &sil_sht, - .host_flags = SIL_DFL_HOST_FLAGS | SIL_FLAG_RERR_ON_DMA_ACT, + .flags = SIL_DFL_PORT_FLAGS | SIL_FLAG_RERR_ON_DMA_ACT, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x3f, /* udma0-5 */ @@ -295,10 +295,9 @@ static unsigned char sil_get_device_cache_line(struct pci_dev *pdev) static void sil_post_set_mode (struct ata_port *ap) { - struct ata_host_set *host_set = ap->host_set; + struct ata_host *host = ap->host; struct ata_device *dev; - void __iomem *addr = - host_set->mmio_base + sil_port[ap->port_no].xfer_mode; + void __iomem *addr = host->mmio_base + sil_port[ap->port_no].xfer_mode; u32 tmp, dev_mode[2]; unsigned int i; @@ -440,15 +439,15 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2) static irqreturn_t sil_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; - void __iomem *mmio_base = host_set->mmio_base; + struct ata_host *host = dev_instance; + void __iomem *mmio_base = host->mmio_base; int handled = 0; int i; - spin_lock(&host_set->lock); + spin_lock(&host->lock); - for (i = 0; i < host_set->n_ports; i++) { - struct ata_port *ap = host_set->ports[i]; + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; u32 bmdma2 = readl(mmio_base + sil_port[ap->port_no].bmdma2); if (unlikely(!ap || ap->flags & ATA_FLAG_DISABLED)) @@ -466,14 +465,14 @@ static irqreturn_t sil_interrupt(int irq, void *dev_instance, handled = 1; } - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); return IRQ_RETVAL(handled); } static void sil_freeze(struct ata_port *ap) { - void __iomem *mmio_base = ap->host_set->mmio_base; + void __iomem *mmio_base = ap->host->mmio_base; u32 tmp; /* global IRQ mask doesn't block SATA IRQ, turn off explicitly */ @@ -488,7 +487,7 @@ static void sil_freeze(struct ata_port *ap) static void sil_thaw(struct ata_port *ap) { - void __iomem *mmio_base = ap->host_set->mmio_base; + void __iomem *mmio_base = ap->host->mmio_base; u32 tmp; /* clear IRQ */ @@ -567,7 +566,7 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev) } static void sil_init_controller(struct pci_dev *pdev, - int n_ports, unsigned long host_flags, + int n_ports, unsigned long port_flags, void __iomem *mmio_base) { u8 cls; @@ -587,7 +586,7 @@ static void sil_init_controller(struct pci_dev *pdev, "cache line size not set. Driver may not function\n"); /* Apply R_ERR on DMA activate FIS errata workaround */ - if (host_flags & SIL_FLAG_RERR_ON_DMA_ACT) { + if (port_flags & SIL_FLAG_RERR_ON_DMA_ACT) { int cnt; for (i = 0, cnt = 0; i < n_ports; i++) { @@ -658,7 +657,7 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) probe_ent->udma_mask = sil_port_info[ent->driver_data].udma_mask; probe_ent->irq = pdev->irq; probe_ent->irq_flags = IRQF_SHARED; - probe_ent->host_flags = sil_port_info[ent->driver_data].host_flags; + probe_ent->port_flags = sil_port_info[ent->driver_data].flags; mmio_base = pci_iomap(pdev, 5, 0); if (mmio_base == NULL) { @@ -679,7 +678,7 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) ata_std_ports(&probe_ent->port[i]); } - sil_init_controller(pdev, probe_ent->n_ports, probe_ent->host_flags, + sil_init_controller(pdev, probe_ent->n_ports, probe_ent->port_flags, mmio_base); pci_set_master(pdev); @@ -703,12 +702,12 @@ err_out: #ifdef CONFIG_PM static int sil_pci_device_resume(struct pci_dev *pdev) { - struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + struct ata_host *host = dev_get_drvdata(&pdev->dev); ata_pci_device_do_resume(pdev); - sil_init_controller(pdev, host_set->n_ports, host_set->ports[0]->flags, - host_set->mmio_base); - ata_host_set_resume(host_set); + sil_init_controller(pdev, host->n_ports, host->ports[0]->flags, + host->mmio_base); + ata_host_resume(host); return 0; } diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 2d7cf3264587..39cb07baebae 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -316,7 +316,7 @@ struct sil24_port_priv { struct ata_taskfile tf; /* Cached taskfile registers */ }; -/* ap->host_set->private_data */ +/* ap->host->private_data */ struct sil24_host_priv { void __iomem *host_base; /* global controller control (128 bytes @BAR0) */ void __iomem *port_base; /* port registers (4 * 8192 bytes @BAR2) */ @@ -337,7 +337,7 @@ static void sil24_error_handler(struct ata_port *ap); static void sil24_post_internal_cmd(struct ata_queued_cmd *qc); static int sil24_port_start(struct ata_port *ap); static void sil24_port_stop(struct ata_port *ap); -static void sil24_host_stop(struct ata_host_set *host_set); +static void sil24_host_stop(struct ata_host *host); static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); #ifdef CONFIG_PM static int sil24_pci_device_resume(struct pci_dev *pdev); @@ -415,7 +415,7 @@ static const struct ata_port_operations sil24_ops = { }; /* - * Use bits 30-31 of host_flags to encode available port numbers. + * Use bits 30-31 of port_flags to encode available port numbers. * Current maxium is 4. */ #define SIL24_NPORTS2FLAG(nports) ((((unsigned)(nports) - 1) & 0x3) << 30) @@ -425,7 +425,7 @@ static struct ata_port_info sil24_port_info[] = { /* sil_3124 */ { .sht = &sil24_sht, - .host_flags = SIL24_COMMON_FLAGS | SIL24_NPORTS2FLAG(4) | + .flags = SIL24_COMMON_FLAGS | SIL24_NPORTS2FLAG(4) | SIL24_FLAG_PCIX_IRQ_WOC, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ @@ -435,7 +435,7 @@ static struct ata_port_info sil24_port_info[] = { /* sil_3132 */ { .sht = &sil24_sht, - .host_flags = SIL24_COMMON_FLAGS | SIL24_NPORTS2FLAG(2), + .flags = SIL24_COMMON_FLAGS | SIL24_NPORTS2FLAG(2), .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x3f, /* udma0-5 */ @@ -444,7 +444,7 @@ static struct ata_port_info sil24_port_info[] = { /* sil_3131/sil_3531 */ { .sht = &sil24_sht, - .host_flags = SIL24_COMMON_FLAGS | SIL24_NPORTS2FLAG(1), + .flags = SIL24_COMMON_FLAGS | SIL24_NPORTS2FLAG(1), .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x3f, /* udma0-5 */ @@ -871,8 +871,8 @@ static inline void sil24_host_intr(struct ata_port *ap) static irqreturn_t sil24_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; - struct sil24_host_priv *hpriv = host_set->private_data; + struct ata_host *host = dev_instance; + struct sil24_host_priv *hpriv = host->private_data; unsigned handled = 0; u32 status; int i; @@ -888,20 +888,20 @@ static irqreturn_t sil24_interrupt(int irq, void *dev_instance, struct pt_regs * if (!(status & IRQ_STAT_4PORTS)) goto out; - spin_lock(&host_set->lock); + spin_lock(&host->lock); - for (i = 0; i < host_set->n_ports; i++) + for (i = 0; i < host->n_ports; i++) if (status & (1 << i)) { - struct ata_port *ap = host_set->ports[i]; + struct ata_port *ap = host->ports[i]; if (ap && !(ap->flags & ATA_FLAG_DISABLED)) { - sil24_host_intr(host_set->ports[i]); + sil24_host_intr(host->ports[i]); handled++; } else printk(KERN_ERR DRV_NAME ": interrupt from disabled port %d\n", i); } - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); out: return IRQ_RETVAL(handled); } @@ -941,7 +941,7 @@ static inline void sil24_cblk_free(struct sil24_port_priv *pp, struct device *de static int sil24_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct sil24_port_priv *pp; union sil24_cmd_block *cb; size_t cb_size = sizeof(*cb) * SIL24_MAX_CMDS; @@ -980,7 +980,7 @@ err_out: static void sil24_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct sil24_port_priv *pp = ap->private_data; sil24_cblk_free(pp, dev); @@ -988,10 +988,10 @@ static void sil24_port_stop(struct ata_port *ap) kfree(pp); } -static void sil24_host_stop(struct ata_host_set *host_set) +static void sil24_host_stop(struct ata_host *host) { - struct sil24_host_priv *hpriv = host_set->private_data; - struct pci_dev *pdev = to_pci_dev(host_set->dev); + struct sil24_host_priv *hpriv = host->private_data; + struct pci_dev *pdev = to_pci_dev(host->dev); pci_iounmap(pdev, hpriv->host_base); pci_iounmap(pdev, hpriv->port_base); @@ -999,7 +999,7 @@ static void sil24_host_stop(struct ata_host_set *host_set) } static void sil24_init_controller(struct pci_dev *pdev, int n_ports, - unsigned long host_flags, + unsigned long port_flags, void __iomem *host_base, void __iomem *port_base) { @@ -1032,7 +1032,7 @@ static void sil24_init_controller(struct pci_dev *pdev, int n_ports, } /* Configure IRQ WoC */ - if (host_flags & SIL24_FLAG_PCIX_IRQ_WOC) + if (port_flags & SIL24_FLAG_PCIX_IRQ_WOC) writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_STAT); else writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_CLR); @@ -1101,12 +1101,12 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_LIST_HEAD(&probe_ent->node); probe_ent->sht = pinfo->sht; - probe_ent->host_flags = pinfo->host_flags; + probe_ent->port_flags = pinfo->flags; probe_ent->pio_mask = pinfo->pio_mask; probe_ent->mwdma_mask = pinfo->mwdma_mask; probe_ent->udma_mask = pinfo->udma_mask; probe_ent->port_ops = pinfo->port_ops; - probe_ent->n_ports = SIL24_FLAG2NPORTS(pinfo->host_flags); + probe_ent->n_ports = SIL24_FLAG2NPORTS(pinfo->flags); probe_ent->irq = pdev->irq; probe_ent->irq_flags = IRQF_SHARED; @@ -1144,14 +1144,14 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Apply workaround for completion IRQ loss on PCI-X errata */ - if (probe_ent->host_flags & SIL24_FLAG_PCIX_IRQ_WOC) { + if (probe_ent->port_flags & SIL24_FLAG_PCIX_IRQ_WOC) { tmp = readl(host_base + HOST_CTRL); if (tmp & (HOST_CTRL_TRDY | HOST_CTRL_STOP | HOST_CTRL_DEVSEL)) dev_printk(KERN_INFO, &pdev->dev, "Applying completion IRQ loss on PCI-X " "errata fix\n"); else - probe_ent->host_flags &= ~SIL24_FLAG_PCIX_IRQ_WOC; + probe_ent->port_flags &= ~SIL24_FLAG_PCIX_IRQ_WOC; } for (i = 0; i < probe_ent->n_ports; i++) { @@ -1164,7 +1164,7 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ata_std_ports(&probe_ent->port[i]); } - sil24_init_controller(pdev, probe_ent->n_ports, probe_ent->host_flags, + sil24_init_controller(pdev, probe_ent->n_ports, probe_ent->port_flags, host_base, port_base); pci_set_master(pdev); @@ -1191,19 +1191,18 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_PM static int sil24_pci_device_resume(struct pci_dev *pdev) { - struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); - struct sil24_host_priv *hpriv = host_set->private_data; + struct ata_host *host = dev_get_drvdata(&pdev->dev); + struct sil24_host_priv *hpriv = host->private_data; ata_pci_device_do_resume(pdev); if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) writel(HOST_CTRL_GLOBAL_RST, hpriv->host_base + HOST_CTRL); - sil24_init_controller(pdev, host_set->n_ports, - host_set->ports[0]->flags, + sil24_init_controller(pdev, host->n_ports, host->ports[0]->flags, hpriv->host_base, hpriv->port_base); - ata_host_set_resume(host_set); + ata_host_resume(host); return 0; } diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c index ac24f66897f6..9b17375d8056 100644 --- a/drivers/ata/sata_sis.c +++ b/drivers/ata/sata_sis.c @@ -128,7 +128,7 @@ static const struct ata_port_operations sis_ops = { static struct ata_port_info sis_port_info = { .sht = &sis_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, .pio_mask = 0x1f, .mwdma_mask = 0x7, .udma_mask = 0x7f, @@ -158,7 +158,7 @@ static unsigned int get_scr_cfg_addr(unsigned int port_no, unsigned int sc_reg, static u32 sis_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, sc_reg, pdev->device); u32 val, val2 = 0; u8 pmr; @@ -178,7 +178,7 @@ static u32 sis_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg) static void sis_scr_cfg_write (struct ata_port *ap, unsigned int scr, u32 val) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, scr, pdev->device); u8 pmr; @@ -195,7 +195,7 @@ static void sis_scr_cfg_write (struct ata_port *ap, unsigned int scr, u32 val) static u32 sis_scr_read (struct ata_port *ap, unsigned int sc_reg) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); u32 val, val2 = 0; u8 pmr; @@ -217,7 +217,7 @@ static u32 sis_scr_read (struct ata_port *ap, unsigned int sc_reg) static void sis_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 pmr; if (sc_reg > SCR_CONTROL) @@ -275,17 +275,17 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) /* check and see if the SCRs are in IO space or PCI cfg space */ pci_read_config_dword(pdev, SIS_GENCTL, &genctl); if ((genctl & GENCTL_IOMAPPED_SCR) == 0) - probe_ent->host_flags |= SIS_FLAG_CFGSCR; + probe_ent->port_flags |= SIS_FLAG_CFGSCR; /* if hardware thinks SCRs are in IO space, but there are * no IO resources assigned, change to PCI cfg space. */ - if ((!(probe_ent->host_flags & SIS_FLAG_CFGSCR)) && + if ((!(probe_ent->port_flags & SIS_FLAG_CFGSCR)) && ((pci_resource_start(pdev, SIS_SCR_PCI_BAR) == 0) || (pci_resource_len(pdev, SIS_SCR_PCI_BAR) < 128))) { genctl &= ~GENCTL_IOMAPPED_SCR; pci_write_config_dword(pdev, SIS_GENCTL, genctl); - probe_ent->host_flags |= SIS_FLAG_CFGSCR; + probe_ent->port_flags |= SIS_FLAG_CFGSCR; } pci_read_config_byte(pdev, SIS_PMR, &pmr); @@ -306,7 +306,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) port2_start = 0x20; } - if (!(probe_ent->host_flags & SIS_FLAG_CFGSCR)) { + if (!(probe_ent->port_flags & SIS_FLAG_CFGSCR)) { probe_ent->port[0].scr_addr = pci_resource_start(pdev, SIS_SCR_PCI_BAR); probe_ent->port[1].scr_addr = diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c index baf259a966d0..2a7e3495cf16 100644 --- a/drivers/ata/sata_svw.c +++ b/drivers/ata/sata_svw.c @@ -169,7 +169,7 @@ static void k2_sata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) * @qc: Info associated with this ATA transaction. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void k2_bmdma_setup_mmio (struct ata_queued_cmd *qc) @@ -199,7 +199,7 @@ static void k2_bmdma_setup_mmio (struct ata_queued_cmd *qc) * @qc: Info associated with this ATA transaction. * * LOCKING: - * spin_lock_irqsave(host_set lock) + * spin_lock_irqsave(host lock) */ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc) @@ -261,12 +261,12 @@ static int k2_sata_proc_info(struct Scsi_Host *shost, char *page, char **start, return 0; /* Find the OF node for the PCI device proper */ - np = pci_device_to_OF_node(to_pci_dev(ap->host_set->dev)); + np = pci_device_to_OF_node(to_pci_dev(ap->host->dev)); if (np == NULL) return 0; /* Match it to a port node */ - index = (ap == ap->host_set->ports[0]) ? 0 : 1; + index = (ap == ap->host->ports[0]) ? 0 : 1; for (np = np->child; np != NULL; np = np->sibling) { u32 *reg = (u32 *)get_property(np, "reg", NULL); if (!reg) @@ -423,7 +423,7 @@ static int k2_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *e writel(0x0, mmio_base + K2_SATA_SIM_OFFSET); probe_ent->sht = &k2_sata_sht; - probe_ent->host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + probe_ent->port_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO; probe_ent->port_ops = &k2_sata_ops; probe_ent->n_ports = 4; diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 0da83cba5c12..091867e10ea3 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -160,7 +160,7 @@ static void pdc_port_stop(struct ata_port *ap); static void pdc20621_qc_prep(struct ata_queued_cmd *qc); static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf); static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf); -static void pdc20621_host_stop(struct ata_host_set *host_set); +static void pdc20621_host_stop(struct ata_host *host); static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe); static int pdc20621_detect_dimm(struct ata_probe_ent *pe); static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, @@ -218,7 +218,7 @@ static const struct ata_port_info pdc_port_info[] = { /* board_20621 */ { .sht = &pdc_sata_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SRST | ATA_FLAG_MMIO | ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING, .pio_mask = 0x1f, /* pio0-4 */ @@ -244,21 +244,21 @@ static struct pci_driver pdc_sata_pci_driver = { }; -static void pdc20621_host_stop(struct ata_host_set *host_set) +static void pdc20621_host_stop(struct ata_host *host) { - struct pci_dev *pdev = to_pci_dev(host_set->dev); - struct pdc_host_priv *hpriv = host_set->private_data; + struct pci_dev *pdev = to_pci_dev(host->dev); + struct pdc_host_priv *hpriv = host->private_data; void __iomem *dimm_mmio = hpriv->dimm_mmio; pci_iounmap(pdev, dimm_mmio); kfree(hpriv); - pci_iounmap(pdev, host_set->mmio_base); + pci_iounmap(pdev, host->mmio_base); } static int pdc_port_start(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct pdc_port_priv *pp; int rc; @@ -293,7 +293,7 @@ err_out: static void pdc_port_stop(struct ata_port *ap) { - struct device *dev = ap->host_set->dev; + struct device *dev = ap->host->dev; struct pdc_port_priv *pp = ap->private_data; ap->private_data = NULL; @@ -453,8 +453,8 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc) struct scatterlist *sg; struct ata_port *ap = qc->ap; struct pdc_port_priv *pp = ap->private_data; - void __iomem *mmio = ap->host_set->mmio_base; - struct pdc_host_priv *hpriv = ap->host_set->private_data; + void __iomem *mmio = ap->host->mmio_base; + struct pdc_host_priv *hpriv = ap->host->private_data; void __iomem *dimm_mmio = hpriv->dimm_mmio; unsigned int portno = ap->port_no; unsigned int i, idx, total_len = 0, sgt_len; @@ -514,8 +514,8 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct pdc_port_priv *pp = ap->private_data; - void __iomem *mmio = ap->host_set->mmio_base; - struct pdc_host_priv *hpriv = ap->host_set->private_data; + void __iomem *mmio = ap->host->mmio_base; + struct pdc_host_priv *hpriv = ap->host->private_data; void __iomem *dimm_mmio = hpriv->dimm_mmio; unsigned int portno = ap->port_no; unsigned int i; @@ -565,8 +565,8 @@ static void __pdc20621_push_hdma(struct ata_queued_cmd *qc, u32 pkt_ofs) { struct ata_port *ap = qc->ap; - struct ata_host_set *host_set = ap->host_set; - void __iomem *mmio = host_set->mmio_base; + struct ata_host *host = ap->host; + void __iomem *mmio = host->mmio_base; /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; @@ -583,7 +583,7 @@ static void pdc20621_push_hdma(struct ata_queued_cmd *qc, u32 pkt_ofs) { struct ata_port *ap = qc->ap; - struct pdc_host_priv *pp = ap->host_set->private_data; + struct pdc_host_priv *pp = ap->host->private_data; unsigned int idx = pp->hdma_prod & PDC_HDMA_Q_MASK; if (!pp->doing_hdma) { @@ -601,7 +601,7 @@ static void pdc20621_push_hdma(struct ata_queued_cmd *qc, static void pdc20621_pop_hdma(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - struct pdc_host_priv *pp = ap->host_set->private_data; + struct pdc_host_priv *pp = ap->host->private_data; unsigned int idx = pp->hdma_cons & PDC_HDMA_Q_MASK; /* if nothing on queue, we're done */ @@ -620,7 +620,7 @@ static void pdc20621_dump_hdma(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; unsigned int port_no = ap->port_no; - struct pdc_host_priv *hpriv = ap->host_set->private_data; + struct pdc_host_priv *hpriv = ap->host->private_data; void *dimm_mmio = hpriv->dimm_mmio; dimm_mmio += (port_no * PDC_DIMM_WINDOW_STEP); @@ -638,9 +638,9 @@ static inline void pdc20621_dump_hdma(struct ata_queued_cmd *qc) { } static void pdc20621_packet_start(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - struct ata_host_set *host_set = ap->host_set; + struct ata_host *host = ap->host; unsigned int port_no = ap->port_no; - void __iomem *mmio = host_set->mmio_base; + void __iomem *mmio = host->mmio_base; unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE); u8 seq = (u8) (port_no + 1); unsigned int port_ofs; @@ -781,8 +781,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap, static void pdc20621_irq_clear(struct ata_port *ap) { - struct ata_host_set *host_set = ap->host_set; - void __iomem *mmio = host_set->mmio_base; + struct ata_host *host = ap->host; + void __iomem *mmio = host->mmio_base; mmio += PDC_CHIP0_OFS; @@ -791,7 +791,7 @@ static void pdc20621_irq_clear(struct ata_port *ap) static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; struct ata_port *ap; u32 mask = 0; unsigned int i, tmp, port_no; @@ -800,12 +800,12 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re VPRINTK("ENTER\n"); - if (!host_set || !host_set->mmio_base) { + if (!host || !host->mmio_base) { VPRINTK("QUICK EXIT\n"); return IRQ_NONE; } - mmio_base = host_set->mmio_base; + mmio_base = host->mmio_base; /* reading should also clear interrupts */ mmio_base += PDC_CHIP0_OFS; @@ -822,16 +822,16 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re return IRQ_NONE; } - spin_lock(&host_set->lock); + spin_lock(&host->lock); for (i = 1; i < 9; i++) { port_no = i - 1; if (port_no > 3) port_no -= 4; - if (port_no >= host_set->n_ports) + if (port_no >= host->n_ports) ap = NULL; else - ap = host_set->ports[port_no]; + ap = host->ports[port_no]; tmp = mask & (1 << i); VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp); if (tmp && ap && @@ -845,7 +845,7 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re } } - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); VPRINTK("mask == 0x%x\n", mask); @@ -857,13 +857,13 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re static void pdc_eng_timeout(struct ata_port *ap) { u8 drv_stat; - struct ata_host_set *host_set = ap->host_set; + struct ata_host *host = ap->host; struct ata_queued_cmd *qc; unsigned long flags; DPRINTK("ENTER\n"); - spin_lock_irqsave(&host_set->lock, flags); + spin_lock_irqsave(&host->lock, flags); qc = ata_qc_from_tag(ap, ap->active_tag); @@ -885,7 +885,7 @@ static void pdc_eng_timeout(struct ata_port *ap) break; } - spin_unlock_irqrestore(&host_set->lock, flags); + spin_unlock_irqrestore(&host->lock, flags); ata_eh_qc_complete(qc); DPRINTK("EXIT\n"); } @@ -1429,7 +1429,7 @@ static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id * hpriv->dimm_mmio = dimm_mmio; probe_ent->sht = pdc_port_info[board_idx].sht; - probe_ent->host_flags = pdc_port_info[board_idx].host_flags; + probe_ent->port_flags = pdc_port_info[board_idx].flags; probe_ent->pio_mask = pdc_port_info[board_idx].pio_mask; probe_ent->mwdma_mask = pdc_port_info[board_idx].mwdma_mask; probe_ent->udma_mask = pdc_port_info[board_idx].udma_mask; diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index 654aae2b25c5..8fc6e800011a 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -128,7 +128,7 @@ static const struct ata_port_operations uli_ops = { static struct ata_port_info uli_port_info = { .sht = &uli_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, .pio_mask = 0x1f, /* pio0-4 */ .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &uli_ops, @@ -143,13 +143,13 @@ MODULE_VERSION(DRV_VERSION); static unsigned int get_scr_cfg_addr(struct ata_port *ap, unsigned int sc_reg) { - struct uli_priv *hpriv = ap->host_set->private_data; + struct uli_priv *hpriv = ap->host->private_data; return hpriv->scr_cfg_addr[ap->port_no] + (4 * sc_reg); } static u32 uli_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg); u32 val; @@ -159,7 +159,7 @@ static u32 uli_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg) static void uli_scr_cfg_write (struct ata_port *ap, unsigned int scr, u32 val) { - struct pci_dev *pdev = to_pci_dev(ap->host_set->dev); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); unsigned int cfg_addr = get_scr_cfg_addr(ap, scr); pci_write_config_dword(pdev, cfg_addr, val); diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index a0699a1728d4..7f087aef99de 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -176,7 +176,7 @@ static const struct ata_port_operations vt6421_sata_ops = { static struct ata_port_info vt6420_port_info = { .sht = &svia_sht, - .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY, .pio_mask = 0x1f, .mwdma_mask = 0x07, .udma_mask = 0x7f, @@ -346,7 +346,7 @@ static struct ata_probe_ent *vt6421_init_probe_ent(struct pci_dev *pdev) INIT_LIST_HEAD(&probe_ent->node); probe_ent->sht = &svia_sht; - probe_ent->host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY; + probe_ent->port_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY; probe_ent->port_ops = &vt6421_sata_ops; probe_ent->n_ports = N_PORTS; probe_ent->irq = pdev->irq; diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c index 4c69a705a483..d0d92f33de54 100644 --- a/drivers/ata/sata_vsc.c +++ b/drivers/ata/sata_vsc.c @@ -123,7 +123,7 @@ static void vsc_intr_mask_update(struct ata_port *ap, u8 ctl) void __iomem *mask_addr; u8 mask; - mask_addr = ap->host_set->mmio_base + + mask_addr = ap->host->mmio_base + VSC_SATA_INT_MASK_OFFSET + ap->port_no; mask = readb(mask_addr); if (ctl & ATA_NIEN) @@ -206,20 +206,20 @@ static void vsc_sata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance, struct pt_regs *regs) { - struct ata_host_set *host_set = dev_instance; + struct ata_host *host = dev_instance; unsigned int i; unsigned int handled = 0; u32 int_status; - spin_lock(&host_set->lock); + spin_lock(&host->lock); - int_status = readl(host_set->mmio_base + VSC_SATA_INT_STAT_OFFSET); + int_status = readl(host->mmio_base + VSC_SATA_INT_STAT_OFFSET); - for (i = 0; i < host_set->n_ports; i++) { + for (i = 0; i < host->n_ports; i++) { if (int_status & ((u32) 0xFF << (8 * i))) { struct ata_port *ap; - ap = host_set->ports[i]; + ap = host->ports[i]; if (is_vsc_sata_int_err(i, int_status)) { u32 err_status; @@ -259,7 +259,7 @@ static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance, } } - spin_unlock(&host_set->lock); + spin_unlock(&host->lock); return IRQ_RETVAL(handled); } @@ -395,7 +395,7 @@ static int __devinit vsc_sata_init_one (struct pci_dev *pdev, const struct pci_d pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x80); probe_ent->sht = &vsc_sata_sht; - probe_ent->host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + probe_ent->port_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO; probe_ent->port_ops = &vsc_sata_ops; probe_ent->n_ports = 4; diff --git a/include/linux/libata.h b/include/linux/libata.h index 806682603ac5..563885cb0995 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -197,7 +197,7 @@ enum { ATA_QCFLAG_EH_SCHEDULED = (1 << 18), /* EH scheduled (obsolete) */ /* host set flags */ - ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host_set only */ + ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ /* various lengths of time */ ATA_TMOUT_BOOT = 30 * HZ, /* heuristic */ @@ -357,13 +357,13 @@ struct ata_probe_ent { unsigned long irq; unsigned long irq2; unsigned int irq_flags; - unsigned long host_flags; - unsigned long host_set_flags; + unsigned long port_flags; + unsigned long _host_flags; void __iomem *mmio_base; void *private_data; }; -struct ata_host_set { +struct ata_host { spinlock_t lock; struct device *dev; unsigned long irq; @@ -420,7 +420,7 @@ struct ata_queued_cmd { void *private_data; }; -struct ata_host_stats { +struct ata_port_stats { unsigned long unhandled_irq; unsigned long idle_irq; unsigned long rw_reqbuf; @@ -498,7 +498,7 @@ struct ata_eh_context { }; struct ata_port { - struct Scsi_Host *host; /* our co-allocated scsi host */ + struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ const struct ata_port_operations *ops; spinlock_t *lock; unsigned long flags; /* ATA_FLAG_xxx */ @@ -523,7 +523,7 @@ struct ata_port { unsigned int hw_sata_spd_limit; unsigned int sata_spd_limit; /* SATA PHY speed limit */ - /* record runtime error info, protected by host_set lock */ + /* record runtime error info, protected by host lock */ struct ata_eh_info eh_info; /* EH context owned by EH */ struct ata_eh_context eh_context; @@ -537,8 +537,8 @@ struct ata_port { unsigned int active_tag; u32 sactive; - struct ata_host_stats stats; - struct ata_host_set *host_set; + struct ata_port_stats stats; + struct ata_host *host; struct device *dev; struct work_struct port_task; @@ -614,7 +614,7 @@ struct ata_port_operations { int (*port_start) (struct ata_port *ap); void (*port_stop) (struct ata_port *ap); - void (*host_stop) (struct ata_host_set *host_set); + void (*host_stop) (struct ata_host *host); void (*bmdma_stop) (struct ata_queued_cmd *qc); u8 (*bmdma_status) (struct ata_port *ap); @@ -622,7 +622,7 @@ struct ata_port_operations { struct ata_port_info { struct scsi_host_template *sht; - unsigned long host_flags; + unsigned long flags; unsigned long pio_mask; unsigned long mwdma_mask; unsigned long udma_mask; @@ -690,15 +690,15 @@ extern int ata_pci_clear_simplex(struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(const struct ata_probe_ent *ent); extern void ata_port_detach(struct ata_port *ap); -extern void ata_host_set_init(struct ata_host_set *, struct device *, - unsigned long, const struct ata_port_operations *); -extern void ata_host_set_remove(struct ata_host_set *host_set); +extern void ata_host_init(struct ata_host *, struct device *, + unsigned long, const struct ata_port_operations *); +extern void ata_host_remove(struct ata_host *host); extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); extern int ata_scsi_release(struct Scsi_Host *host); extern void ata_sas_port_destroy(struct ata_port *); -extern struct ata_port *ata_sas_port_alloc(struct ata_host_set *, +extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_port_start(struct ata_port *ap); @@ -715,9 +715,8 @@ extern int ata_port_online(struct ata_port *ap); extern int ata_port_offline(struct ata_port *ap); extern int ata_scsi_device_resume(struct scsi_device *); extern int ata_scsi_device_suspend(struct scsi_device *, pm_message_t mesg); -extern int ata_host_set_suspend(struct ata_host_set *host_set, - pm_message_t mesg); -extern void ata_host_set_resume(struct ata_host_set *host_set); +extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); +extern void ata_host_resume(struct ata_host *host); extern int ata_ratelimit(void); extern unsigned int ata_busy_sleep(struct ata_port *ap, unsigned long timeout_pat, @@ -742,7 +741,7 @@ extern u8 ata_altstatus(struct ata_port *ap); extern void ata_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); extern int ata_port_start (struct ata_port *ap); extern void ata_port_stop (struct ata_port *ap); -extern void ata_host_stop (struct ata_host_set *host_set); +extern void ata_host_stop (struct ata_host *host); extern irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs); extern void ata_mmio_data_xfer(struct ata_device *adev, unsigned char *buf, unsigned int buflen, int write_data); @@ -828,7 +827,7 @@ struct pci_bits { unsigned long val; }; -extern void ata_pci_host_stop (struct ata_host_set *host_set); +extern void ata_pci_host_stop (struct ata_host *host); extern struct ata_probe_ent * ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int portmask); extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); -- cgit v1.2.3-71-gd317 From 669a5db411d85a14f86cd92bc16bf7ab5b8aa235 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 29 Aug 2006 18:12:40 -0400 Subject: [libata] Add a bunch of PATA drivers. The vast majority of drivers and changes are from Alan Cox. Albert Lee contributed and maintains pata_pdc2027x. Adrian Bunk, Andrew Morton, and Tejun Heo contributed various minor fixes and updates. Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 334 +++++++++++ drivers/ata/Makefile | 41 ++ drivers/ata/ata_generic.c | 252 ++++++++ drivers/ata/ata_piix.c | 436 ++++++++++---- drivers/ata/pata_ali.c | 679 +++++++++++++++++++++ drivers/ata/pata_amd.c | 707 ++++++++++++++++++++++ drivers/ata/pata_artop.c | 518 ++++++++++++++++ drivers/ata/pata_atiixp.c | 306 ++++++++++ drivers/ata/pata_cmd64x.c | 505 ++++++++++++++++ drivers/ata/pata_cs5520.c | 336 +++++++++++ drivers/ata/pata_cs5530.c | 387 ++++++++++++ drivers/ata/pata_cs5535.c | 291 +++++++++ drivers/ata/pata_cypress.c | 227 +++++++ drivers/ata/pata_efar.c | 342 +++++++++++ drivers/ata/pata_hpt366.c | 478 +++++++++++++++ drivers/ata/pata_hpt37x.c | 1257 +++++++++++++++++++++++++++++++++++++++ drivers/ata/pata_hpt3x2n.c | 597 +++++++++++++++++++ drivers/ata/pata_hpt3x3.c | 226 +++++++ drivers/ata/pata_isapnp.c | 156 +++++ drivers/ata/pata_it8172.c | 288 +++++++++ drivers/ata/pata_it821x.c | 847 ++++++++++++++++++++++++++ drivers/ata/pata_jmicron.c | 266 +++++++++ drivers/ata/pata_legacy.c | 949 +++++++++++++++++++++++++++++ drivers/ata/pata_mpiix.c | 313 ++++++++++ drivers/ata/pata_netcell.c | 175 ++++++ drivers/ata/pata_ns87410.c | 236 ++++++++ drivers/ata/pata_oldpiix.c | 339 +++++++++++ drivers/ata/pata_opti.c | 292 +++++++++ drivers/ata/pata_optidma.c | 547 +++++++++++++++++ drivers/ata/pata_pcmcia.c | 393 ++++++++++++ drivers/ata/pata_pdc2027x.c | 869 +++++++++++++++++++++++++++ drivers/ata/pata_pdc202xx_old.c | 423 +++++++++++++ drivers/ata/pata_qdi.c | 403 +++++++++++++ drivers/ata/pata_radisys.c | 335 +++++++++++ drivers/ata/pata_rz1000.c | 205 +++++++ drivers/ata/pata_sc1200.c | 287 +++++++++ drivers/ata/pata_serverworks.c | 587 ++++++++++++++++++ drivers/ata/pata_sil680.c | 381 ++++++++++++ drivers/ata/pata_sis.c | 1030 ++++++++++++++++++++++++++++++++ drivers/ata/pata_sl82c105.c | 388 ++++++++++++ drivers/ata/pata_triflex.c | 285 +++++++++ drivers/ata/pata_via.c | 568 ++++++++++++++++++ include/linux/libata.h | 2 +- 43 files changed, 18372 insertions(+), 111 deletions(-) create mode 100644 drivers/ata/ata_generic.c create mode 100644 drivers/ata/pata_ali.c create mode 100644 drivers/ata/pata_amd.c create mode 100644 drivers/ata/pata_artop.c create mode 100644 drivers/ata/pata_atiixp.c create mode 100644 drivers/ata/pata_cmd64x.c create mode 100644 drivers/ata/pata_cs5520.c create mode 100644 drivers/ata/pata_cs5530.c create mode 100644 drivers/ata/pata_cs5535.c create mode 100644 drivers/ata/pata_cypress.c create mode 100644 drivers/ata/pata_efar.c create mode 100644 drivers/ata/pata_hpt366.c create mode 100644 drivers/ata/pata_hpt37x.c create mode 100644 drivers/ata/pata_hpt3x2n.c create mode 100644 drivers/ata/pata_hpt3x3.c create mode 100644 drivers/ata/pata_isapnp.c create mode 100644 drivers/ata/pata_it8172.c create mode 100644 drivers/ata/pata_it821x.c create mode 100644 drivers/ata/pata_jmicron.c create mode 100644 drivers/ata/pata_legacy.c create mode 100644 drivers/ata/pata_mpiix.c create mode 100644 drivers/ata/pata_netcell.c create mode 100644 drivers/ata/pata_ns87410.c create mode 100644 drivers/ata/pata_oldpiix.c create mode 100644 drivers/ata/pata_opti.c create mode 100644 drivers/ata/pata_optidma.c create mode 100644 drivers/ata/pata_pcmcia.c create mode 100644 drivers/ata/pata_pdc2027x.c create mode 100644 drivers/ata/pata_pdc202xx_old.c create mode 100644 drivers/ata/pata_qdi.c create mode 100644 drivers/ata/pata_radisys.c create mode 100644 drivers/ata/pata_rz1000.c create mode 100644 drivers/ata/pata_sc1200.c create mode 100644 drivers/ata/pata_serverworks.c create mode 100644 drivers/ata/pata_sil680.c create mode 100644 drivers/ata/pata_sis.c create mode 100644 drivers/ata/pata_sl82c105.c create mode 100644 drivers/ata/pata_triflex.c create mode 100644 drivers/ata/pata_via.c (limited to 'include/linux') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 13027d56b7f6..cbda988692e7 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -145,6 +145,340 @@ config SATA_INTEL_COMBINED depends on IDE=y && !BLK_DEV_IDE_SATA && (SATA_AHCI || ATA_PIIX) default y +config PATA_ALI + tristate "ALi PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the ALi ATA interfaces + found on the many ALi chipsets. + + If unsure, say N. + +config PATA_AMD + tristate "AMD/NVidia PATA support (Experimental)" + depends on PCI + help + This option enables support for the AMD and NVidia PATA + interfaces found on the chipsets for Athlon/Athlon64. + + If unsure, say N. + +config PATA_ARTOP + tristate "ARTOP 6210/6260 PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for ARTOP PATA controllers. + + If unsure, say N. + +config PATA_ATIIXP + tristate "ATI PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the ATI ATA interfaces + found on the many ATI chipsets. + + If unsure, say N. + +config PATA_CMD64X + tristate "CMD64x PATA support (Very Experimental)" + depends on PCI&& EXPERIMENTAL + help + This option enables support for the CMD64x series chips + except for the CMD640. + + If unsure, say N. + +config PATA_CS5520 + tristate "CS5510/5520 PATA support" + depends on PCI + help + This option enables support for the Cyrix 5510/5520 + companion chip used with the MediaGX/Geode processor family. + + If unsure, say N. + +config PATA_CS5530 + tristate "CS5530 PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the Cyrix/NatSemi/AMD CS5530 + companion chip used with the MediaGX/Geode processor family. + + If unsure, say N. + +config PATA_CS5535 + tristate "CS5535 PATA support (Experimental)" + depends on PCI && X86 && !X86_64 && EXPERIMENTAL + help + This option enables support for the NatSemi/AMD CS5535 + companion chip used with the Geode processor family. + + If unsure, say N. + +config PATA_CYPRESS + tristate "Cypress CY82C693 PATA support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the Cypress/Contaq CY82C693 + chipset found in some Alpha systems + + If unsure, say N. + +config PATA_EFAR + tristate "EFAR SLC90E66 support" + depends on PCI + help + This option enables support for the EFAR SLC90E66 + IDE controller found on some older machines. + + If unsure, say N. + +config ATA_GENERIC + tristate "Generic ATA support" + depends on PCI + help + This option enables support for generic BIOS configured + ATA controllers via the new ATA layer + + If unsure, say N. + +config PATA_HPT366 + tristate "HPT 366/368 PATA support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the HPT 366 and 368 + PATA controllers via the new ATA layer. + + If unsure, say N. + +config PATA_HPT37X + tristate "HPT 370/370A/371/372/374/302 PATA support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the majority of the later HPT + PATA controllers via the new ATA layer. + + If unsure, say N. + +config PATA_HPT3X2N + tristate "HPT 372N/302N PATA support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the N variant HPT PATA + controllers via the new ATA layer + + If unsure, say N. + +config PATA_HPT3X3 + tristate "HPT 343/363 PATA support (Experimental)" + depends on PCI + help + This option enables support for the HPT 343/363 + PATA controllers via the new ATA layer + + If unsure, say N. + +config PATA_ISAPNP + tristate "ISA Plug and Play PATA support (Very Experimental)" + depends on EXPERIMENTAL && ISAPNP + help + This option enables support for ISA plug & play ATA + controllers such as those found on old soundcards. + + If unsure, say N. + +config PATA_IT8172 + tristate "IT8172 PATA support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the ITE 8172 PATA controller + via the new ATA layer. + + If unsure, say N. + +config PATA_IT821X + tristate "IT821x PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the ITE 8211 and 8212 + PATA controllers via the new ATA layer, including RAID + mode. + + If unsure, say N. + +config PATA_LEGACY + tristate "Legacy ISA PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for ISA/VLB bus legacy PATA + ports and allows them to be accessed via the new ATA layer. + + If unsure, say N. + +config PATA_TRIFLEX + tristate "Compaq Triflex PATA support" + depends on PCI + help + Enable support for the Compaq 'Triflex' IDE controller as found + on many Compaq Pentium-Pro systems, via the new ATA layer. + + If unsure, say N. + +config PATA_MPIIX + tristate "Intel PATA MPIIX support" + depends on PCI + help + This option enables support for MPIIX PATA support. + + If unsure, say N. + +config PATA_OLDPIIX + tristate "Intel PATA old PIIX support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for old(?) PIIX PATA support. + + If unsure, say N. + +config PATA_NETCELL + tristate "NETCELL Revolution RAID support" + depends on PCI + help + This option enables support for the Netcell Revolution RAID + PATA controller. + + If unsure, say N. + +config PATA_NS87410 + tristate "Nat Semi NS87410 PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the National Semiconductor + NS87410 PCI-IDE controller. + + If unsure, say N. + +config PATA_OPTI + tristate "OPTI621/6215 PATA support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables full PIO support for the early Opti ATA + controllers found on some old motherboards. + + If unsure, say N. + +config PATA_OPTIDMA + tristate "OPTI FireStar PATA support (Veyr Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables DMA/PIO support for the later OPTi + controllers found on some old motherboards and in some + latops + + If unsure, say N. + +config PATA_PCMCIA + tristate "PCMCIA PATA support" + depends on PCMCIA + help + This option enables support for PCMCIA ATA interfaces, including + compact flash card adapters via the new ATA layer. + + If unsure, say N. + +config PATA_PDC_OLD + tristate "Older Promise PATA controller support (Very Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the Promise 20246, 20262, 20263, + 20265 and 20267 adapters. + + If unsure, say N. + +config PATA_QDI + tristate "QDI VLB PATA support" + help + Support for QDI 6500 and 6580 PATA controllers on VESA local bus. + +config PATA_RADISYS + tristate "RADISYS 82600 PATA support (Very experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the RADISYS 82600 + PATA controllers via the new ATA layer + + If unsure, say N. + +config PATA_RZ1000 + tristate "PC Tech RZ1000 PATA support" + depends on PCI + help + This option enables basic support for the PC Tech RZ1000/1 + PATA controllers via the new ATA layer + + If unsure, say N. + +config PATA_SC1200 + tristate "SC1200 PATA support (Raving Lunatic)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the NatSemi/AMD SC1200 SoC + companion chip used with the Geode processor family. + + If unsure, say N. + +config PATA_SERVERWORKS + tristate "SERVERWORKS OSB4/CSB5/CSB6/HT1000 PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for the Serverworks OSB4/CSB5/CSB6 and + HT1000 PATA controllers, via the new ATA layer. + + If unsure, say N. + +config PATA_PDC2027X + tristate "Promise PATA 2027x support" + depends on PCI + help + This option enables support for Promise PATA pdc20268 to pdc20277 host adapters. + + If unsure, say N. + +config PATA_SIL680 + tristate "CMD / Silicon Image 680 PATA support" + depends on PCI + help + This option enables support for CMD / Silicon Image 680 PATA. + + If unsure, say N. + +config PATA_SIS + tristate "SiS PATA support (Experimental)" + depends on PCI && EXPERIMENTAL + help + This option enables support for SiS PATA controllers + + If unsure, say N. + +config PATA_VIA + tristate "VIA PATA support" + depends on PCI + help + This option enables support for the VIA PATA interfaces + found on the many VIA chipsets. + + If unsure, say N. + +config PATA_WINBOND + tristate "Winbond SL82C105 PATA support" + depends on PCI + help + This option enables support for SL82C105 PATA devices found in the + Netwinder and some other systems + + If unsure, say N. + endif endmenu diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index e260e3fe65c8..b183a04aeaf7 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -17,5 +17,46 @@ obj-$(CONFIG_SATA_ULI) += sata_uli.o obj-$(CONFIG_SATA_MV) += sata_mv.o obj-$(CONFIG_PDC_ADMA) += pdc_adma.o +obj-$(CONFIG_PATA_ALI) += pata_ali.o +obj-$(CONFIG_PATA_AMD) += pata_amd.o +obj-$(CONFIG_PATA_ARTOP) += pata_artop.o +obj-$(CONFIG_PATA_ATIIXP) += pata_atiixp.o +obj-$(CONFIG_PATA_CMD64X) += pata_cmd64x.o +obj-$(CONFIG_PATA_CS5520) += pata_cs5520.o +obj-$(CONFIG_PATA_CS5530) += pata_cs5530.o +obj-$(CONFIG_PATA_CS5535) += pata_cs5535.o +obj-$(CONFIG_PATA_CYPRESS) += pata_cypress.o +obj-$(CONFIG_PATA_EFAR) += pata_efar.o +obj-$(CONFIG_PATA_HPT366) += pata_hpt366.o +obj-$(CONFIG_PATA_HPT37X) += pata_hpt37x.o +obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o +obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o +obj-$(CONFIG_PATA_ISAPNP) += pata_isapnp.o +obj-$(CONFIG_PATA_IT8172) += pata_it8172.o +obj-$(CONFIG_PATA_IT821X) += pata_it821x.o +obj-$(CONFIG_PATA_NETCELL) += pata_netcell.o +obj-$(CONFIG_PATA_NS87410) += pata_ns87410.o +obj-$(CONFIG_PATA_OPTI) += pata_opti.o +obj-$(CONFIG_PATA_OPTIDMA) += pata_optidma.o +obj-$(CONFIG_PATA_MPIIX) += pata_mpiix.o +obj-$(CONFIG_PATA_OLDPIIX) += pata_oldpiix.o +obj-$(CONFIG_PATA_PCMCIA) += pata_pcmcia.o +obj-$(CONFIG_PATA_PDC2027X) += pata_pdc2027x.o +obj-$(CONFIG_PATA_PDC_OLD) += pata_pdc202xx_old.o +obj-$(CONFIG_PATA_QDI) += pata_qdi.o +obj-$(CONFIG_PATA_RADISYS) += pata_radisys.o +obj-$(CONFIG_PATA_RZ1000) += pata_rz1000.o +obj-$(CONFIG_PATA_SC1200) += pata_sc1200.o +obj-$(CONFIG_PATA_SERVERWORKS) += pata_serverworks.o +obj-$(CONFIG_PATA_SIL680) += pata_sil680.o +obj-$(CONFIG_PATA_VIA) += pata_via.o +obj-$(CONFIG_PATA_WINBOND) += pata_sl82c105.o +obj-$(CONFIG_PATA_SIS) += pata_sis.o +obj-$(CONFIG_PATA_TRIFLEX) += pata_triflex.o +# Should be last but one libata driver +obj-$(CONFIG_ATA_GENERIC) += ata_generic.o +# Should be last libata driver +obj-$(CONFIG_PATA_LEGACY) += pata_legacy.o + libata-objs := libata-core.o libata-scsi.o libata-sff.o libata-eh.o diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c new file mode 100644 index 000000000000..20b191dcf7ed --- /dev/null +++ b/drivers/ata/ata_generic.c @@ -0,0 +1,252 @@ +/* + * ata_generic.c - Generic PATA/SATA controller driver. + * Copyright 2005 Red Hat Inc , all rights reserved. + * + * Elements from ide/pci/generic.c + * Copyright (C) 2001-2002 Andre Hedrick + * Portions (C) Copyright 2002 Red Hat Inc + * + * May be copied or modified under the terms of the GNU General Public License + * + * Driver for PCI IDE interfaces implementing the standard bus mastering + * interface functionality. This assumes the BIOS did the drive set up and + * tuning for us. By default we do not grab all IDE class devices as they + * may have other drivers or need fixups to avoid problems. Instead we keep + * a default list of stuff without documentation/driver that appears to + * work. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "ata_generic" +#define DRV_VERSION "0.2.6" + +/* + * A generic parallel ATA driver using libata + */ + +/** + * generic_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int generic_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + + +/** + * generic_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * @classes: + * + * LOCKING: + * None (inherited from caller). + */ + + +static void generic_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, generic_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * generic_set_mode - mode setting + * @ap: interface to set up + * + * Use a non standard set_mode function. We don't want to be tuned. + * The BIOS configured everything. Our job is not to fiddle. We + * read the dma enabled bits from the PCI configuration of the device + * and respect them. + */ + +static void generic_set_mode(struct ata_port *ap) +{ + int dma_enabled = 0; + int i; + + /* Bits 5 and 6 indicate if DMA is active on master/slave */ + if (ap->ioaddr.bmdma_addr) + dma_enabled = inb(ap->ioaddr.bmdma_addr + ATA_DMA_CMD); + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + if (ata_dev_enabled(dev)) { + /* We don't really care */ + dev->pio_mode = XFER_PIO_0; + dev->dma_mode = XFER_MW_DMA_0; + /* We do need the right mode information for DMA or PIO + and this comes from the current configuration flags */ + if (dma_enabled & (1 << (5 + i))) { + dev->xfer_mode = XFER_MW_DMA_0; + dev->xfer_shift = ATA_SHIFT_MWDMA; + dev->flags &= ~ATA_DFLAG_PIO; + } else { + dev->xfer_mode = XFER_PIO_0; + dev->xfer_shift = ATA_SHIFT_PIO; + dev->flags |= ATA_DFLAG_PIO; + } + } + } +} + +static struct scsi_host_template generic_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations generic_port_ops = { + .set_mode = generic_set_mode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .data_xfer = ata_pio_data_xfer, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = generic_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int all_generic_ide; /* Set to claim all devices */ + +/** + * ata_generic_init - attach generic IDE + * @dev: PCI device found + * @id: match entry + * + * Called each time a matching IDE interface is found. We check if the + * interface is one we wish to claim and if so we perform any chip + * specific hacks then let the ATA layer do the heavy lifting. + */ + +static int ata_generic_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + u16 command; + static struct ata_port_info info = { + .sht = &generic_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &generic_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + /* Don't use the generic entry unless instructed to do so */ + if (id->driver_data == 1 && all_generic_ide == 0) + return -ENODEV; + + /* Devices that need care */ + if (dev->vendor == PCI_VENDOR_ID_UMC && + dev->device == PCI_DEVICE_ID_UMC_UM8886A && + (!(PCI_FUNC(dev->devfn) & 1))) + return -ENODEV; + + if (dev->vendor == PCI_VENDOR_ID_OPTI && + dev->device == PCI_DEVICE_ID_OPTI_82C558 && + (!(PCI_FUNC(dev->devfn) & 1))) + return -ENODEV; + + /* Don't re-enable devices in generic mode or we will break some + motherboards with disabled and unused IDE controllers */ + pci_read_config_word(dev, PCI_COMMAND, &command); + if (!(command & PCI_COMMAND_IO)) + return -ENODEV; + + if (dev->vendor == PCI_VENDOR_ID_AL) + ata_pci_clear_simplex(dev); + + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id ata_generic[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE), }, + { PCI_DEVICE(PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565), }, + { PCI_DEVICE(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8673F), }, + { PCI_DEVICE(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886A), }, + { PCI_DEVICE(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF), }, + { PCI_DEVICE(PCI_VENDOR_ID_HINT, PCI_DEVICE_ID_HINT_VXPROII_IDE), }, + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561), }, + { PCI_DEVICE(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, + /* Must come last. If you add entries adjust this table appropriately */ + { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 1}, + { 0, }, +}; + +static struct pci_driver ata_generic_pci_driver = { + .name = DRV_NAME, + .id_table = ata_generic, + .probe = ata_generic_init_one, + .remove = ata_pci_remove_one +}; + +static int __init ata_generic_init(void) +{ + return pci_module_init(&ata_generic_pci_driver); +} + + +static void __exit ata_generic_exit(void) +{ + pci_unregister_driver(&ata_generic_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for generic ATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ata_generic); +MODULE_VERSION(DRV_VERSION); + +module_init(ata_generic_init); +module_exit(ata_generic_exit); + +module_param(all_generic_ide, int, 0); diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 22b2dba90b9a..12b3a42fb356 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -93,7 +93,7 @@ #include #define DRV_NAME "ata_piix" -#define DRV_VERSION "2.00" +#define DRV_VERSION "2.00ac6" enum { PIIX_IOCFG = 0x54, /* IDE I/O configuration register */ @@ -116,15 +116,18 @@ enum { PIIX_80C_SEC = (1 << 7) | (1 << 6), /* controller IDs */ - piix4_pata = 0, - ich5_pata = 1, - ich5_sata = 2, - esb_sata = 3, - ich6_sata = 4, - ich6_sata_ahci = 5, - ich6m_sata_ahci = 6, - ich8_sata_ahci = 7, - + piix_pata_33 = 0, /* PIIX3 or 4 at 33Mhz */ + ich_pata_33 = 1, /* ICH up to UDMA 33 only */ + ich_pata_66 = 2, /* ICH up to 66 Mhz */ + ich_pata_100 = 3, /* ICH up to UDMA 100 */ + ich_pata_133 = 4, /* ICH up to UDMA 133 */ + ich5_sata = 5, + esb_sata = 6, + ich6_sata = 7, + ich6_sata_ahci = 8, + ich6m_sata_ahci = 9, + ich8_sata_ahci = 10, + /* constants for mapping table */ P0 = 0, /* port 0 */ P1 = 1, /* port 1 */ @@ -152,19 +155,54 @@ struct piix_host_priv { static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); static void piix_host_stop(struct ata_host *host); -static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev); -static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev); static void piix_pata_error_handler(struct ata_port *ap); +static void ich_pata_error_handler(struct ata_port *ap); static void piix_sata_error_handler(struct ata_port *ap); +static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev); +static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev); +static void ich_set_dmamode (struct ata_port *ap, struct ata_device *adev); static unsigned int in_module_init = 1; static const struct pci_device_id piix_pci_tbl[] = { #ifdef ATA_ENABLE_PATA - { 0x8086, 0x7111, PCI_ANY_ID, PCI_ANY_ID, 0, 0, piix4_pata }, - { 0x8086, 0x24db, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_pata }, - { 0x8086, 0x25a2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_pata }, - { 0x8086, 0x27df, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_pata }, + /* Intel PIIX4 for the 430TX/440BX/MX chipset: UDMA 33 */ + /* Also PIIX4E (fn3 rev 2) and PIIX4M (fn3 rev 3) */ + { 0x8086, 0x7111, PCI_ANY_ID, PCI_ANY_ID, 0, 0, piix_pata_33 }, + { 0x8086, 0x24db, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + { 0x8086, 0x25a2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* Intel PIIX4 */ + { 0x8086, 0x7199, PCI_ANY_ID, PCI_ANY_ID, 0, 0, piix_pata_33 }, + /* Intel PIIX4 */ + { 0x8086, 0x7601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, piix_pata_33 }, + /* Intel PIIX */ + { 0x8086, 0x84CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, piix_pata_33 }, + /* Intel ICH (i810, i815, i840) UDMA 66*/ + { 0x8086, 0x2411, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_66 }, + /* Intel ICH0 : UDMA 33*/ + { 0x8086, 0x2421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_33 }, + /* Intel ICH2M */ + { 0x8086, 0x244A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* Intel ICH2 (i810E2, i845, 850, 860) UDMA 100 */ + { 0x8086, 0x244B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* Intel ICH3M */ + { 0x8086, 0x248A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* Intel ICH3 (E7500/1) UDMA 100 */ + { 0x8086, 0x248B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* Intel ICH4 (i845GV, i845E, i852, i855) UDMA 100 */ + { 0x8086, 0x24CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + { 0x8086, 0x24CB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* Intel ICH5 */ + { 0x8086, 0x24DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_133 }, + /* C-ICH (i810E2) */ + { 0x8086, 0x245B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* ESB (855GME/875P + 6300ESB) UDMA 100 */ + { 0x8086, 0x25A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* ICH6 (and 6) (i915) UDMA 100 */ + { 0x8086, 0x266F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, + /* ICH7/7-R (i945, i975) UDMA 100*/ + { 0x8086, 0x27DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_133 }, + { 0x8086, 0x269E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, #endif /* NOTE: The following PCI ids must be kept in sync with the @@ -263,6 +301,39 @@ static const struct ata_port_operations piix_pata_ops = { .host_stop = piix_host_stop, }; +static const struct ata_port_operations ich_pata_ops = { + .port_disable = ata_port_disable, + .set_piomode = piix_set_piomode, + .set_dmamode = ich_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ich_pata_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + static const struct ata_port_operations piix_sata_ops = { .port_disable = ata_port_disable, @@ -359,35 +430,56 @@ static const struct piix_map_db *piix_map_db_table[] = { }; static struct ata_port_info piix_port_info[] = { - /* piix4_pata */ + /* piix_pata_33: 0: PIIX3 or 4 at 33MHz */ { .sht = &piix_sht, - .flags = ATA_FLAG_SLAVE_POSS, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, .pio_mask = 0x1f, /* pio0-4 */ -#if 0 - .mwdma_mask = 0x06, /* mwdma1-2 */ -#else - .mwdma_mask = 0x00, /* mwdma broken */ -#endif + .mwdma_mask = 0x06, /* mwdma1-2 ?? CHECK 0 should be ok but slow */ .udma_mask = ATA_UDMA_MASK_40C, .port_ops = &piix_pata_ops, }, - /* ich5_pata */ + /* ich_pata_33: 1 ICH0 - ICH at 33Mhz*/ + { + .sht = &piix_sht, + .flags = ATA_FLAG_SRST | ATA_FLAG_SLAVE_POSS, + .pio_mask = 0x1f, /* pio 0-4 */ + .mwdma_mask = 0x06, /* Check: maybe 0x07 */ + .udma_mask = ATA_UDMA2, /* UDMA33 */ + .port_ops = &ich_pata_ops, + }, + /* ich_pata_66: 2 ICH controllers up to 66MHz */ { .sht = &piix_sht, - .flags = ATA_FLAG_SLAVE_POSS | PIIX_FLAG_CHECKINTR, + .flags = ATA_FLAG_SRST | ATA_FLAG_SLAVE_POSS, + .pio_mask = 0x1f, /* pio 0-4 */ + .mwdma_mask = 0x06, /* MWDMA0 is broken on chip */ + .udma_mask = ATA_UDMA4, + .port_ops = &ich_pata_ops, + }, + + /* ich_pata_100: 3 */ + { + .sht = &piix_sht, + .flags = ATA_FLAG_SRST | ATA_FLAG_SLAVE_POSS | PIIX_FLAG_CHECKINTR, .pio_mask = 0x1f, /* pio0-4 */ -#if 0 .mwdma_mask = 0x06, /* mwdma1-2 */ -#else - .mwdma_mask = 0x00, /* mwdma broken */ -#endif - .udma_mask = 0x3f, /* udma0-5 */ - .port_ops = &piix_pata_ops, + .udma_mask = ATA_UDMA5, /* udma0-5 */ + .port_ops = &ich_pata_ops, }, - /* ich5_sata */ + /* ich_pata_133: 4 ICH with full UDMA6 */ + { + .sht = &piix_sht, + .flags = ATA_FLAG_SRST | ATA_FLAG_SLAVE_POSS | PIIX_FLAG_CHECKINTR, + .pio_mask = 0x1f, /* pio 0-4 */ + .mwdma_mask = 0x06, /* Check: maybe 0x07 */ + .udma_mask = ATA_UDMA6, /* UDMA133 */ + .port_ops = &ich_pata_ops, + }, + + /* ich5_sata: 5 */ { .sht = &piix_sht, .flags = ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR | @@ -398,7 +490,7 @@ static struct ata_port_info piix_port_info[] = { .port_ops = &piix_sata_ops, }, - /* i6300esb_sata */ + /* i6300esb_sata: 6 */ { .sht = &piix_sht, .flags = ATA_FLAG_SATA | @@ -409,7 +501,7 @@ static struct ata_port_info piix_port_info[] = { .port_ops = &piix_sata_ops, }, - /* ich6_sata */ + /* ich6_sata: 7 */ { .sht = &piix_sht, .flags = ATA_FLAG_SATA | @@ -420,7 +512,7 @@ static struct ata_port_info piix_port_info[] = { .port_ops = &piix_sata_ops, }, - /* ich6_sata_ahci */ + /* ich6_sata_ahci:8 */ { .sht = &piix_sht, .flags = ATA_FLAG_SATA | @@ -432,7 +524,7 @@ static struct ata_port_info piix_port_info[] = { .port_ops = &piix_sata_ops, }, - /* ich6m_sata_ahci */ + /* ich6m_sata_ahci: 9 */ { .sht = &piix_sht, .flags = ATA_FLAG_SATA | @@ -455,6 +547,7 @@ static struct ata_port_info piix_port_info[] = { .udma_mask = 0x7f, /* udma0-6 */ .port_ops = &piix_sata_ops, }, + }; static struct pci_bits piix_enable_bits[] = { @@ -483,7 +576,8 @@ MODULE_PARM_DESC(force_pcs, "force honoring or ignoring PCS to work around " * LOCKING: * None (inherited from caller). */ -static void piix_pata_cbl_detect(struct ata_port *ap) + +static void ich_pata_cbl_detect(struct ata_port *ap) { struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 tmp, mask; @@ -503,14 +597,12 @@ static void piix_pata_cbl_detect(struct ata_port *ap) cbl40: ap->cbl = ATA_CBL_PATA40; - ap->udma_mask &= ATA_UDMA_MASK_40C; } /** * piix_pata_prereset - prereset for PATA host controller * @ap: Target port * - * Prereset including cable detection. * * LOCKING: * None (inherited from caller). @@ -524,9 +616,7 @@ static int piix_pata_prereset(struct ata_port *ap) ap->eh_context.i.action &= ~ATA_EH_RESET_MASK; return 0; } - - piix_pata_cbl_detect(ap); - + ap->cbl = ATA_CBL_PATA40; return ata_std_prereset(ap); } @@ -536,6 +626,36 @@ static void piix_pata_error_handler(struct ata_port *ap) ata_std_postreset); } + +/** + * ich_pata_prereset - prereset for PATA host controller + * @ap: Target port + * + * + * LOCKING: + * None (inherited from caller). + */ +static int ich_pata_prereset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (!pci_test_config_bits(pdev, &piix_enable_bits[ap->port_no])) { + ata_port_printk(ap, KERN_INFO, "port disabled. ignoring.\n"); + ap->eh_context.i.action &= ~ATA_EH_RESET_MASK; + return 0; + } + + ich_pata_cbl_detect(ap); + + return ata_std_prereset(ap); +} + +static void ich_pata_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, ich_pata_prereset, ata_std_softreset, NULL, + ata_std_postreset); +} + /** * piix_sata_present_mask - determine present mask for SATA host controller * @ap: Target port @@ -637,6 +757,13 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) unsigned int slave_port = 0x44; u16 master_data; u8 slave_data; + u8 udma_enable; + int control = 0; + + /* + * See Intel Document 298600-004 for the timing programing rules + * for ICH controllers. + */ static const /* ISP RTC */ u8 timings[][2] = { { 0, 0 }, @@ -645,20 +772,30 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) { 2, 1 }, { 2, 3 }, }; + if (pio >= 2) + control |= 1; /* TIME1 enable */ + if (ata_pio_need_iordy(adev)) + control |= 2; /* IE enable */ + + /* Intel specifies that the PPE functionality is for disk only */ + if (adev->class == ATA_DEV_ATA) + control |= 4; /* PPE enable */ + pci_read_config_word(dev, master_port, &master_data); if (is_slave) { + /* Enable SITRE (seperate slave timing register) */ master_data |= 0x4000; - /* enable PPE, IE and TIME */ - master_data |= 0x0070; + /* enable PPE1, IE1 and TIME1 as needed */ + master_data |= (control << 4); pci_read_config_byte(dev, slave_port, &slave_data); slave_data &= (ap->port_no ? 0x0f : 0xf0); - slave_data |= - (timings[pio][0] << 2) | - (timings[pio][1] << (ap->port_no ? 4 : 0)); + /* Load the timing nibble for this slave */ + slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << (ap->port_no ? 4 : 0); } else { + /* Master keeps the bits in a different format */ master_data &= 0xccf8; - /* enable PPE, IE and TIME */ - master_data |= 0x0007; + /* Enable PPE, IE and TIME as appropriate */ + master_data |= control; master_data |= (timings[pio][0] << 12) | (timings[pio][1] << 8); @@ -666,13 +803,23 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) pci_write_config_word(dev, master_port, master_data); if (is_slave) pci_write_config_byte(dev, slave_port, slave_data); + + /* Ensure the UDMA bit is off - it will be turned back on if + UDMA is selected */ + + if (ap->udma_mask) { + pci_read_config_byte(dev, 0x48, &udma_enable); + udma_enable &= ~(1 << (2 * ap->port_no + adev->devno)); + pci_write_config_byte(dev, 0x48, udma_enable); + } } /** - * piix_set_dmamode - Initialize host controller PATA PIO timings + * do_pata_set_dmamode - Initialize host controller PATA PIO timings * @ap: Port whose timings we are configuring - * @adev: um + * @adev: Drive in question * @udma: udma mode, 0 - 6 + * @is_ich: set if the chip is an ICH device * * Set UDMA mode for device, in host controller PCI config space. * @@ -680,70 +827,140 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) * None (inherited from caller). */ -static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev) +static void do_pata_set_dmamode (struct ata_port *ap, struct ata_device *adev, int isich) { - unsigned int udma = adev->dma_mode; /* FIXME: MWDMA too */ struct pci_dev *dev = to_pci_dev(ap->host->dev); - u8 maslave = ap->port_no ? 0x42 : 0x40; - u8 speed = udma; - unsigned int drive_dn = (ap->port_no ? 2 : 0) + adev->devno; - int a_speed = 3 << (drive_dn * 4); - int u_flag = 1 << drive_dn; - int v_flag = 0x01 << drive_dn; - int w_flag = 0x10 << drive_dn; - int u_speed = 0; - int sitre; - u16 reg4042, reg4a; - u8 reg48, reg54, reg55; - - pci_read_config_word(dev, maslave, ®4042); - DPRINTK("reg4042 = 0x%04x\n", reg4042); - sitre = (reg4042 & 0x4000) ? 1 : 0; - pci_read_config_byte(dev, 0x48, ®48); - pci_read_config_word(dev, 0x4a, ®4a); - pci_read_config_byte(dev, 0x54, ®54); - pci_read_config_byte(dev, 0x55, ®55); - - switch(speed) { - case XFER_UDMA_4: - case XFER_UDMA_2: u_speed = 2 << (drive_dn * 4); break; - case XFER_UDMA_6: - case XFER_UDMA_5: - case XFER_UDMA_3: - case XFER_UDMA_1: u_speed = 1 << (drive_dn * 4); break; - case XFER_UDMA_0: u_speed = 0 << (drive_dn * 4); break; - case XFER_MW_DMA_2: - case XFER_MW_DMA_1: break; - default: - BUG(); - return; - } + u8 master_port = ap->port_no ? 0x42 : 0x40; + u16 master_data; + u8 speed = adev->dma_mode; + int devid = adev->devno + 2 * ap->port_no; + u8 udma_enable; + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + pci_read_config_word(dev, master_port, &master_data); + pci_read_config_byte(dev, 0x48, &udma_enable); if (speed >= XFER_UDMA_0) { - if (!(reg48 & u_flag)) - pci_write_config_byte(dev, 0x48, reg48 | u_flag); - if (speed == XFER_UDMA_5) { - pci_write_config_byte(dev, 0x55, (u8) reg55|w_flag); - } else { - pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag); + unsigned int udma = adev->dma_mode - XFER_UDMA_0; + u16 udma_timing; + u16 ideconf; + int u_clock, u_speed; + + /* + * UDMA is handled by a combination of clock switching and + * selection of dividers + * + * Handy rule: Odd modes are UDMATIMx 01, even are 02 + * except UDMA0 which is 00 + */ + u_speed = min(2 - (udma & 1), udma); + if (udma == 5) + u_clock = 0x1000; /* 100Mhz */ + else if (udma > 2) + u_clock = 1; /* 66Mhz */ + else + u_clock = 0; /* 33Mhz */ + + udma_enable |= (1 << devid); + + /* Load the CT/RP selection */ + pci_read_config_word(dev, 0x4A, &udma_timing); + udma_timing &= ~(3 << (4 * devid)); + udma_timing |= u_speed << (4 * devid); + pci_write_config_word(dev, 0x4A, udma_timing); + + if (isich) { + /* Select a 33/66/100Mhz clock */ + pci_read_config_word(dev, 0x54, &ideconf); + ideconf &= ~(0x1001 << devid); + ideconf |= u_clock << devid; + /* For ICH or later we should set bit 10 for better + performance (WR_PingPong_En) */ + pci_write_config_word(dev, 0x54, ideconf); } - if ((reg4a & a_speed) != u_speed) - pci_write_config_word(dev, 0x4a, (reg4a & ~a_speed) | u_speed); - if (speed > XFER_UDMA_2) { - if (!(reg54 & v_flag)) - pci_write_config_byte(dev, 0x54, reg54 | v_flag); - } else - pci_write_config_byte(dev, 0x54, reg54 & ~v_flag); } else { - if (reg48 & u_flag) - pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); - if (reg4a & a_speed) - pci_write_config_word(dev, 0x4a, reg4a & ~a_speed); - if (reg54 & v_flag) - pci_write_config_byte(dev, 0x54, reg54 & ~v_flag); - if (reg55 & w_flag) - pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag); + /* + * MWDMA is driven by the PIO timings. We must also enable + * IORDY unconditionally along with TIME1. PPE has already + * been set when the PIO timing was set. + */ + unsigned int mwdma = adev->dma_mode - XFER_MW_DMA_0; + unsigned int control; + u8 slave_data; + const unsigned int needed_pio[3] = { + XFER_PIO_0, XFER_PIO_3, XFER_PIO_4 + }; + int pio = needed_pio[mwdma] - XFER_PIO_0; + + control = 3; /* IORDY|TIME1 */ + + /* If the drive MWDMA is faster than it can do PIO then + we must force PIO into PIO0 */ + + if (adev->pio_mode < needed_pio[mwdma]) + /* Enable DMA timing only */ + control |= 8; /* PIO cycles in PIO0 */ + + if (adev->devno) { /* Slave */ + master_data &= 0xFF4F; /* Mask out IORDY|TIME1|DMAONLY */ + master_data |= control << 4; + pci_read_config_byte(dev, 0x44, &slave_data); + slave_data &= (0x0F + 0xE1 * ap->port_no); + /* Load the matching timing */ + slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << (ap->port_no ? 4 : 0); + pci_write_config_byte(dev, 0x44, slave_data); + } else { /* Master */ + master_data &= 0xCCF4; /* Mask out IORDY|TIME1|DMAONLY + and master timing bits */ + master_data |= control; + master_data |= + (timings[pio][0] << 12) | + (timings[pio][1] << 8); + } + udma_enable &= ~(1 << devid); + pci_write_config_word(dev, master_port, master_data); } + /* Don't scribble on 0x48 if the controller does not support UDMA */ + if (ap->udma_mask) + pci_write_config_byte(dev, 0x48, udma_enable); +} + +/** + * piix_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: um + * + * Set MW/UDMA mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + do_pata_set_dmamode(ap, adev, 0); +} + +/** + * ich_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: um + * + * Set MW/UDMA mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void ich_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + do_pata_set_dmamode(ap, adev, 1); } #define AHCI_PCI_BAR 5 @@ -872,7 +1089,7 @@ static void __devinit piix_init_sata_map(struct pci_dev *pdev, case IDE: WARN_ON((i & 1) || map[i + 1] != IDE); - pinfo[i / 2] = piix_port_info[ich5_pata]; + pinfo[i / 2] = piix_port_info[ich_pata_100]; pinfo[i / 2].private_data = hpriv; i++; printk(" IDE IDE"); @@ -1007,4 +1224,3 @@ static void __exit piix_exit(void) module_init(piix_init); module_exit(piix_exit); - diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c new file mode 100644 index 000000000000..0e0415ecfce7 --- /dev/null +++ b/drivers/ata/pata_ali.c @@ -0,0 +1,679 @@ +/* + * pata_ali.c - ALI 15x3 PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * based in part upon + * linux/drivers/ide/pci/alim15x3.c Version 0.17 2003/01/02 + * + * Copyright (C) 1998-2000 Michel Aubry, Maintainer + * Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer + * Copyright (C) 1999-2000 CJ, cjtsai@ali.com.tw, Maintainer + * + * Copyright (C) 1998-2000 Andre Hedrick (andre@linux-ide.org) + * May be copied or modified under the terms of the GNU General Public License + * Copyright (C) 2002 Alan Cox + * ALi (now ULi M5228) support by Clear Zhang + * + * Documentation + * Chipset documentation available under NDA only + * + * TODO/CHECK + * Cannot have ATAPI on both master & slave for rev < c2 (???) but + * otherwise should do atapi DMA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_ali" +#define DRV_VERSION "0.6.5" + +/* + * Cable special cases + */ + +static struct dmi_system_id cable_dmi_table[] = { + { + .ident = "HP Pavilion N5430", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_BOARD_NAME, "OmniBook N32N-736"), + }, + }, + { } +}; + +static int ali_cable_override(struct pci_dev *pdev) +{ + /* Fujitsu P2000 */ + if (pdev->subsystem_vendor == 0x10CF && pdev->subsystem_device == 0x10AF) + return 1; + /* Systems by DMI */ + if (dmi_check_system(cable_dmi_table)) + return 1; + return 0; +} + +/** + * ali_c2_cable_detect - cable detection + * @ap: ATA port + * + * Perform cable detection for C2 and later revisions + */ + +static int ali_c2_cable_detect(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 ata66; + + /* Certain laptops use short but suitable cables and don't + implement the detect logic */ + + if (ali_cable_override(pdev)) + return ATA_CBL_PATA80; + + /* Host view cable detect 0x4A bit 0 primary bit 1 secondary + Bit set for 40 pin */ + pci_read_config_byte(pdev, 0x4A, &ata66); + if (ata66 & (1 << ap->port_no)) + return ATA_CBL_PATA40; + else + return ATA_CBL_PATA80; +} + +/** + * ali_early_error_handler - reset for eary chip + * @ap: ATA port + * + * Handle the reset callback for the later chips with cable detect + */ + +static int ali_c2_pre_reset(struct ata_port *ap) +{ + ap->cbl = ali_c2_cable_detect(ap); + return ata_std_prereset(ap); +} + +static void ali_c2_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, ali_c2_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} + +/** + * ali_early_cable_detect - cable detection + * @ap: ATA port + * + * Perform cable detection for older chipsets. This turns out to be + * rather easy to implement + */ + +static int ali_early_cable_detect(struct ata_port *ap) +{ + return ATA_CBL_PATA40; +} + +/** + * ali_early_probe_init - reset for early chip + * @ap: ATA port + * + * Handle the reset callback for the early (pre cable detect) chips. + */ + +static int ali_early_pre_reset(struct ata_port *ap) +{ + ap->cbl = ali_early_cable_detect(ap); + return ata_std_prereset(ap); +} + +static void ali_early_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, ali_early_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} + +/** + * ali_20_filter - filter for earlier ALI DMA + * @ap: ALi ATA port + * @adev: attached device + * + * Ensure that we do not do DMA on CD devices. We may be able to + * fix that later on. Also ensure we do not do UDMA on WDC drives + */ + +static unsigned long ali_20_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) +{ + char model_num[40]; + /* No DMA on anything but a disk for now */ + if (adev->class != ATA_DEV_ATA) + mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA); + ata_id_string(adev->id, model_num, ATA_ID_PROD_OFS, sizeof(model_num)); + if (strstr(model_num, "WDC")) + return mask &= ~ATA_MASK_UDMA; + return ata_pci_default_filter(ap, adev, mask); +} + +/** + * ali_fifo_control - FIFO manager + * @ap: ALi channel to control + * @adev: device for FIFO control + * @on: 0 for off 1 for on + * + * Enable or disable the FIFO on a given device. Because of the way the + * ALi FIFO works it provides a boost on ATA disk but can be confused by + * ATAPI and we must therefore manage it. + */ + +static void ali_fifo_control(struct ata_port *ap, struct ata_device *adev, int on) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int pio_fifo = 0x54 + ap->port_no; + u8 fifo; + int shift = 4 * adev->devno; + + /* ATA - FIFO on set nibble to 0x05, ATAPI - FIFO off, set nibble to + 0x00. Not all the docs agree but the behaviour we now use is the + one stated in the BIOS Programming Guide */ + + pci_read_config_byte(pdev, pio_fifo, &fifo); + fifo &= ~(0x0F << shift); + if (on) + fifo |= (on << shift); + pci_write_config_byte(pdev, pio_fifo, fifo); +} + +/** + * ali_program_modes - load mode registers + * @ap: ALi channel to load + * @adev: Device the timing is for + * @cmd: Command timing + * @data: Data timing + * @ultra: UDMA timing or zero for off + * + * Loads the timing registers for cmd/data and disable UDMA if + * ultra is zero. If ultra is set then load and enable the UDMA + * timing but do not touch the command/data timing. + */ + +static void ali_program_modes(struct ata_port *ap, struct ata_device *adev, struct ata_timing *t, u8 ultra) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int cas = 0x58 + 4 * ap->port_no; /* Command timing */ + int cbt = 0x59 + 4 * ap->port_no; /* Command timing */ + int drwt = 0x5A + 4 * ap->port_no + adev->devno; /* R/W timing */ + int udmat = 0x56 + ap->port_no; /* UDMA timing */ + int shift = 4 * adev->devno; + u8 udma; + + if (t != NULL) { + t->setup = FIT(t->setup, 1, 8) & 7; + t->act8b = FIT(t->act8b, 1, 8) & 7; + t->rec8b = FIT(t->rec8b, 1, 16) & 15; + t->active = FIT(t->active, 1, 8) & 7; + t->recover = FIT(t->recover, 1, 16) & 15; + + pci_write_config_byte(pdev, cas, t->setup); + pci_write_config_byte(pdev, cbt, (t->act8b << 4) | t->rec8b); + pci_write_config_byte(pdev, drwt, (t->active << 4) | t->recover); + } + + /* Set up the UDMA enable */ + pci_read_config_byte(pdev, udmat, &udma); + udma &= ~(0x0F << shift); + udma |= ultra << shift; + pci_write_config_byte(pdev, udmat, udma); +} + +/** + * ali_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the ALi registers for PIO mode. FIXME: add timings for + * PIO5. + */ + +static void ali_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct ata_device *pair = ata_dev_pair(adev); + struct ata_timing t; + unsigned long T = 1000000000 / 33333; /* PCI clock based */ + + ata_timing_compute(adev, adev->pio_mode, &t, T, 1); + if (pair) { + struct ata_timing p; + ata_timing_compute(pair, pair->pio_mode, &p, T, 1); + ata_timing_merge(&p, &t, &t, ATA_TIMING_SETUP|ATA_TIMING_8BIT); + if (pair->dma_mode) { + ata_timing_compute(pair, pair->dma_mode, &p, T, 1); + ata_timing_merge(&p, &t, &t, ATA_TIMING_SETUP|ATA_TIMING_8BIT); + } + } + + /* PIO FIFO is only permitted on ATA disk */ + if (adev->class != ATA_DEV_ATA) + ali_fifo_control(ap, adev, 0x00); + ali_program_modes(ap, adev, &t, 0); + if (adev->class == ATA_DEV_ATA) + ali_fifo_control(ap, adev, 0x05); + +} + +/** + * ali_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * FIXME: MWDMA timings + */ + +static void ali_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static u8 udma_timing[7] = { 0xC, 0xB, 0xA, 0x9, 0x8, 0xF, 0xD }; + struct ata_device *pair = ata_dev_pair(adev); + struct ata_timing t; + unsigned long T = 1000000000 / 33333; /* PCI clock based */ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + + if (adev->class == ATA_DEV_ATA) + ali_fifo_control(ap, adev, 0x08); + + if (adev->dma_mode >= XFER_UDMA_0) { + ali_program_modes(ap, adev, NULL, udma_timing[adev->dma_mode - XFER_UDMA_0]); + if (adev->dma_mode >= XFER_UDMA_3) { + u8 reg4b; + pci_read_config_byte(pdev, 0x4B, ®4b); + reg4b |= 1; + pci_write_config_byte(pdev, 0x4B, reg4b); + } + } else { + ata_timing_compute(adev, adev->dma_mode, &t, T, 1); + if (pair) { + struct ata_timing p; + ata_timing_compute(pair, pair->pio_mode, &p, T, 1); + ata_timing_merge(&p, &t, &t, ATA_TIMING_SETUP|ATA_TIMING_8BIT); + if (pair->dma_mode) { + ata_timing_compute(pair, pair->dma_mode, &p, T, 1); + ata_timing_merge(&p, &t, &t, ATA_TIMING_SETUP|ATA_TIMING_8BIT); + } + } + ali_program_modes(ap, adev, &t, 0); + } +} + +/** + * ali_lock_sectors - Keep older devices to 255 sector mode + * @ap: ATA port + * @adev: Device + * + * Called during the bus probe for each device that is found. We use + * this call to lock the sector count of the device to 255 or less on + * older ALi controllers. If we didn't do this then large I/O's would + * require LBA48 commands which the older ALi requires are issued by + * slower PIO methods + */ + +static void ali_lock_sectors(struct ata_port *ap, struct ata_device *adev) +{ + adev->max_sectors = 255; +} + +static struct scsi_host_template ali_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + /* Keep LBA28 counts so large I/O's don't turn LBA48 and PIO + with older controllers. Not locked so will grow on C5 or later */ + .max_sectors = 255, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +/* + * Port operations for PIO only ALi + */ + +static struct ata_port_operations ali_early_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = ali_set_piomode, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ali_early_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Port operations for DMA capable ALi without cable + * detect + */ +static struct ata_port_operations ali_20_port_ops = { + .port_disable = ata_port_disable, + + .set_piomode = ali_set_piomode, + .set_dmamode = ali_set_dmamode, + .mode_filter = ali_20_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + .dev_config = ali_lock_sectors, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ali_early_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Port operations for DMA capable ALi with cable detect + */ +static struct ata_port_operations ali_c2_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = ali_set_piomode, + .set_dmamode = ali_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + .dev_config = ali_lock_sectors, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ali_c2_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Port operations for DMA capable ALi with cable detect and LBA48 + */ +static struct ata_port_operations ali_c5_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = ali_set_piomode, + .set_dmamode = ali_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ali_c2_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * ali_init_one - discovery callback + * @pdev: PCI device ID + * @id: PCI table info + * + * An ALi IDE interface has been discovered. Figure out what revision + * and perform configuration work before handing it to the ATA layer + */ + +static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + static struct ata_port_info info_early = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .port_ops = &ali_early_port_ops + }; + /* Revision 0x20 added DMA */ + static struct ata_port_info info_20 = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | ATA_FLAG_PIO_LBA48, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &ali_20_port_ops + }; + /* Revision 0x20 with support logic added UDMA */ + static struct ata_port_info info_20_udma = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | ATA_FLAG_PIO_LBA48, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x07, /* UDMA33 */ + .port_ops = &ali_20_port_ops + }; + /* Revision 0xC2 adds UDMA66 */ + static struct ata_port_info info_c2 = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | ATA_FLAG_PIO_LBA48, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, + .port_ops = &ali_c2_port_ops + }; + /* Revision 0xC3 is UDMA100 */ + static struct ata_port_info info_c3 = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | ATA_FLAG_PIO_LBA48, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &ali_c2_port_ops + }; + /* Revision 0xC4 is UDMA133 */ + static struct ata_port_info info_c4 = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST | ATA_FLAG_PIO_LBA48, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &ali_c2_port_ops + }; + /* Revision 0xC5 is UDMA133 with LBA48 DMA */ + static struct ata_port_info info_c5 = { + .sht = &ali_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &ali_c5_port_ops + }; + + static struct ata_port_info *port_info[2]; + u8 rev, tmp; + struct pci_dev *north, *isa_bridge; + + pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); + + /* + * The chipset revision selects the driver operations and + * mode data. + */ + + if (rev < 0x20) { + port_info[0] = port_info[1] = &info_early; + } else if (rev < 0xC2) { + /* 1543-E/F, 1543C-C, 1543C-D, 1543C-E */ + pci_read_config_byte(pdev, 0x4B, &tmp); + /* Clear CD-ROM DMA write bit */ + tmp &= 0x7F; + pci_write_config_byte(pdev, 0x4B, tmp); + port_info[0] = port_info[1] = &info_20; + } else if (rev == 0xC2) { + port_info[0] = port_info[1] = &info_c2; + } else if (rev == 0xC3) { + port_info[0] = port_info[1] = &info_c3; + } else if (rev == 0xC4) { + port_info[0] = port_info[1] = &info_c4; + } else + port_info[0] = port_info[1] = &info_c5; + + if (rev >= 0xC2) { + /* Enable cable detection logic */ + pci_read_config_byte(pdev, 0x4B, &tmp); + pci_write_config_byte(pdev, 0x4B, tmp | 0x08); + } + + north = pci_get_slot(pdev->bus, PCI_DEVFN(0,0)); + isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); + + if (north && north->vendor == PCI_VENDOR_ID_AL) { + /* Configure the ALi bridge logic. For non ALi rely on BIOS. + Set the south bridge enable bit */ + pci_read_config_byte(isa_bridge, 0x79, &tmp); + if (rev == 0xC2) + pci_write_config_byte(isa_bridge, 0x79, tmp | 0x04); + else if (rev > 0xC2) + pci_write_config_byte(isa_bridge, 0x79, tmp | 0x02); + } + + if (rev >= 0x20) { + if (rev < 0xC2) { + /* Are we paired with a UDMA capable chip */ + pci_read_config_byte(isa_bridge, 0x5E, &tmp); + if ((tmp & 0x1E) == 0x12) + port_info[0] = port_info[1] = &info_20_udma; + } + /* + * CD_ROM DMA on (0x53 bit 0). Enable this even if we want + * to use PIO. 0x53 bit 1 (rev 20 only) - enable FIFO control + * via 0x54/55. + */ + pci_read_config_byte(pdev, 0x53, &tmp); + if (rev <= 0x20) + tmp &= ~0x02; + if (rev == 0xc7) + tmp |= 0x03; + else + tmp |= 0x01; /* CD_ROM enable for DMA */ + pci_write_config_byte(pdev, 0x53, tmp); + } + + pci_dev_put(isa_bridge); + pci_dev_put(north); + + ata_pci_clear_simplex(pdev); + return ata_pci_init_one(pdev, port_info, 2); +} + +static struct pci_device_id ali[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M5228), }, + { PCI_DEVICE(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M5229), }, + { 0, }, +}; + +static struct pci_driver ali_pci_driver = { + .name = DRV_NAME, + .id_table = ali, + .probe = ali_init_one, + .remove = ata_pci_remove_one +}; + +static int __init ali_init(void) +{ + return pci_register_driver(&ali_pci_driver); +} + + +static void __exit ali_exit(void) +{ + pci_unregister_driver(&ali_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for ALi PATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ali); +MODULE_VERSION(DRV_VERSION); + +module_init(ali_init); +module_exit(ali_exit); diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c new file mode 100644 index 000000000000..6ab568428a14 --- /dev/null +++ b/drivers/ata/pata_amd.c @@ -0,0 +1,707 @@ +/* + * pata_amd.c - AMD PATA for new ATA layer + * (C) 2005-2006 Red Hat Inc + * Alan Cox + * + * Based on pata-sil680. Errata information is taken from data sheets + * and the amd74xx.c driver by Vojtech Pavlik. Nvidia SATA devices are + * claimed by sata-nv.c. + * + * TODO: + * Variable system clock when/if it makes sense + * Power management on ports + * + * + * Documentation publically available. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_amd" +#define DRV_VERSION "0.2.2" + +/** + * timing_setup - shared timing computation and load + * @ap: ATA port being set up + * @adev: drive being configured + * @offset: port offset + * @speed: target speed + * @clock: clock multiplier (number of times 33MHz for this part) + * + * Perform the actual timing set up for Nvidia or AMD PATA devices. + * The actual devices vary so they all call into this helper function + * providing the clock multipler and offset (because AMD and Nvidia put + * the ports at different locations). + */ + +static void timing_setup(struct ata_port *ap, struct ata_device *adev, int offset, int speed, int clock) +{ + static const unsigned char amd_cyc2udma[] = { + 6, 6, 5, 4, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 7 + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct ata_device *peer = ata_dev_pair(adev); + int dn = ap->port_no * 2 + adev->devno; + struct ata_timing at, apeer; + int T, UT; + const int amd_clock = 33333; /* KHz. */ + u8 t; + + T = 1000000000 / amd_clock; + UT = T / min_t(int, max_t(int, clock, 1), 2); + + if (ata_timing_compute(adev, speed, &at, T, UT) < 0) { + dev_printk(KERN_ERR, &pdev->dev, "unknown mode %d.\n", speed); + return; + } + + if (peer) { + /* This may be over conservative */ + if (peer->dma_mode) { + ata_timing_compute(peer, peer->dma_mode, &apeer, T, UT); + ata_timing_merge(&apeer, &at, &at, ATA_TIMING_8BIT); + } + ata_timing_compute(peer, peer->pio_mode, &apeer, T, UT); + ata_timing_merge(&apeer, &at, &at, ATA_TIMING_8BIT); + } + + if (speed == XFER_UDMA_5 && amd_clock <= 33333) at.udma = 1; + if (speed == XFER_UDMA_6 && amd_clock <= 33333) at.udma = 15; + + /* + * Now do the setup work + */ + + /* Configure the address set up timing */ + pci_read_config_byte(pdev, offset + 0x0C, &t); + t = (t & ~(3 << ((3 - dn) << 1))) | ((FIT(at.setup, 1, 4) - 1) << ((3 - dn) << 1)); + pci_write_config_byte(pdev, offset + 0x0C , t); + + /* Configure the 8bit I/O timing */ + pci_write_config_byte(pdev, offset + 0x0E + (1 - (dn >> 1)), + ((FIT(at.act8b, 1, 16) - 1) << 4) | (FIT(at.rec8b, 1, 16) - 1)); + + /* Drive timing */ + pci_write_config_byte(pdev, offset + 0x08 + (3 - dn), + ((FIT(at.active, 1, 16) - 1) << 4) | (FIT(at.recover, 1, 16) - 1)); + + switch (clock) { + case 1: + t = at.udma ? (0xc0 | (FIT(at.udma, 2, 5) - 2)) : 0x03; + break; + + case 2: + t = at.udma ? (0xc0 | amd_cyc2udma[FIT(at.udma, 2, 10)]) : 0x03; + break; + + case 3: + t = at.udma ? (0xc0 | amd_cyc2udma[FIT(at.udma, 1, 10)]) : 0x03; + break; + + case 4: + t = at.udma ? (0xc0 | amd_cyc2udma[FIT(at.udma, 1, 15)]) : 0x03; + break; + + default: + return; + } + + /* UDMA timing */ + pci_write_config_byte(pdev, offset + 0x10 + (3 - dn), t); +} + +/** + * amd_probe_init - cable detection + * @ap: ATA port + * + * Perform cable detection. The BIOS stores this in PCI config + * space for us. + */ + +static int amd_pre_reset(struct ata_port *ap) +{ + static const u32 bitmask[2] = {0x03, 0xC0}; + static const struct pci_bits amd_enable_bits[] = { + { 0x40, 1, 0x02, 0x02 }, + { 0x40, 1, 0x01, 0x01 } + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 ata66; + + if (!pci_test_config_bits(pdev, &amd_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + + pci_read_config_byte(pdev, 0x42, &ata66); + if (ata66 & bitmask[ap->port_no]) + ap->cbl = ATA_CBL_PATA80; + else + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); + +} + +static void amd_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, amd_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} + +static int amd_early_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static struct pci_bits amd_enable_bits[] = { + { 0x40, 1, 0x02, 0x02 }, + { 0x40, 1, 0x01, 0x01 } + }; + + if (!pci_test_config_bits(pdev, &amd_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + /* No host side cable detection */ + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); + +} + +static void amd_early_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, amd_early_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} + +/** + * amd33_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the AMD registers for PIO mode. + */ + +static void amd33_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->pio_mode, 1); +} + +static void amd66_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->pio_mode, 2); +} + +static void amd100_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->pio_mode, 3); +} + +static void amd133_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->pio_mode, 4); +} + +/** + * amd33_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the MWDMA/UDMA modes for the AMD and Nvidia + * chipset. + */ + +static void amd33_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->dma_mode, 1); +} + +static void amd66_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->dma_mode, 2); +} + +static void amd100_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->dma_mode, 3); +} + +static void amd133_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x40, adev->dma_mode, 4); +} + + +/** + * nv_probe_init - cable detection + * @ap: ATA port + * + * Perform cable detection. The BIOS stores this in PCI config + * space for us. + */ + +static int nv_pre_reset(struct ata_port *ap) { + static const u8 bitmask[2] = {0x03, 0xC0}; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 ata66; + u16 udma; + + pci_read_config_byte(pdev, 0x52, &ata66); + if (ata66 & bitmask[ap->port_no]) + ap->cbl = ATA_CBL_PATA80; + else + ap->cbl = ATA_CBL_PATA40; + + /* We now have to double check because the Nvidia boxes BIOS + doesn't always set the cable bits but does set mode bits */ + + pci_read_config_word(pdev, 0x62 - 2 * ap->port_no, &udma); + if ((udma & 0xC4) == 0xC4 || (udma & 0xC400) == 0xC400) + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +static void nv_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, nv_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} +/** + * nv100_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the AMD registers for PIO mode. + */ + +static void nv100_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x50, adev->pio_mode, 3); +} + +static void nv133_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x50, adev->pio_mode, 4); +} + +/** + * nv100_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the MWDMA/UDMA modes for the AMD and Nvidia + * chipset. + */ + +static void nv100_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x50, adev->dma_mode, 3); +} + +static void nv133_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + timing_setup(ap, adev, 0x50, adev->dma_mode, 4); +} + +static struct scsi_host_template amd_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations amd33_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = amd33_set_piomode, + .set_dmamode = amd33_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = amd_early_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations amd66_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = amd66_set_piomode, + .set_dmamode = amd66_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = amd_early_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations amd100_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = amd100_set_piomode, + .set_dmamode = amd100_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = amd_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations amd133_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = amd133_set_piomode, + .set_dmamode = amd133_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = amd_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations nv100_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = nv100_set_piomode, + .set_dmamode = nv100_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = nv_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations nv133_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = nv133_set_piomode, + .set_dmamode = nv133_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = nv_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int amd_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + static struct ata_port_info info[10] = { + { /* 0: AMD 7401 */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, /* No SWDMA */ + .udma_mask = 0x07, /* UDMA 33 */ + .port_ops = &amd33_port_ops + }, + { /* 1: Early AMD7409 - no swdma */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, /* UDMA 66 */ + .port_ops = &amd66_port_ops + }, + { /* 2: AMD 7409, no swdma errata */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, /* UDMA 66 */ + .port_ops = &amd66_port_ops + }, + { /* 3: AMD 7411 */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, /* UDMA 100 */ + .port_ops = &amd100_port_ops + }, + { /* 4: AMD 7441 */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, /* UDMA 100 */ + .port_ops = &amd100_port_ops + }, + { /* 5: AMD 8111*/ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, /* UDMA 133, no swdma */ + .port_ops = &amd133_port_ops + }, + { /* 6: AMD 8111 UDMA 100 (Serenade) */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, /* UDMA 100, no swdma */ + .port_ops = &amd133_port_ops + }, + { /* 7: Nvidia Nforce */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, /* UDMA 100 */ + .port_ops = &nv100_port_ops + }, + { /* 8: Nvidia Nforce2 and later */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, /* UDMA 133, no swdma */ + .port_ops = &nv133_port_ops + }, + { /* 9: AMD CS5536 (Geode companion) */ + .sht = &amd_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, /* UDMA 100 */ + .port_ops = &amd100_port_ops + } + }; + static struct ata_port_info *port_info[2]; + static int printed_version; + int type = id->driver_data; + u8 rev; + u8 fifo; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); + + pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); + pci_read_config_byte(pdev, 0x41, &fifo); + + /* Check for AMD7409 without swdma errata and if found adjust type */ + if (type == 1 && rev > 0x7) + type = 2; + + /* Check for AMD7411 */ + if (type == 3) + /* FIFO is broken */ + pci_write_config_byte(pdev, 0x41, fifo & 0x0F); + else + pci_write_config_byte(pdev, 0x41, fifo | 0xF0); + + /* Serenade ? */ + if (type == 5 && pdev->subsystem_vendor == PCI_VENDOR_ID_AMD && + pdev->subsystem_device == PCI_DEVICE_ID_AMD_SERENADE) + type = 6; /* UDMA 100 only */ + + if (type < 3) + ata_pci_clear_simplex(pdev); + + /* And fire it up */ + + port_info[0] = port_info[1] = &info[type]; + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id amd[] = { + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1 }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3 }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_OPUS_7441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9 }, + { 0, }, +}; + +static struct pci_driver amd_pci_driver = { + .name = DRV_NAME, + .id_table = amd, + .probe = amd_init_one, + .remove = ata_pci_remove_one +}; + +static int __init amd_init(void) +{ + return pci_register_driver(&amd_pci_driver); +} + +static void __exit amd_exit(void) +{ + pci_unregister_driver(&amd_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for AMD PATA IDE"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, amd); +MODULE_VERSION(DRV_VERSION); + +module_init(amd_init); +module_exit(amd_exit); diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c new file mode 100644 index 000000000000..d6ef3bf1bac7 --- /dev/null +++ b/drivers/ata/pata_artop.c @@ -0,0 +1,518 @@ +/* + * pata_artop.c - ARTOP ATA controller driver + * + * (C) 2006 Red Hat + * + * Based in part on drivers/ide/pci/aec62xx.c + * Copyright (C) 1999-2002 Andre Hedrick + * 865/865R fixes for Macintosh card version from a patch to the old + * driver by Thibaut VARENE + * When setting the PCI latency we must set 0x80 or higher for burst + * performance Alessandro Zummo + * + * TODO + * 850 serialization once the core supports it + * Investigate no_dsc on 850R + * Clock detect + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_artop" +#define DRV_VERSION "0.4.1" + +/* + * The ARTOP has 33 Mhz and "over clocked" timing tables. Until we + * get PCI bus speed functionality we leave this as 0. Its a variable + * for when we get the functionality and also for folks wanting to + * test stuff. + */ + +static int clock = 0; + +static int artop6210_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + const struct pci_bits artop_enable_bits[] = { + { 0x4AU, 1U, 0x02UL, 0x02UL }, /* port 0 */ + { 0x4AU, 1U, 0x04UL, 0x04UL }, /* port 1 */ + }; + + if (!pci_test_config_bits(pdev, &artop_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * artop6210_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6210_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, artop6210_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} + +/** + * artop6260_pre_reset - check for 40/80 pin + * @ap: Port + * + * The ARTOP hardware reports the cable detect bits in register 0x49. + * Nothing complicated needed here. + */ + +static int artop6260_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits artop_enable_bits[] = { + { 0x4AU, 1U, 0x02UL, 0x02UL }, /* port 0 */ + { 0x4AU, 1U, 0x04UL, 0x04UL }, /* port 1 */ + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 tmp; + + /* Odd numbered device ids are the units with enable bits (the -R cards) */ + if (pdev->device % 1 && !pci_test_config_bits(pdev, &artop_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + pci_read_config_byte(pdev, 0x49, &tmp); + if (tmp & (1 >> ap->port_no)) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +/** + * artop6260_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6260_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, artop6260_pre_reset, + ata_std_softreset, NULL, + ata_std_postreset); +} + +/** + * artop6210_load_piomode - Load a set of PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device + * @pio: PIO mode + * + * Set PIO mode for device, in host controller PCI config space. This + * is used both to set PIO timings in PIO mode and also to set the + * matching PIO clocking for UDMA, as well as the MWDMA timings. + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6210_load_piomode(struct ata_port *ap, struct ata_device *adev, unsigned int pio) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = adev->devno + 2 * ap->port_no; + const u16 timing[2][5] = { + { 0x0000, 0x000A, 0x0008, 0x0303, 0x0301 }, + { 0x0700, 0x070A, 0x0708, 0x0403, 0x0401 } + + }; + /* Load the PIO timing active/recovery bits */ + pci_write_config_word(pdev, 0x40 + 2 * dn, timing[clock][pio]); +} + +/** + * artop6210_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring + * + * Set PIO mode for device, in host controller PCI config space. For + * ARTOP we must also clear the UDMA bits if we are not doing UDMA. In + * the event UDMA is used the later call to set_dmamode will set the + * bits as required. + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6210_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = adev->devno + 2 * ap->port_no; + u8 ultra; + + artop6210_load_piomode(ap, adev, adev->pio_mode - XFER_PIO_0); + + /* Clear the UDMA mode bits (set_dmamode will redo this if needed) */ + pci_read_config_byte(pdev, 0x54, &ultra); + ultra &= ~(3 << (2 * dn)); + pci_write_config_byte(pdev, 0x54, ultra); +} + +/** + * artop6260_load_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring + * @pio: PIO mode + * + * Set PIO mode for device, in host controller PCI config space. The + * ARTOP6260 and relatives store the timing data differently. + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6260_load_piomode (struct ata_port *ap, struct ata_device *adev, unsigned int pio) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = adev->devno + 2 * ap->port_no; + const u8 timing[2][5] = { + { 0x00, 0x0A, 0x08, 0x33, 0x31 }, + { 0x70, 0x7A, 0x78, 0x43, 0x41 } + + }; + /* Load the PIO timing active/recovery bits */ + pci_write_config_byte(pdev, 0x40 + dn, timing[clock][pio]); +} + +/** + * artop6260_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring + * + * Set PIO mode for device, in host controller PCI config space. For + * ARTOP we must also clear the UDMA bits if we are not doing UDMA. In + * the event UDMA is used the later call to set_dmamode will set the + * bits as required. + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6260_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 ultra; + + artop6260_load_piomode(ap, adev, adev->pio_mode - XFER_PIO_0); + + /* Clear the UDMA mode bits (set_dmamode will redo this if needed) */ + pci_read_config_byte(pdev, 0x44 + ap->port_no, &ultra); + ultra &= ~(7 << (4 * adev->devno)); /* One nibble per drive */ + pci_write_config_byte(pdev, 0x44 + ap->port_no, ultra); +} + +/** + * artop6210_set_dmamode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: um + * + * Set DMA mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6210_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + unsigned int pio; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = adev->devno + 2 * ap->port_no; + u8 ultra; + + if (adev->dma_mode == XFER_MW_DMA_0) + pio = 1; + else + pio = 4; + + /* Load the PIO timing active/recovery bits */ + artop6210_load_piomode(ap, adev, pio); + + pci_read_config_byte(pdev, 0x54, &ultra); + ultra &= ~(3 << (2 * dn)); + + /* Add ultra DMA bits if in UDMA mode */ + if (adev->dma_mode >= XFER_UDMA_0) { + u8 mode = (adev->dma_mode - XFER_UDMA_0) + 1 - clock; + if (mode == 0) + mode = 1; + ultra |= (mode << (2 * dn)); + } + pci_write_config_byte(pdev, 0x54, ultra); +} + +/** + * artop6260_set_dmamode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring + * + * Set DMA mode for device, in host controller PCI config space. The + * ARTOP6260 and relatives store the timing data differently. + * + * LOCKING: + * None (inherited from caller). + */ + +static void artop6260_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + unsigned int pio = adev->pio_mode - XFER_PIO_0; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 ultra; + + if (adev->dma_mode == XFER_MW_DMA_0) + pio = 1; + else + pio = 4; + + /* Load the PIO timing active/recovery bits */ + artop6260_load_piomode(ap, adev, pio); + + /* Add ultra DMA bits if in UDMA mode */ + pci_read_config_byte(pdev, 0x44 + ap->port_no, &ultra); + ultra &= ~(7 << (4 * adev->devno)); /* One nibble per drive */ + if (adev->dma_mode >= XFER_UDMA_0) { + u8 mode = adev->dma_mode - XFER_UDMA_0 + 1 - clock; + if (mode == 0) + mode = 1; + ultra |= (mode << (4 * adev->devno)); + } + pci_write_config_byte(pdev, 0x44 + ap->port_no, ultra); +} + +static struct scsi_host_template artop_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations artop6210_ops = { + .port_disable = ata_port_disable, + .set_piomode = artop6210_set_piomode, + .set_dmamode = artop6210_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = artop6210_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static const struct ata_port_operations artop6260_ops = { + .port_disable = ata_port_disable, + .set_piomode = artop6260_set_piomode, + .set_dmamode = artop6260_set_dmamode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = artop6260_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + + +/** + * artop_init_one - Register ARTOP ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in artop_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int artop_init_one (struct pci_dev *pdev, const struct pci_device_id *id) +{ + static int printed_version; + static struct ata_port_info info_6210 = { + .sht = &artop_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = ATA_UDMA2, + .port_ops = &artop6210_ops, + }; + static struct ata_port_info info_626x = { + .sht = &artop_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = ATA_UDMA4, + .port_ops = &artop6260_ops, + }; + static struct ata_port_info info_626x_fast = { + .sht = &artop_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = ATA_UDMA5, + .port_ops = &artop6260_ops, + }; + struct ata_port_info *port_info[2]; + struct ata_port_info *info; + int ports = 2; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, + "version " DRV_VERSION "\n"); + + if (id->driver_data == 0) { /* 6210 variant */ + info = &info_6210; + /* BIOS may have left us in UDMA, clear it before libata probe */ + pci_write_config_byte(pdev, 0x54, 0); + /* For the moment (also lacks dsc) */ + printk(KERN_WARNING "ARTOP 6210 requires serialize functionality not yet supported by libata.\n"); + printk(KERN_WARNING "Secondary ATA ports will not be activated.\n"); + ports = 1; + } + else if (id->driver_data == 1) /* 6260 */ + info = &info_626x; + else if (id->driver_data == 2) { /* 6260 or 6260 + fast */ + unsigned long io = pci_resource_start(pdev, 4); + u8 reg; + + info = &info_626x; + if (inb(io) & 0x10) + info = &info_626x_fast; + /* Mac systems come up with some registers not set as we + will need them */ + + /* Clear reset & test bits */ + pci_read_config_byte(pdev, 0x49, ®); + pci_write_config_byte(pdev, 0x49, reg & ~ 0x30); + + /* PCI latency must be > 0x80 for burst mode, tweak it + * if required. + */ + pci_read_config_byte(pdev, PCI_LATENCY_TIMER, ®); + if (reg <= 0x80) + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x90); + + /* Enable IRQ output and burst mode */ + pci_read_config_byte(pdev, 0x4a, ®); + pci_write_config_byte(pdev, 0x4a, (reg & ~0x01) | 0x80); + + } + port_info[0] = port_info[1] = info; + return ata_pci_init_one(pdev, port_info, ports); +} + +static const struct pci_device_id artop_pci_tbl[] = { + { 0x1191, 0x0005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { 0x1191, 0x0006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1}, + { 0x1191, 0x0007, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1}, + { 0x1191, 0x0008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2}, + { 0x1191, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2}, + { } /* terminate list */ +}; + +static struct pci_driver artop_pci_driver = { + .name = DRV_NAME, + .id_table = artop_pci_tbl, + .probe = artop_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init artop_init(void) +{ + return pci_register_driver(&artop_pci_driver); +} + +static void __exit artop_exit(void) +{ + pci_unregister_driver(&artop_pci_driver); +} + + +module_init(artop_init); +module_exit(artop_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for ARTOP PATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, artop_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c new file mode 100644 index 000000000000..3f78a1e54a75 --- /dev/null +++ b/drivers/ata/pata_atiixp.c @@ -0,0 +1,306 @@ +/* + * pata_atiixp.c - ATI PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * Based on + * + * linux/drivers/ide/pci/atiixp.c Version 0.01-bart2 Feb. 26, 2004 + * + * Copyright (C) 2003 ATI Inc. + * Copyright (C) 2004 Bartlomiej Zolnierkiewicz + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_atiixp" +#define DRV_VERSION "0.4.2" + +enum { + ATIIXP_IDE_PIO_TIMING = 0x40, + ATIIXP_IDE_MWDMA_TIMING = 0x44, + ATIIXP_IDE_PIO_CONTROL = 0x48, + ATIIXP_IDE_PIO_MODE = 0x4a, + ATIIXP_IDE_UDMA_CONTROL = 0x54, + ATIIXP_IDE_UDMA_MODE = 0x56 +}; + +static int atiixp_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static struct pci_bits atiixp_enable_bits[] = { + { 0x48, 1, 0x01, 0x00 }, + { 0x48, 1, 0x08, 0x00 } + }; + + if (!pci_test_config_bits(pdev, &atiixp_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +static void atiixp_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, atiixp_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * atiixp_set_pio_timing - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called by both the pio and dma setup functions to set the controller + * timings for PIO transfers. We must load both the mode number and + * timing values into the controller. + */ + +static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev, int pio) +{ + static u8 pio_timings[5] = { 0x5D, 0x47, 0x34, 0x22, 0x20 }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = 2 * ap->port_no + adev->devno; + + /* Check this is correct - the order is odd in both drivers */ + int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); + u16 pio_mode_data, pio_timing_data; + + pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); + pio_mode_data &= ~(0x7 << (4 * dn)); + pio_mode_data |= pio << (4 * dn); + pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data); + + pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); + pio_mode_data &= ~(0xFF << timing_shift); + pio_mode_data |= (pio_timings[pio] << timing_shift); + pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); +} + +/** + * atiixp_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. We use a shared helper for this + * as the DMA setup must also adjust the PIO timing information. + */ + +static void atiixp_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + atiixp_set_pio_timing(ap, adev, adev->pio_mode - XFER_PIO_0); +} + +/** + * atiixp_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the DMA mode setup. We use timing tables for most + * modes but must tune an appropriate PIO mode to match. + */ + +static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static u8 mwdma_timings[5] = { 0x77, 0x21, 0x20 }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dma = adev->dma_mode; + int dn = 2 * ap->port_no + adev->devno; + int wanted_pio; + + if (adev->dma_mode >= XFER_UDMA_0) { + u16 udma_mode_data; + + dma -= XFER_UDMA_0; + + pci_read_config_word(pdev, ATIIXP_IDE_UDMA_MODE, &udma_mode_data); + udma_mode_data &= ~(0x7 << (4 * dn)); + udma_mode_data |= dma << (4 * dn); + pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data); + } else { + u16 mwdma_timing_data; + /* Check this is correct - the order is odd in both drivers */ + int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); + + dma -= XFER_MW_DMA_0; + + pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data); + mwdma_timing_data &= ~(0xFF << timing_shift); + mwdma_timing_data |= (mwdma_timings[dma] << timing_shift); + pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data); + } + /* + * We must now look at the PIO mode situation. We may need to + * adjust the PIO mode to keep the timings acceptable + */ + if (adev->dma_mode >= XFER_MW_DMA_2) + wanted_pio = 4; + else if (adev->dma_mode == XFER_MW_DMA_1) + wanted_pio = 3; + else if (adev->dma_mode == XFER_MW_DMA_0) + wanted_pio = 0; + else BUG(); + + if (adev->pio_mode != wanted_pio) + atiixp_set_pio_timing(ap, adev, wanted_pio); +} + +/** + * atiixp_bmdma_start - DMA start callback + * @qc: Command in progress + * + * When DMA begins we need to ensure that the UDMA control + * register for the channel is correctly set. + */ + +static void atiixp_bmdma_start(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = (2 * ap->port_no) + adev->devno; + u16 tmp16; + + pci_read_config_word(pdev, ATIIXP_IDE_UDMA_CONTROL, &tmp16); + if (adev->dma_mode >= XFER_UDMA_0) + tmp16 |= (1 << dn); + else + tmp16 &= ~(1 << dn); + pci_write_config_word(pdev, ATIIXP_IDE_UDMA_CONTROL, tmp16); + ata_bmdma_start(qc); +} + +/** + * atiixp_dma_stop - DMA stop callback + * @qc: Command in progress + * + * DMA has completed. Clear the UDMA flag as the next operations will + * be PIO ones not UDMA data transfer. + */ + +static void atiixp_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = (2 * ap->port_no) + qc->dev->devno; + u16 tmp16; + + pci_read_config_word(pdev, ATIIXP_IDE_UDMA_CONTROL, &tmp16); + tmp16 &= ~(1 << dn); + pci_write_config_word(pdev, ATIIXP_IDE_UDMA_CONTROL, tmp16); + ata_bmdma_stop(qc); +} + +static struct scsi_host_template atiixp_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations atiixp_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = atiixp_set_piomode, + .set_dmamode = atiixp_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = atiixp_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = atiixp_bmdma_start, + .bmdma_stop = atiixp_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int atiixp_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &atiixp_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x06, /* No MWDMA0 support */ + .udma_mask = 0x3F, + .port_ops = &atiixp_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id atiixp[] = { + { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP200_IDE), }, + { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP300_IDE), }, + { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_IDE), }, + { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_IDE), }, + { 0, }, +}; + +static struct pci_driver atiixp_pci_driver = { + .name = DRV_NAME, + .id_table = atiixp, + .probe = atiixp_init_one, + .remove = ata_pci_remove_one +}; + +static int __init atiixp_init(void) +{ + return pci_register_driver(&atiixp_pci_driver); +} + + +static void __exit atiixp_exit(void) +{ + pci_unregister_driver(&atiixp_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for ATI IXP200/300/400"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, atiixp); +MODULE_VERSION(DRV_VERSION); + +module_init(atiixp_init); +module_exit(atiixp_exit); diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c new file mode 100644 index 000000000000..03284611a727 --- /dev/null +++ b/drivers/ata/pata_cmd64x.c @@ -0,0 +1,505 @@ +/* + * pata_cmd64x.c - ATI PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * Based upon + * linux/drivers/ide/pci/cmd64x.c Version 1.30 Sept 10, 2002 + * + * cmd64x.c: Enable interrupts at initialization time on Ultra/PCI machines. + * Note, this driver is not used at all on other systems because + * there the "BIOS" has done all of the following already. + * Due to massive hardware bugs, UltraDMA is only supported + * on the 646U2 and not on the 646U. + * + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1998 David S. Miller (davem@redhat.com) + * + * Copyright (C) 1999-2002 Andre Hedrick + * + * TODO + * Testing work + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_cmd64x" +#define DRV_VERSION "0.2.1" + +/* + * CMD64x specific registers definition. + */ + +enum { + CFR = 0x50, + CFR_INTR_CH0 = 0x02, + CNTRL = 0x51, + CNTRL_DIS_RA0 = 0x40, + CNTRL_DIS_RA1 = 0x80, + CNTRL_ENA_2ND = 0x08, + CMDTIM = 0x52, + ARTTIM0 = 0x53, + DRWTIM0 = 0x54, + ARTTIM1 = 0x55, + DRWTIM1 = 0x56, + ARTTIM23 = 0x57, + ARTTIM23_DIS_RA2 = 0x04, + ARTTIM23_DIS_RA3 = 0x08, + ARTTIM23_INTR_CH1 = 0x10, + ARTTIM2 = 0x57, + ARTTIM3 = 0x57, + DRWTIM23 = 0x58, + DRWTIM2 = 0x58, + BRST = 0x59, + DRWTIM3 = 0x5b, + BMIDECR0 = 0x70, + MRDMODE = 0x71, + MRDMODE_INTR_CH0 = 0x04, + MRDMODE_INTR_CH1 = 0x08, + MRDMODE_BLK_CH0 = 0x10, + MRDMODE_BLK_CH1 = 0x20, + BMIDESR0 = 0x72, + UDIDETCR0 = 0x73, + DTPR0 = 0x74, + BMIDECR1 = 0x78, + BMIDECSR = 0x79, + BMIDESR1 = 0x7A, + UDIDETCR1 = 0x7B, + DTPR1 = 0x7C +}; + +static int cmd64x_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +static int cmd648_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 r; + + /* Check cable detect bits */ + pci_read_config_byte(pdev, BMIDECSR, &r); + if (r & (1 << ap->port_no)) + ap->cbl = ATA_CBL_PATA80; + else + ap->cbl = ATA_CBL_PATA40; + + return ata_std_prereset(ap); +} + +static void cmd64x_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, cmd64x_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +static void cmd648_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, cmd648_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * cmd64x_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. + */ + +static void cmd64x_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct ata_timing t; + const unsigned long T = 1000000 / 33; + const u8 setup_data[] = { 0x40, 0x40, 0x40, 0x80, 0x00 }; + + u8 reg; + + /* Port layout is not logical so use a table */ + const u8 arttim_port[2][2] = { + { ARTTIM0, ARTTIM1 }, + { ARTTIM23, ARTTIM23 } + }; + const u8 drwtim_port[2][2] = { + { DRWTIM0, DRWTIM1 }, + { DRWTIM2, DRWTIM3 } + }; + + int arttim = arttim_port[ap->port_no][adev->devno]; + int drwtim = drwtim_port[ap->port_no][adev->devno]; + + + if (ata_timing_compute(adev, adev->pio_mode, &t, T, 0) < 0) { + printk(KERN_ERR DRV_NAME ": mode computation failed.\n"); + return; + } + if (ap->port_no) { + /* Slave has shared address setup */ + struct ata_device *pair = ata_dev_pair(adev); + + if (pair) { + struct ata_timing tp; + ata_timing_compute(pair, pair->pio_mode, &tp, T, 0); + ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP); + } + } + + printk(KERN_DEBUG DRV_NAME ": active %d recovery %d setup %d.\n", + t.active, t.recover, t.setup); + if (t.recover > 16) { + t.active += t.recover - 16; + t.recover = 16; + } + if (t.active > 16) + t.active = 16; + + /* Now convert the clocks into values we can actually stuff into + the chip */ + + if (t.recover > 1) + t.recover--; + else + t.recover = 15; + + if (t.setup > 4) + t.setup = 0xC0; + else + t.setup = setup_data[t.setup]; + + t.active &= 0x0F; /* 0 = 16 */ + + /* Load setup timing */ + pci_read_config_byte(pdev, arttim, ®); + reg &= 0x3F; + reg |= t.setup; + pci_write_config_byte(pdev, arttim, reg); + + /* Load active/recovery */ + pci_write_config_byte(pdev, drwtim, (t.active << 4) | t.recover); +} + +/** + * cmd64x_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the DMA mode setup. + */ + +static void cmd64x_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const u8 udma_data[] = { + 0x31, 0x21, 0x11, 0x25, 0x15, 0x05 + }; + static const u8 mwdma_data[] = { + 0x30, 0x20, 0x10 + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 regU, regD; + + int pciU = UDIDETCR0 + 8 * ap->port_no; + int pciD = BMIDESR0 + 8 * ap->port_no; + int shift = 2 * adev->devno; + + pci_read_config_byte(pdev, pciD, ®D); + pci_read_config_byte(pdev, pciU, ®U); + + regD &= ~(0x20 << shift); + regU &= ~(0x35 << shift); + + if (adev->dma_mode >= XFER_UDMA_0) + regU |= udma_data[adev->dma_mode - XFER_UDMA_0] << shift; + else + regD |= mwdma_data[adev->dma_mode - XFER_MW_DMA_0] << shift; + + regD |= 0x20 << adev->devno; + + pci_write_config_byte(pdev, pciU, regU); + pci_write_config_byte(pdev, pciD, regD); +} + +/** + * cmd648_dma_stop - DMA stop callback + * @qc: Command in progress + * + * DMA has completed. + */ + +static void cmd648_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 dma_intr; + int dma_reg = ap->port_no ? ARTTIM23_INTR_CH1 : CFR_INTR_CH0; + int dma_mask = ap->port_no ? ARTTIM2 : CFR; + + ata_bmdma_stop(qc); + + pci_read_config_byte(pdev, dma_reg, &dma_intr); + pci_write_config_byte(pdev, dma_reg, dma_intr | dma_mask); +} + +/** + * cmd646r1_dma_stop - DMA stop callback + * @qc: Command in progress + * + * Stub for now while investigating the r1 quirk in the old driver. + */ + +static void cmd646r1_bmdma_stop(struct ata_queued_cmd *qc) +{ + ata_bmdma_stop(qc); +} + +static struct scsi_host_template cmd64x_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations cmd64x_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cmd64x_set_piomode, + .set_dmamode = cmd64x_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cmd64x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations cmd646r1_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cmd64x_set_piomode, + .set_dmamode = cmd64x_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cmd64x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = cmd646r1_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations cmd648_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cmd64x_set_piomode, + .set_dmamode = cmd64x_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cmd648_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = cmd648_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + u32 class_rev; + + static struct ata_port_info cmd_info[6] = { + { /* CMD 643 - no UDMA */ + .sht = &cmd64x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &cmd64x_port_ops + }, + { /* CMD 646 with broken UDMA */ + .sht = &cmd64x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &cmd64x_port_ops + }, + { /* CMD 646 with working UDMA */ + .sht = &cmd64x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA1, + .port_ops = &cmd64x_port_ops + }, + { /* CMD 646 rev 1 */ + .sht = &cmd64x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &cmd646r1_port_ops + }, + { /* CMD 648 */ + .sht = &cmd64x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA2, + .port_ops = &cmd648_port_ops + }, + { /* CMD 649 */ + .sht = &cmd64x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA3, + .port_ops = &cmd648_port_ops + } + }; + static struct ata_port_info *port_info[2], *info; + u8 mrdmode; + + info = &cmd_info[id->driver_data]; + + pci_read_config_dword(pdev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xFF; + + if (id->driver_data == 0) /* 643 */ + ata_pci_clear_simplex(pdev); + + if (pdev->device == PCI_DEVICE_ID_CMD_646) { + /* Does UDMA work ? */ + if (class_rev > 4) + info = &cmd_info[2]; + /* Early rev with other problems ? */ + else if (class_rev == 1) + info = &cmd_info[3]; + } + + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64); + pci_read_config_byte(pdev, MRDMODE, &mrdmode); + mrdmode &= ~ 0x30; /* IRQ set up */ + mrdmode |= 0x02; /* Memory read line enable */ + pci_write_config_byte(pdev, MRDMODE, mrdmode); + + /* Force PIO 0 here.. */ + + /* PPC specific fixup copied from old driver */ +#ifdef CONFIG_PPC + pci_write_config_byte(pdev, UDIDETCR0, 0xF0); +#endif + + port_info[0] = port_info[1] = info; + return ata_pci_init_one(pdev, port_info, 2); +} + +static struct pci_device_id cmd64x[] = { + { PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_643, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1}, + { PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4}, + { PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5}, + { 0, }, +}; + +static struct pci_driver cmd64x_pci_driver = { + .name = DRV_NAME, + .id_table = cmd64x, + .probe = cmd64x_init_one, + .remove = ata_pci_remove_one +}; + +static int __init cmd64x_init(void) +{ + return pci_register_driver(&cmd64x_pci_driver); +} + + +static void __exit cmd64x_exit(void) +{ + pci_unregister_driver(&cmd64x_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for CMD64x series PATA controllers"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cmd64x); +MODULE_VERSION(DRV_VERSION); + +module_init(cmd64x_init); +module_exit(cmd64x_exit); diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c new file mode 100644 index 000000000000..792ce4828510 --- /dev/null +++ b/drivers/ata/pata_cs5520.c @@ -0,0 +1,336 @@ +/* + * IDE tuning and bus mastering support for the CS5510/CS5520 + * chipsets + * + * The CS5510/CS5520 are slightly unusual devices. Unlike the + * typical IDE controllers they do bus mastering with the drive in + * PIO mode and smarter silicon. + * + * The practical upshot of this is that we must always tune the + * drive for the right PIO mode. We must also ignore all the blacklists + * and the drive bus mastering DMA information. Also to confuse matters + * further we can do DMA on PIO only drives. + * + * DMA on the 5510 also requires we disable_hlt() during DMA on early + * revisions. + * + * *** This driver is strictly experimental *** + * + * (c) Copyright Red Hat Inc 2002 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Documentation: + * Not publically available. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_cs5520" +#define DRV_VERSION "0.6.2" + +struct pio_clocks +{ + int address; + int assert; + int recovery; +}; + +static const struct pio_clocks cs5520_pio_clocks[]={ + {3, 6, 11}, + {2, 5, 6}, + {1, 4, 3}, + {1, 3, 2}, + {1, 2, 1} +}; + +/** + * cs5520_set_timings - program PIO timings + * @ap: ATA port + * @adev: ATA device + * + * Program the PIO mode timings for the controller according to the pio + * clocking table. + */ + +static void cs5520_set_timings(struct ata_port *ap, struct ata_device *adev, int pio) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int slave = adev->devno; + + pio -= XFER_PIO_0; + + /* Channel command timing */ + pci_write_config_byte(pdev, 0x62 + ap->port_no, + (cs5520_pio_clocks[pio].recovery << 4) | + (cs5520_pio_clocks[pio].assert)); + /* FIXME: should these use address ? */ + /* Read command timing */ + pci_write_config_byte(pdev, 0x64 + 4*ap->port_no + slave, + (cs5520_pio_clocks[pio].recovery << 4) | + (cs5520_pio_clocks[pio].assert)); + /* Write command timing */ + pci_write_config_byte(pdev, 0x66 + 4*ap->port_no + slave, + (cs5520_pio_clocks[pio].recovery << 4) | + (cs5520_pio_clocks[pio].assert)); +} + +/** + * cs5520_enable_dma - turn on DMA bits + * + * Turn on the DMA bits for this disk. Needed because the BIOS probably + * has not done the work for us. Belongs in the core SATA code. + */ + +static void cs5520_enable_dma(struct ata_port *ap, struct ata_device *adev) +{ + /* Set the DMA enable/disable flag */ + u8 reg = inb(ap->ioaddr.bmdma_addr + 0x02); + reg |= 1<<(adev->devno + 5); + outb(reg, ap->ioaddr.bmdma_addr + 0x02); +} + +/** + * cs5520_set_dmamode - program DMA timings + * @ap: ATA port + * @adev: ATA device + * + * Program the DMA mode timings for the controller according to the pio + * clocking table. Note that this device sets the DMA timings to PIO + * mode values. This may seem bizarre but the 5520 architecture talks + * PIO mode to the disk and DMA mode to the controller so the underlying + * transfers are PIO timed. + */ + +static void cs5520_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const int dma_xlate[3] = { XFER_PIO_0, XFER_PIO_3, XFER_PIO_4 }; + cs5520_set_timings(ap, adev, dma_xlate[adev->dma_mode]); + cs5520_enable_dma(ap, adev); +} + +/** + * cs5520_set_piomode - program PIO timings + * @ap: ATA port + * @adev: ATA device + * + * Program the PIO mode timings for the controller according to the pio + * clocking table. We know pio_mode will equal dma_mode because of the + * CS5520 architecture. At least once we turned DMA on and wrote a + * mode setter. + */ + +static void cs5520_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + cs5520_set_timings(ap, adev, adev->pio_mode); +} + + +static int cs5520_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +static void cs5520_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, cs5520_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +static struct scsi_host_template cs5520_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations cs5520_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cs5520_set_piomode, + .set_dmamode = cs5520_set_dmamode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cs5520_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + u8 pcicfg; + static struct ata_probe_ent probe[2]; + int ports = 0; + + /* IDE port enable bits */ + pci_read_config_byte(dev, 0x60, &pcicfg); + + /* Check if the ATA ports are enabled */ + if ((pcicfg & 3) == 0) + return -ENODEV; + + if ((pcicfg & 0x40) == 0) { + printk(KERN_WARNING DRV_NAME ": DMA mode disabled. Enabling.\n"); + pci_write_config_byte(dev, 0x60, pcicfg | 0x40); + } + + /* Perform set up for DMA */ + if (pci_enable_device_bars(dev, 1<<2)) { + printk(KERN_ERR DRV_NAME ": unable to configure BAR2.\n"); + return -ENODEV; + } + pci_set_master(dev); + if (pci_set_dma_mask(dev, DMA_32BIT_MASK)) { + printk(KERN_ERR DRV_NAME ": unable to configure DMA mask.\n"); + return -ENODEV; + } + if (pci_set_consistent_dma_mask(dev, DMA_32BIT_MASK)) { + printk(KERN_ERR DRV_NAME ": unable to configure consistent DMA mask.\n"); + return -ENODEV; + } + + /* We have to do our own plumbing as the PCI setup for this + chipset is non-standard so we can't punt to the libata code */ + + INIT_LIST_HEAD(&probe[0].node); + probe[0].dev = pci_dev_to_dev(dev); + probe[0].port_ops = &cs5520_port_ops; + probe[0].sht = &cs5520_sht; + probe[0].pio_mask = 0x1F; + probe[0].mwdma_mask = id->driver_data; + probe[0].irq = 14; + probe[0].irq_flags = 0; + probe[0].port_flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST; + probe[0].n_ports = 1; + probe[0].port[0].cmd_addr = 0x1F0; + probe[0].port[0].ctl_addr = 0x3F6; + probe[0].port[0].altstatus_addr = 0x3F6; + probe[0].port[0].bmdma_addr = pci_resource_start(dev, 2); + + /* The secondary lurks at different addresses but is otherwise + the same beastie */ + + probe[1] = probe[0]; + INIT_LIST_HEAD(&probe[1].node); + probe[1].irq = 15; + probe[1].port[0].cmd_addr = 0x170; + probe[1].port[0].ctl_addr = 0x376; + probe[1].port[0].altstatus_addr = 0x376; + probe[1].port[0].bmdma_addr = pci_resource_start(dev, 2) + 8; + + /* Let libata fill in the port details */ + ata_std_ports(&probe[0].port[0]); + ata_std_ports(&probe[1].port[0]); + + /* Now add the ports that are active */ + if (pcicfg & 1) + ports += ata_device_add(&probe[0]); + if (pcicfg & 2) + ports += ata_device_add(&probe[1]); + if (ports) + return 0; + return -ENODEV; +} + +/** + * cs5520_remove_one - device unload + * @pdev: PCI device being removed + * + * Handle an unplug/unload event for a PCI device. Unload the + * PCI driver but do not use the default handler as we manage + * resources ourself and *MUST NOT* disable the device as it has + * other functions. + */ + +static void __devexit cs5520_remove_one(struct pci_dev *pdev) +{ + struct device *dev = pci_dev_to_dev(pdev); + struct ata_host *host = dev_get_drvdata(dev); + + ata_host_remove(host); + dev_set_drvdata(dev, NULL); +} + +/* For now keep DMA off. We can set it for all but A rev CS5510 once the + core ATA code can handle it */ + +static struct pci_device_id pata_cs5520[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, + { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, + { 0, }, +}; + +static struct pci_driver cs5520_pci_driver = { + .name = DRV_NAME, + .id_table = pata_cs5520, + .probe = cs5520_init_one, + .remove = cs5520_remove_one +}; + + +static int __init cs5520_init(void) +{ + return pci_register_driver(&cs5520_pci_driver); +} + +static void __exit cs5520_exit(void) +{ + pci_unregister_driver(&cs5520_pci_driver); +} + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Cyrix CS5510/5520"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, pata_cs5520); +MODULE_VERSION(DRV_VERSION); + +module_init(cs5520_init); +module_exit(cs5520_exit); + diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c new file mode 100644 index 000000000000..f3d8a3bc1e78 --- /dev/null +++ b/drivers/ata/pata_cs5530.c @@ -0,0 +1,387 @@ +/* + * pata-cs5530.c - CS5530 PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * based upon cs5530.c by Mark Lord. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Loosely based on the piix & svwks drivers. + * + * Documentation: + * Available from AMD web site. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_cs5530" +#define DRV_VERSION "0.6" + +/** + * cs5530_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Set our PIO requirements. This is fairly simple on the CS5530 + * chips. + */ + +static void cs5530_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static const unsigned int cs5530_pio_timings[2][5] = { + {0x00009172, 0x00012171, 0x00020080, 0x00032010, 0x00040010}, + {0xd1329172, 0x71212171, 0x30200080, 0x20102010, 0x00100010} + }; + unsigned long base = ( ap->ioaddr.bmdma_addr & ~0x0F) + 0x20 + 0x10 * ap->port_no; + u32 tuning; + int format; + + /* Find out which table to use */ + tuning = inl(base + 0x04); + format = (tuning & 0x80000000UL) ? 1 : 0; + + /* Now load the right timing register */ + if (adev->devno) + base += 0x08; + + outl(cs5530_pio_timings[format][adev->pio_mode - XFER_PIO_0], base); +} + +/** + * cs5530_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * We cannot mix MWDMA and UDMA without reloading timings each switch + * master to slave. We track the last DMA setup in order to minimise + * reloads. + */ + +static void cs5530_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + unsigned long base = ( ap->ioaddr.bmdma_addr & ~0x0F) + 0x20 + 0x10 * ap->port_no; + u32 tuning, timing = 0; + u8 reg; + + /* Find out which table to use */ + tuning = inl(base + 0x04); + + switch(adev->dma_mode) { + case XFER_UDMA_0: + timing = 0x00921250;break; + case XFER_UDMA_1: + timing = 0x00911140;break; + case XFER_UDMA_2: + timing = 0x00911030;break; + case XFER_MW_DMA_0: + timing = 0x00077771;break; + case XFER_MW_DMA_1: + timing = 0x00012121;break; + case XFER_MW_DMA_2: + timing = 0x00002020;break; + default: + BUG(); + } + /* Merge in the PIO format bit */ + timing |= (tuning & 0x80000000UL); + if (adev->devno == 0) /* Master */ + outl(timing, base + 0x04); + else { + if (timing & 0x00100000) + tuning |= 0x00100000; /* UDMA for both */ + else + tuning &= ~0x00100000; /* MWDMA for both */ + outl(tuning, base + 0x04); + outl(timing, base + 0x0C); + } + + /* Set the DMA capable bit in the BMDMA area */ + reg = inb(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS); + reg |= (1 << (5 + adev->devno)); + outb(reg, ap->ioaddr.bmdma_addr + ATA_DMA_STATUS); + + /* Remember the last DMA setup we did */ + + ap->private_data = adev; +} + +/** + * cs5530_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings if + * neccessary. Specifically we have a problem that there is only + * one MWDMA/UDMA bit. + */ + +static unsigned int cs5530_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct ata_device *prev = ap->private_data; + + /* See if the DMA settings could be wrong */ + if (adev->dma_mode != 0 && adev != prev && prev != NULL) { + /* Maybe, but do the channels match MWDMA/UDMA ? */ + if ((adev->dma_mode >= XFER_UDMA_0 && prev->dma_mode < XFER_UDMA_0) || + (adev->dma_mode < XFER_UDMA_0 && prev->dma_mode >= XFER_UDMA_0)) + /* Switch the mode bits */ + cs5530_set_dmamode(ap, adev); + } + + return ata_qc_issue_prot(qc); +} + +static int cs5530_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +static void cs5530_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, cs5530_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + + +static struct scsi_host_template cs5530_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations cs5530_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cs5530_set_piomode, + .set_dmamode = cs5530_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cs5530_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = cs5530_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct dmi_system_id palmax_dmi_table[] = { + { + .ident = "Palmax PD1100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Cyrix"), + DMI_MATCH(DMI_PRODUCT_NAME, "Caddis"), + }, + }, + { } +}; + +static int cs5530_is_palmax(void) +{ + if (dmi_check_system(palmax_dmi_table)) { + printk(KERN_INFO "Palmax PD1100: Disabling DMA on docking port.\n"); + return 1; + } + return 0; +} + +/** + * cs5530_init_one - Initialise a CS5530 + * @dev: PCI device + * @id: Entry in match table + * + * Install a driver for the newly found CS5530 companion chip. Most of + * this is just housekeeping. We have to set the chip up correctly and + * turn off various bits of emulation magic. + */ + +static int cs5530_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + int compiler_warning_pointless_fix; + struct pci_dev *master_0 = NULL, *cs5530_0 = NULL; + static struct ata_port_info info = { + .sht = &cs5530_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x07, + .port_ops = &cs5530_port_ops + }; + /* The docking connector doesn't do UDMA, and it seems not MWDMA */ + static struct ata_port_info info_palmax_secondary = { + .sht = &cs5530_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .port_ops = &cs5530_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + dev = NULL; + while ((dev = pci_get_device(PCI_VENDOR_ID_CYRIX, PCI_ANY_ID, dev)) != NULL) { + switch (dev->device) { + case PCI_DEVICE_ID_CYRIX_PCI_MASTER: + master_0 = pci_dev_get(dev); + break; + case PCI_DEVICE_ID_CYRIX_5530_LEGACY: + cs5530_0 = pci_dev_get(dev); + break; + } + } + if (!master_0) { + printk(KERN_ERR DRV_NAME ": unable to locate PCI MASTER function\n"); + goto fail_put; + } + if (!cs5530_0) { + printk(KERN_ERR DRV_NAME ": unable to locate CS5530 LEGACY function\n"); + goto fail_put; + } + + pci_set_master(cs5530_0); + compiler_warning_pointless_fix = pci_set_mwi(cs5530_0); + + /* + * Set PCI CacheLineSize to 16-bytes: + * --> Write 0x04 into 8-bit PCI CACHELINESIZE reg of function 0 of the cs5530 + * + * Note: This value is constant because the 5530 is only a Geode companion + */ + + pci_write_config_byte(cs5530_0, PCI_CACHE_LINE_SIZE, 0x04); + + /* + * Disable trapping of UDMA register accesses (Win98 hack): + * --> Write 0x5006 into 16-bit reg at offset 0xd0 of function 0 of the cs5530 + */ + + pci_write_config_word(cs5530_0, 0xd0, 0x5006); + + /* + * Bit-1 at 0x40 enables MemoryWriteAndInvalidate on internal X-bus: + * The other settings are what is necessary to get the register + * into a sane state for IDE DMA operation. + */ + + pci_write_config_byte(master_0, 0x40, 0x1e); + + /* + * Set max PCI burst size (16-bytes seems to work best): + * 16bytes: set bit-1 at 0x41 (reg value of 0x16) + * all others: clear bit-1 at 0x41, and do: + * 128bytes: OR 0x00 at 0x41 + * 256bytes: OR 0x04 at 0x41 + * 512bytes: OR 0x08 at 0x41 + * 1024bytes: OR 0x0c at 0x41 + */ + + pci_write_config_byte(master_0, 0x41, 0x14); + + /* + * These settings are necessary to get the chip + * into a sane state for IDE DMA operation. + */ + + pci_write_config_byte(master_0, 0x42, 0x00); + pci_write_config_byte(master_0, 0x43, 0xc1); + + pci_dev_put(master_0); + pci_dev_put(cs5530_0); + + if (cs5530_is_palmax()) + port_info[1] = &info_palmax_secondary; + + /* Now kick off ATA set up */ + return ata_pci_init_one(dev, port_info, 2); + +fail_put: + if (master_0) + pci_dev_put(master_0); + if (cs5530_0) + pci_dev_put(cs5530_0); + return -ENODEV; +} + +static struct pci_device_id cs5530[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_IDE), }, + { 0, }, +}; + +static struct pci_driver cs5530_pci_driver = { + .name = DRV_NAME, + .id_table = cs5530, + .probe = cs5530_init_one, + .remove = ata_pci_remove_one +}; + +static int __init cs5530_init(void) +{ + return pci_register_driver(&cs5530_pci_driver); +} + + +static void __exit cs5530_exit(void) +{ + pci_unregister_driver(&cs5530_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the Cyrix/NS/AMD 5530"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cs5530); +MODULE_VERSION(DRV_VERSION); + +module_init(cs5530_init); +module_exit(cs5530_exit); diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c new file mode 100644 index 000000000000..69d6b4258724 --- /dev/null +++ b/drivers/ata/pata_cs5535.c @@ -0,0 +1,291 @@ +/* + * pata-cs5535.c - CS5535 PATA for new ATA layer + * (C) 2005-2006 Red Hat Inc + * Alan Cox + * + * based upon cs5535.c from AMD as cleaned up and + * made readable and Linux style by Wolfgang Zuleger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Loosely based on the piix & svwks drivers. + * + * Documentation: + * Available from AMD web site. + * TODO + * Review errata to see if serializing is neccessary + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "cs5535" +#define DRV_VERSION "0.2.10" + +/* + * The Geode (Aka Athlon GX now) uses an internal MSR based + * bus system for control. Demented but there you go. + */ + +#define MSR_ATAC_BASE 0x51300000 +#define ATAC_GLD_MSR_CAP (MSR_ATAC_BASE+0) +#define ATAC_GLD_MSR_CONFIG (MSR_ATAC_BASE+0x01) +#define ATAC_GLD_MSR_SMI (MSR_ATAC_BASE+0x02) +#define ATAC_GLD_MSR_ERROR (MSR_ATAC_BASE+0x03) +#define ATAC_GLD_MSR_PM (MSR_ATAC_BASE+0x04) +#define ATAC_GLD_MSR_DIAG (MSR_ATAC_BASE+0x05) +#define ATAC_IO_BAR (MSR_ATAC_BASE+0x08) +#define ATAC_RESET (MSR_ATAC_BASE+0x10) +#define ATAC_CH0D0_PIO (MSR_ATAC_BASE+0x20) +#define ATAC_CH0D0_DMA (MSR_ATAC_BASE+0x21) +#define ATAC_CH0D1_PIO (MSR_ATAC_BASE+0x22) +#define ATAC_CH0D1_DMA (MSR_ATAC_BASE+0x23) +#define ATAC_PCI_ABRTERR (MSR_ATAC_BASE+0x24) + +#define ATAC_BM0_CMD_PRIM 0x00 +#define ATAC_BM0_STS_PRIM 0x02 +#define ATAC_BM0_PRD 0x04 + +#define CS5535_CABLE_DETECT 0x48 + +#define CS5535_BAD_PIO(timings) ( (timings&~0x80000000UL)==0x00009172 ) + +/** + * cs5535_pre_reset - detect cable type + * @ap: Port to detect on + * + * Perform cable detection for ATA66 capable cable. Return a libata + * cable type. + */ + +static int cs5535_pre_reset(struct ata_port *ap) +{ + u8 cable; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_byte(pdev, CS5535_CABLE_DETECT, &cable); + if (cable & 1) + ap->cbl = ATA_CBL_PATA80; + else + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * cs5535_error_handler - reset/probe + * @ap: Port to reset + * + * Reset and configure a port + */ + +static void cs5535_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, cs5535_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * cs5535_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Set our PIO requirements. The CS5535 is pretty clean about all this + */ + +static void cs5535_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static const u16 pio_timings[5] = { + 0xF7F4, 0x53F3, 0x13F1, 0x5131, 0x1131 + }; + static const u16 pio_cmd_timings[5] = { + 0xF7F4, 0x53F3, 0x13F1, 0x5131, 0x1131 + }; + u32 reg, dummy; + struct ata_device *pair = ata_dev_pair(adev); + + int mode = adev->pio_mode - XFER_PIO_0; + int cmdmode = mode; + + /* Command timing has to be for the lowest of the pair of devices */ + if (pair) { + int pairmode = pair->pio_mode - XFER_PIO_0; + cmdmode = min(mode, pairmode); + /* Write the other drive timing register if it changed */ + if (cmdmode < pairmode) + wrmsr(ATAC_CH0D0_PIO + 2 * pair->devno, + pio_cmd_timings[cmdmode] << 16 | pio_timings[pairmode], 0); + } + /* Write the drive timing register */ + wrmsr(ATAC_CH0D0_PIO + 2 * adev->devno, + pio_cmd_timings[cmdmode] << 16 | pio_timings[mode], 0); + + /* Set the PIO "format 1" bit in the DMA timing register */ + rdmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg, dummy); + wrmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg | 0x80000000UL, 0); +} + +/** + * cs5535_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + */ + +static void cs5535_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const u32 udma_timings[5] = { + 0x7F7436A1, 0x7F733481, 0x7F723261, 0x7F713161, 0x7F703061 + }; + static const u32 mwdma_timings[3] = { + 0x7F0FFFF3, 0x7F035352, 0x7F024241 + }; + u32 reg, dummy; + int mode = adev->dma_mode; + + rdmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg, dummy); + reg &= 0x80000000UL; + if (mode >= XFER_UDMA_0) + reg |= udma_timings[mode - XFER_UDMA_0]; + else + reg |= mwdma_timings[mode - XFER_MW_DMA_0]; + wrmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg, 0); +} + +static struct scsi_host_template cs5535_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations cs5535_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cs5535_set_piomode, + .set_dmamode = cs5535_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cs5535_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * cs5535_init_one - Initialise a CS5530 + * @dev: PCI device + * @id: Entry in match table + * + * Install a driver for the newly found CS5530 companion chip. Most of + * this is just housekeeping. We have to set the chip up correctly and + * turn off various bits of emulation magic. + */ + +static int cs5535_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &cs5535_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, + .port_ops = &cs5535_port_ops + }; + struct ata_port_info *ports[1] = { &info }; + + u32 timings, dummy; + + /* Check the BIOS set the initial timing clock. If not set the + timings for PIO0 */ + rdmsr(ATAC_CH0D0_PIO, timings, dummy); + if (CS5535_BAD_PIO(timings)) + wrmsr(ATAC_CH0D0_PIO, 0xF7F4F7F4UL, 0); + rdmsr(ATAC_CH0D1_PIO, timings, dummy); + if (CS5535_BAD_PIO(timings)) + wrmsr(ATAC_CH0D1_PIO, 0xF7F4F7F4UL, 0); + return ata_pci_init_one(dev, ports, 1); +} + +static struct pci_device_id cs5535[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, 0x002D), }, + { 0, }, +}; + +static struct pci_driver cs5535_pci_driver = { + .name = DRV_NAME, + .id_table = cs5535, + .probe = cs5535_init_one, + .remove = ata_pci_remove_one +}; + +static int __init cs5535_init(void) +{ + return pci_register_driver(&cs5535_pci_driver); +} + + +static void __exit cs5535_exit(void) +{ + pci_unregister_driver(&cs5535_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox, Jens Altmann, Wolfgan Zuleger, Alexander Kiausch"); +MODULE_DESCRIPTION("low-level driver for the NS/AMD 5530"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cs5535); +MODULE_VERSION(DRV_VERSION); + +module_init(cs5535_init); +module_exit(cs5535_exit); diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c new file mode 100644 index 000000000000..322043d8cf7f --- /dev/null +++ b/drivers/ata/pata_cypress.c @@ -0,0 +1,227 @@ +/* + * pata_cypress.c - Cypress PATA for new ATA layer + * (C) 2006 Red Hat Inc + * Alan Cox + * + * Based heavily on + * linux/drivers/ide/pci/cy82c693.c Version 0.40 Sep. 10, 2002 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_cypress" +#define DRV_VERSION "0.1.2" + +/* here are the offset definitions for the registers */ + +enum { + CY82_IDE_CMDREG = 0x04, + CY82_IDE_ADDRSETUP = 0x48, + CY82_IDE_MASTER_IOR = 0x4C, + CY82_IDE_MASTER_IOW = 0x4D, + CY82_IDE_SLAVE_IOR = 0x4E, + CY82_IDE_SLAVE_IOW = 0x4F, + CY82_IDE_MASTER_8BIT = 0x50, + CY82_IDE_SLAVE_8BIT = 0x51, + + CY82_INDEX_PORT = 0x22, + CY82_DATA_PORT = 0x23, + + CY82_INDEX_CTRLREG1 = 0x01, + CY82_INDEX_CHANNEL0 = 0x30, + CY82_INDEX_CHANNEL1 = 0x31, + CY82_INDEX_TIMEOUT = 0x32 +}; + +static int cy82c693_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +static void cy82c693_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, cy82c693_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * cy82c693_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. + */ + +static void cy82c693_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct ata_timing t; + const unsigned long T = 1000000 / 33; + short time_16, time_8; + u32 addr; + + if (ata_timing_compute(adev, adev->pio_mode, &t, T, 1) < 0) { + printk(KERN_ERR DRV_NAME ": mome computation failed.\n"); + return; + } + + time_16 = FIT(t.recover, 0, 15) | (FIT(t.active, 0, 15) << 4); + time_8 = FIT(t.act8b, 0, 15) | (FIT(t.rec8b, 0, 15) << 4); + + if (adev->devno == 0) { + pci_read_config_dword(pdev, CY82_IDE_ADDRSETUP, &addr); + + addr &= ~0x0F; /* Mask bits */ + addr |= FIT(t.setup, 0, 15); + + pci_write_config_dword(pdev, CY82_IDE_ADDRSETUP, addr); + pci_write_config_byte(pdev, CY82_IDE_MASTER_IOR, time_16); + pci_write_config_byte(pdev, CY82_IDE_MASTER_IOW, time_16); + pci_write_config_byte(pdev, CY82_IDE_MASTER_8BIT, time_8); + } else { + pci_read_config_dword(pdev, CY82_IDE_ADDRSETUP, &addr); + + addr &= ~0xF0; /* Mask bits */ + addr |= (FIT(t.setup, 0, 15) << 4); + + pci_write_config_dword(pdev, CY82_IDE_ADDRSETUP, addr); + pci_write_config_byte(pdev, CY82_IDE_SLAVE_IOR, time_16); + pci_write_config_byte(pdev, CY82_IDE_SLAVE_IOW, time_16); + pci_write_config_byte(pdev, CY82_IDE_SLAVE_8BIT, time_8); + } +} + +/** + * cy82c693_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the DMA mode setup. + */ + +static void cy82c693_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + int reg = CY82_INDEX_CHANNEL0 + ap->port_no; + + /* Be afraid, be very afraid. Magic registers in low I/O space */ + outb(reg, 0x22); + outb(adev->dma_mode - XFER_MW_DMA_0, 0x23); + + /* 0x50 gives the best behaviour on the Alpha's using this chip */ + outb(CY82_INDEX_TIMEOUT, 0x22); + outb(0x50, 0x23); +} + +static struct scsi_host_template cy82c693_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations cy82c693_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = cy82c693_set_piomode, + .set_dmamode = cy82c693_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = cy82c693_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int cy82c693_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &cy82c693_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &cy82c693_port_ops + }; + static struct ata_port_info *port_info[1] = { &info }; + + /* Devfn 1 is the ATA primary. The secondary is magic and on devfn2. For the + moment we don't handle the secondary. FIXME */ + + if (PCI_FUNC(pdev->devfn) != 1) + return -ENODEV; + + return ata_pci_init_one(pdev, port_info, 1); +} + +static struct pci_device_id cy82c693[] = { + { PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { 0, }, +}; + +static struct pci_driver cy82c693_pci_driver = { + .name = DRV_NAME, + .id_table = cy82c693, + .probe = cy82c693_init_one, + .remove = ata_pci_remove_one +}; + +static int __init cy82c693_init(void) +{ + return pci_register_driver(&cy82c693_pci_driver); +} + + +static void __exit cy82c693_exit(void) +{ + pci_unregister_driver(&cy82c693_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the CY82C693 PATA controller"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cy82c693); +MODULE_VERSION(DRV_VERSION); + +module_init(cy82c693_init); +module_exit(cy82c693_exit); diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c new file mode 100644 index 000000000000..c30bc181304f --- /dev/null +++ b/drivers/ata/pata_efar.c @@ -0,0 +1,342 @@ +/* + * pata_efar.c - EFAR PIIX clone controller driver + * + * (C) 2005 Red Hat + * + * Some parts based on ata_piix.c by Jeff Garzik and others. + * + * The EFAR is a PIIX4 clone with UDMA66 support. Unlike the later + * Intel ICH controllers the EFAR widened the UDMA mode register bits + * and doesn't require the funky clock selection. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_efar" +#define DRV_VERSION "0.4.1" + +/** + * efar_pre_reset - check for 40/80 pin + * @ap: Port + * + * Perform cable detection for the EFAR ATA interface. This is + * different to the PIIX arrangement + */ + +static int efar_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits efar_enable_bits[] = { + { 0x41U, 1U, 0x80UL, 0x80UL }, /* port 0 */ + { 0x43U, 1U, 0x80UL, 0x80UL }, /* port 1 */ + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 tmp; + + if (!pci_test_config_bits(pdev, &efar_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + pci_read_config_byte(pdev, 0x47, &tmp); + if (tmp & (2 >> ap->port_no)) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +/** + * efar_probe_reset - Probe specified port on PATA host controller + * @ap: Port to probe + * + * LOCKING: + * None (inherited from caller). + */ + +static void efar_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, efar_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * efar_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: um + * + * Set PIO mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev) +{ + unsigned int pio = adev->pio_mode - XFER_PIO_0; + struct pci_dev *dev = to_pci_dev(ap->host->dev); + unsigned int idetm_port= ap->port_no ? 0x42 : 0x40; + u16 idetm_data; + int control = 0; + + /* + * See Intel Document 298600-004 for the timing programing rules + * for PIIX/ICH. The EFAR is a clone so very similar + */ + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + if (pio > 2) + control |= 1; /* TIME1 enable */ + if (ata_pio_need_iordy(adev)) /* PIO 3/4 require IORDY */ + control |= 2; /* IE enable */ + /* Intel specifies that the PPE functionality is for disk only */ + if (adev->class == ATA_DEV_ATA) + control |= 4; /* PPE enable */ + + pci_read_config_word(dev, idetm_port, &idetm_data); + + /* Enable PPE, IE and TIME as appropriate */ + + if (adev->devno == 0) { + idetm_data &= 0xCCF0; + idetm_data |= control; + idetm_data |= (timings[pio][0] << 12) | + (timings[pio][1] << 8); + } else { + int shift = 4 * ap->port_no; + u8 slave_data; + + idetm_data &= 0xCC0F; + idetm_data |= (control << 4); + + /* Slave timing in seperate register */ + pci_read_config_byte(dev, 0x44, &slave_data); + slave_data &= 0x0F << shift; + slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << shift; + pci_write_config_byte(dev, 0x44, slave_data); + } + + idetm_data |= 0x4000; /* Ensure SITRE is enabled */ + pci_write_config_word(dev, idetm_port, idetm_data); +} + +/** + * efar_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * + * Set UDMA/MWDMA mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void efar_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *dev = to_pci_dev(ap->host->dev); + u8 master_port = ap->port_no ? 0x42 : 0x40; + u16 master_data; + u8 speed = adev->dma_mode; + int devid = adev->devno + 2 * ap->port_no; + u8 udma_enable; + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + pci_read_config_word(dev, master_port, &master_data); + pci_read_config_byte(dev, 0x48, &udma_enable); + + if (speed >= XFER_UDMA_0) { + unsigned int udma = adev->dma_mode - XFER_UDMA_0; + u16 udma_timing; + + udma_enable |= (1 << devid); + + /* Load the UDMA mode number */ + pci_read_config_word(dev, 0x4A, &udma_timing); + udma_timing &= ~(7 << (4 * devid)); + udma_timing |= udma << (4 * devid); + pci_write_config_word(dev, 0x4A, udma_timing); + } else { + /* + * MWDMA is driven by the PIO timings. We must also enable + * IORDY unconditionally along with TIME1. PPE has already + * been set when the PIO timing was set. + */ + unsigned int mwdma = adev->dma_mode - XFER_MW_DMA_0; + unsigned int control; + u8 slave_data; + const unsigned int needed_pio[3] = { + XFER_PIO_0, XFER_PIO_3, XFER_PIO_4 + }; + int pio = needed_pio[mwdma] - XFER_PIO_0; + + control = 3; /* IORDY|TIME1 */ + + /* If the drive MWDMA is faster than it can do PIO then + we must force PIO into PIO0 */ + + if (adev->pio_mode < needed_pio[mwdma]) + /* Enable DMA timing only */ + control |= 8; /* PIO cycles in PIO0 */ + + if (adev->devno) { /* Slave */ + master_data &= 0xFF4F; /* Mask out IORDY|TIME1|DMAONLY */ + master_data |= control << 4; + pci_read_config_byte(dev, 0x44, &slave_data); + slave_data &= (0x0F + 0xE1 * ap->port_no); + /* Load the matching timing */ + slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << (ap->port_no ? 4 : 0); + pci_write_config_byte(dev, 0x44, slave_data); + } else { /* Master */ + master_data &= 0xCCF4; /* Mask out IORDY|TIME1|DMAONLY + and master timing bits */ + master_data |= control; + master_data |= + (timings[pio][0] << 12) | + (timings[pio][1] << 8); + } + udma_enable &= ~(1 << devid); + pci_write_config_word(dev, master_port, master_data); + } + pci_write_config_byte(dev, 0x48, udma_enable); +} + +static struct scsi_host_template efar_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations efar_ops = { + .port_disable = ata_port_disable, + .set_piomode = efar_set_piomode, + .set_dmamode = efar_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = efar_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + + +/** + * efar_init_one - Register EFAR ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in efar_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int efar_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + static struct ata_port_info info = { + .sht = &efar_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma1-2 */ + .udma_mask = 0x0f, /* UDMA 66 */ + .port_ops = &efar_ops, + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, + "version " DRV_VERSION "\n"); + + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id efar_pci_tbl[] = { + { 0x1055, 0x9130, PCI_ANY_ID, PCI_ANY_ID, }, + { } /* terminate list */ +}; + +static struct pci_driver efar_pci_driver = { + .name = DRV_NAME, + .id_table = efar_pci_tbl, + .probe = efar_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init efar_init(void) +{ + return pci_register_driver(&efar_pci_driver); +} + +static void __exit efar_exit(void) +{ + pci_unregister_driver(&efar_pci_driver); +} + + +module_init(efar_init); +module_exit(efar_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for EFAR PIIX clones"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, efar_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c new file mode 100644 index 000000000000..e54a0c51681d --- /dev/null +++ b/drivers/ata/pata_hpt366.c @@ -0,0 +1,478 @@ +/* + * Libata driver for the highpoint 366 and 368 UDMA66 ATA controllers. + * + * This driver is heavily based upon: + * + * linux/drivers/ide/pci/hpt366.c Version 0.36 April 25, 2003 + * + * Copyright (C) 1999-2003 Andre Hedrick + * Portions Copyright (C) 2001 Sun Microsystems, Inc. + * Portions Copyright (C) 2003 Red Hat Inc + * + * + * TODO + * Maybe PLL mode + * Look into engine reset on timeout errors. Should not be + * required. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_hpt366" +#define DRV_VERSION "0.5" + +struct hpt_clock { + u8 xfer_speed; + u32 timing; +}; + +/* key for bus clock timings + * bit + * 0:3 data_high_time. inactive time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 4:8 data_low_time. active time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 9:12 cmd_high_time. inactive time of DIOW_/DIOR_ during task file + * register access. + * 13:17 cmd_low_time. active time of DIOW_/DIOR_ during task file + * register access. + * 18:21 udma_cycle_time. clock freq and clock cycles for UDMA xfer. + * during task file register access. + * 22:24 pre_high_time. time to initialize 1st cycle for PIO and MW DMA + * xfer. + * 25:27 cmd_pre_high_time. time to initialize 1st PIO cycle for task + * register access. + * 28 UDMA enable + * 29 DMA enable + * 30 PIO_MST enable. if set, the chip is in bus master mode during + * PIO. + * 31 FIFO enable. + */ + +static const struct hpt_clock hpt366_40[] = { + { XFER_UDMA_4, 0x900fd943 }, + { XFER_UDMA_3, 0x900ad943 }, + { XFER_UDMA_2, 0x900bd943 }, + { XFER_UDMA_1, 0x9008d943 }, + { XFER_UDMA_0, 0x9008d943 }, + + { XFER_MW_DMA_2, 0xa008d943 }, + { XFER_MW_DMA_1, 0xa010d955 }, + { XFER_MW_DMA_0, 0xa010d9fc }, + + { XFER_PIO_4, 0xc008d963 }, + { XFER_PIO_3, 0xc010d974 }, + { XFER_PIO_2, 0xc010d997 }, + { XFER_PIO_1, 0xc010d9c7 }, + { XFER_PIO_0, 0xc018d9d9 }, + { 0, 0x0120d9d9 } +}; + +static const struct hpt_clock hpt366_33[] = { + { XFER_UDMA_4, 0x90c9a731 }, + { XFER_UDMA_3, 0x90cfa731 }, + { XFER_UDMA_2, 0x90caa731 }, + { XFER_UDMA_1, 0x90cba731 }, + { XFER_UDMA_0, 0x90c8a731 }, + + { XFER_MW_DMA_2, 0xa0c8a731 }, + { XFER_MW_DMA_1, 0xa0c8a732 }, /* 0xa0c8a733 */ + { XFER_MW_DMA_0, 0xa0c8a797 }, + + { XFER_PIO_4, 0xc0c8a731 }, + { XFER_PIO_3, 0xc0c8a742 }, + { XFER_PIO_2, 0xc0d0a753 }, + { XFER_PIO_1, 0xc0d0a7a3 }, /* 0xc0d0a793 */ + { XFER_PIO_0, 0xc0d0a7aa }, /* 0xc0d0a7a7 */ + { 0, 0x0120a7a7 } +}; + +static const struct hpt_clock hpt366_25[] = { + { XFER_UDMA_4, 0x90c98521 }, + { XFER_UDMA_3, 0x90cf8521 }, + { XFER_UDMA_2, 0x90cf8521 }, + { XFER_UDMA_1, 0x90cb8521 }, + { XFER_UDMA_0, 0x90cb8521 }, + + { XFER_MW_DMA_2, 0xa0ca8521 }, + { XFER_MW_DMA_1, 0xa0ca8532 }, + { XFER_MW_DMA_0, 0xa0ca8575 }, + + { XFER_PIO_4, 0xc0ca8521 }, + { XFER_PIO_3, 0xc0ca8532 }, + { XFER_PIO_2, 0xc0ca8542 }, + { XFER_PIO_1, 0xc0d08572 }, + { XFER_PIO_0, 0xc0d08585 }, + { 0, 0x01208585 } +}; + +static const char *bad_ata33[] = { + "Maxtor 92720U8", "Maxtor 92040U6", "Maxtor 91360U4", "Maxtor 91020U3", "Maxtor 90845U3", "Maxtor 90650U2", + "Maxtor 91360D8", "Maxtor 91190D7", "Maxtor 91020D6", "Maxtor 90845D5", "Maxtor 90680D4", "Maxtor 90510D3", "Maxtor 90340D2", + "Maxtor 91152D8", "Maxtor 91008D7", "Maxtor 90845D6", "Maxtor 90840D6", "Maxtor 90720D5", "Maxtor 90648D5", "Maxtor 90576D4", + "Maxtor 90510D4", + "Maxtor 90432D3", "Maxtor 90288D2", "Maxtor 90256D2", + "Maxtor 91000D8", "Maxtor 90910D8", "Maxtor 90875D7", "Maxtor 90840D7", "Maxtor 90750D6", "Maxtor 90625D5", "Maxtor 90500D4", + "Maxtor 91728D8", "Maxtor 91512D7", "Maxtor 91303D6", "Maxtor 91080D5", "Maxtor 90845D4", "Maxtor 90680D4", "Maxtor 90648D3", "Maxtor 90432D2", + NULL +}; + +static const char *bad_ata66_4[] = { + "IBM-DTLA-307075", + "IBM-DTLA-307060", + "IBM-DTLA-307045", + "IBM-DTLA-307030", + "IBM-DTLA-307020", + "IBM-DTLA-307015", + "IBM-DTLA-305040", + "IBM-DTLA-305030", + "IBM-DTLA-305020", + "IC35L010AVER07-0", + "IC35L020AVER07-0", + "IC35L030AVER07-0", + "IC35L040AVER07-0", + "IC35L060AVER07-0", + "WDC AC310200R", + NULL +}; + +static const char *bad_ata66_3[] = { + "WDC AC310200R", + NULL +}; + +static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr, const char *list[]) +{ + unsigned char model_num[40]; + char *s; + unsigned int len; + int i = 0; + + ata_id_string(dev->id, model_num, ATA_ID_PROD_OFS, sizeof(model_num)); + s = &model_num[0]; + len = strnlen(s, sizeof(model_num)); + + /* ATAPI specifies that empty space is blank-filled; remove blanks */ + while ((len > 0) && (s[len - 1] == ' ')) { + len--; + s[len] = 0; + } + + while(list[i] != NULL) { + if (!strncmp(list[i], s, len)) { + printk(KERN_WARNING DRV_NAME ": %s is not supported for %s.\n", + modestr, list[i]); + return 1; + } + i++; + } + return 0; +} + +/** + * hpt366_filter - mode selection filter + * @ap: ATA interface + * @adev: ATA device + * + * Block UDMA on devices that cause trouble with this controller. + */ + +static unsigned long hpt366_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) +{ + if (adev->class == ATA_DEV_ATA) { + if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33)) + mask &= ~ATA_MASK_UDMA; + if (hpt_dma_blacklisted(adev, "UDMA3", bad_ata66_3)) + mask &= ~(0x07 << ATA_SHIFT_UDMA); + if (hpt_dma_blacklisted(adev, "UDMA4", bad_ata66_4)) + mask &= ~(0x0F << ATA_SHIFT_UDMA); + } + return ata_pci_default_filter(ap, adev, mask); +} + +/** + * hpt36x_find_mode - reset the hpt36x bus + * @ap: ATA port + * @speed: transfer mode + * + * Return the 32bit register programming information for this channel + * that matches the speed provided. + */ + +static u32 hpt36x_find_mode(struct ata_port *ap, int speed) +{ + struct hpt_clock *clocks = ap->host->private_data; + + while(clocks->xfer_speed) { + if (clocks->xfer_speed == speed) + return clocks->timing; + clocks++; + } + BUG(); + return 0xffffffffU; /* silence compiler warning */ +} + +static int hpt36x_pre_reset(struct ata_port *ap) +{ + u8 ata66; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_byte(pdev, 0x5A, &ata66); + if (ata66 & (1 << ap->port_no)) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +/** + * hpt36x_error_handler - reset the hpt36x bus + * @ap: ATA port to reset + * + * Perform the reset handling for the 366/368 + */ + +static void hpt36x_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, hpt36x_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * hpt366_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Perform PIO mode setup. + */ + +static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + if (fast & 0x80) { + fast &= ~0x80; + pci_write_config_byte(pdev, addr2, fast); + } + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt36x_find_mode(ap, adev->pio_mode); + mode &= ~0x8000000; /* No FIFO in PIO */ + mode &= ~0x30070000; /* Leave config bits alone */ + reg &= 0x30070000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt366_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * Set up the channel for MWDMA or UDMA modes. Much the same as with + * PIO, load the mode number and then set MWDMA or UDMA flag. + */ + +static void hpt366_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + if (fast & 0x80) { + fast &= ~0x80; + pci_write_config_byte(pdev, addr2, fast); + } + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt36x_find_mode(ap, adev->dma_mode); + mode |= 0x8000000; /* FIFO in MWDMA or UDMA */ + mode &= ~0xC0000000; /* Leave config bits alone */ + reg &= 0xC0000000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +static struct scsi_host_template hpt36x_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +/* + * Configuration for HPT366/68 + */ + +static struct ata_port_operations hpt366_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt366_set_piomode, + .set_dmamode = hpt366_set_dmamode, + .mode_filter = hpt366_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt36x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * hpt36x_init_one - Initialise an HPT366/368 + * @dev: PCI device + * @id: Entry in match table + * + * Initialise an HPT36x device. There are some interesting complications + * here. Firstly the chip may report 366 and be one of several variants. + * Secondly all the timings depend on the clock for the chip which we must + * detect and look up + * + * This is the known chip mappings. It may be missing a couple of later + * releases. + * + * Chip version PCI Rev Notes + * HPT366 4 (HPT366) 0 UDMA66 + * HPT366 4 (HPT366) 1 UDMA66 + * HPT368 4 (HPT366) 2 UDMA66 + * HPT37x/30x 4 (HPT366) 3+ Other driver + * + */ + +static int hpt36x_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info_hpt366 = { + .sht = &hpt36x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, + .port_ops = &hpt366_port_ops + }; + struct ata_port_info *port_info[2] = {&info_hpt366, &info_hpt366}; + + u32 class_rev; + u32 reg1; + u8 drive_fast; + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xFF; + + /* May be a later chip in disguise. Check */ + /* Newer chips are not in the HPT36x driver. Ignore them */ + if (class_rev > 2) + return -ENODEV; + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4)); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78); + pci_write_config_byte(dev, PCI_MIN_GNT, 0x08); + pci_write_config_byte(dev, PCI_MAX_LAT, 0x08); + + pci_read_config_byte(dev, 0x51, &drive_fast); + if (drive_fast & 0x80) + pci_write_config_byte(dev, 0x51, drive_fast & ~0x80); + + pci_read_config_dword(dev, 0x40, ®1); + + /* PCI clocking determines the ATA timing values to use */ + /* info_hpt366 is safe against re-entry so we can scribble on it */ + switch(reg1 & 0x700) { + case 5: + info_hpt366.private_data = &hpt366_40; + break; + case 9: + info_hpt366.private_data = &hpt366_25; + break; + default: + info_hpt366.private_data = &hpt366_33; + break; + } + /* Now kick off ATA set up */ + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id hpt36x[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT366), }, + { 0, }, +}; + +static struct pci_driver hpt36x_pci_driver = { + .name = DRV_NAME, + .id_table = hpt36x, + .probe = hpt36x_init_one, + .remove = ata_pci_remove_one +}; + +static int __init hpt36x_init(void) +{ + return pci_register_driver(&hpt36x_pci_driver); +} + + +static void __exit hpt36x_exit(void) +{ + pci_unregister_driver(&hpt36x_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the Highpoint HPT366/368"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, hpt36x); +MODULE_VERSION(DRV_VERSION); + +module_init(hpt36x_init); +module_exit(hpt36x_exit); diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c new file mode 100644 index 000000000000..7c3da53f1e0c --- /dev/null +++ b/drivers/ata/pata_hpt37x.c @@ -0,0 +1,1257 @@ +/* + * Libata driver for the highpoint 37x and 30x UDMA66 ATA controllers. + * + * This driver is heavily based upon: + * + * linux/drivers/ide/pci/hpt366.c Version 0.36 April 25, 2003 + * + * Copyright (C) 1999-2003 Andre Hedrick + * Portions Copyright (C) 2001 Sun Microsystems, Inc. + * Portions Copyright (C) 2003 Red Hat Inc + * + * TODO + * PLL mode + * Look into engine reset on timeout errors. Should not be + * required. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_hpt37x" +#define DRV_VERSION "0.5" + +struct hpt_clock { + u8 xfer_speed; + u32 timing; +}; + +struct hpt_chip { + const char *name; + unsigned int base; + struct hpt_clock const *clocks[4]; +}; + +/* key for bus clock timings + * bit + * 0:3 data_high_time. inactive time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 4:8 data_low_time. active time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 9:12 cmd_high_time. inactive time of DIOW_/DIOR_ during task file + * register access. + * 13:17 cmd_low_time. active time of DIOW_/DIOR_ during task file + * register access. + * 18:21 udma_cycle_time. clock freq and clock cycles for UDMA xfer. + * during task file register access. + * 22:24 pre_high_time. time to initialize 1st cycle for PIO and MW DMA + * xfer. + * 25:27 cmd_pre_high_time. time to initialize 1st PIO cycle for task + * register access. + * 28 UDMA enable + * 29 DMA enable + * 30 PIO_MST enable. if set, the chip is in bus master mode during + * PIO. + * 31 FIFO enable. + */ + +/* from highpoint documentation. these are old values */ +static const struct hpt_clock hpt370_timings_33[] = { +/* { XFER_UDMA_5, 0x1A85F442, 0x16454e31 }, */ + { XFER_UDMA_5, 0x16454e31 }, + { XFER_UDMA_4, 0x16454e31 }, + { XFER_UDMA_3, 0x166d4e31 }, + { XFER_UDMA_2, 0x16494e31 }, + { XFER_UDMA_1, 0x164d4e31 }, + { XFER_UDMA_0, 0x16514e31 }, + + { XFER_MW_DMA_2, 0x26514e21 }, + { XFER_MW_DMA_1, 0x26514e33 }, + { XFER_MW_DMA_0, 0x26514e97 }, + + { XFER_PIO_4, 0x06514e21 }, + { XFER_PIO_3, 0x06514e22 }, + { XFER_PIO_2, 0x06514e33 }, + { XFER_PIO_1, 0x06914e43 }, + { XFER_PIO_0, 0x06914e57 }, + { 0, 0x06514e57 } +}; + +static const struct hpt_clock hpt370_timings_66[] = { + { XFER_UDMA_5, 0x14846231 }, + { XFER_UDMA_4, 0x14886231 }, + { XFER_UDMA_3, 0x148c6231 }, + { XFER_UDMA_2, 0x148c6231 }, + { XFER_UDMA_1, 0x14906231 }, + { XFER_UDMA_0, 0x14986231 }, + + { XFER_MW_DMA_2, 0x26514e21 }, + { XFER_MW_DMA_1, 0x26514e33 }, + { XFER_MW_DMA_0, 0x26514e97 }, + + { XFER_PIO_4, 0x06514e21 }, + { XFER_PIO_3, 0x06514e22 }, + { XFER_PIO_2, 0x06514e33 }, + { XFER_PIO_1, 0x06914e43 }, + { XFER_PIO_0, 0x06914e57 }, + { 0, 0x06514e57 } +}; + +/* these are the current (4 sep 2001) timings from highpoint */ +static const struct hpt_clock hpt370a_timings_33[] = { + { XFER_UDMA_5, 0x12446231 }, + { XFER_UDMA_4, 0x12446231 }, + { XFER_UDMA_3, 0x126c6231 }, + { XFER_UDMA_2, 0x12486231 }, + { XFER_UDMA_1, 0x124c6233 }, + { XFER_UDMA_0, 0x12506297 }, + + { XFER_MW_DMA_2, 0x22406c31 }, + { XFER_MW_DMA_1, 0x22406c33 }, + { XFER_MW_DMA_0, 0x22406c97 }, + + { XFER_PIO_4, 0x06414e31 }, + { XFER_PIO_3, 0x06414e42 }, + { XFER_PIO_2, 0x06414e53 }, + { XFER_PIO_1, 0x06814e93 }, + { XFER_PIO_0, 0x06814ea7 }, + { 0, 0x06814ea7 } +}; + +/* 2x 33MHz timings */ +static const struct hpt_clock hpt370a_timings_66[] = { + { XFER_UDMA_5, 0x1488e673 }, + { XFER_UDMA_4, 0x1488e673 }, + { XFER_UDMA_3, 0x1498e673 }, + { XFER_UDMA_2, 0x1490e673 }, + { XFER_UDMA_1, 0x1498e677 }, + { XFER_UDMA_0, 0x14a0e73f }, + + { XFER_MW_DMA_2, 0x2480fa73 }, + { XFER_MW_DMA_1, 0x2480fa77 }, + { XFER_MW_DMA_0, 0x2480fb3f }, + + { XFER_PIO_4, 0x0c82be73 }, + { XFER_PIO_3, 0x0c82be95 }, + { XFER_PIO_2, 0x0c82beb7 }, + { XFER_PIO_1, 0x0d02bf37 }, + { XFER_PIO_0, 0x0d02bf5f }, + { 0, 0x0d02bf5f } +}; + +static const struct hpt_clock hpt370a_timings_50[] = { + { XFER_UDMA_5, 0x12848242 }, + { XFER_UDMA_4, 0x12ac8242 }, + { XFER_UDMA_3, 0x128c8242 }, + { XFER_UDMA_2, 0x120c8242 }, + { XFER_UDMA_1, 0x12148254 }, + { XFER_UDMA_0, 0x121882ea }, + + { XFER_MW_DMA_2, 0x22808242 }, + { XFER_MW_DMA_1, 0x22808254 }, + { XFER_MW_DMA_0, 0x228082ea }, + + { XFER_PIO_4, 0x0a81f442 }, + { XFER_PIO_3, 0x0a81f443 }, + { XFER_PIO_2, 0x0a81f454 }, + { XFER_PIO_1, 0x0ac1f465 }, + { XFER_PIO_0, 0x0ac1f48a }, + { 0, 0x0ac1f48a } +}; + +static const struct hpt_clock hpt372_timings_33[] = { + { XFER_UDMA_6, 0x1c81dc62 }, + { XFER_UDMA_5, 0x1c6ddc62 }, + { XFER_UDMA_4, 0x1c8ddc62 }, + { XFER_UDMA_3, 0x1c8edc62 }, /* checkme */ + { XFER_UDMA_2, 0x1c91dc62 }, + { XFER_UDMA_1, 0x1c9adc62 }, /* checkme */ + { XFER_UDMA_0, 0x1c82dc62 }, /* checkme */ + + { XFER_MW_DMA_2, 0x2c829262 }, + { XFER_MW_DMA_1, 0x2c829266 }, /* checkme */ + { XFER_MW_DMA_0, 0x2c82922e }, /* checkme */ + + { XFER_PIO_4, 0x0c829c62 }, + { XFER_PIO_3, 0x0c829c84 }, + { XFER_PIO_2, 0x0c829ca6 }, + { XFER_PIO_1, 0x0d029d26 }, + { XFER_PIO_0, 0x0d029d5e }, + { 0, 0x0d029d5e } +}; + +static const struct hpt_clock hpt372_timings_50[] = { + { XFER_UDMA_5, 0x12848242 }, + { XFER_UDMA_4, 0x12ac8242 }, + { XFER_UDMA_3, 0x128c8242 }, + { XFER_UDMA_2, 0x120c8242 }, + { XFER_UDMA_1, 0x12148254 }, + { XFER_UDMA_0, 0x121882ea }, + + { XFER_MW_DMA_2, 0x22808242 }, + { XFER_MW_DMA_1, 0x22808254 }, + { XFER_MW_DMA_0, 0x228082ea }, + + { XFER_PIO_4, 0x0a81f442 }, + { XFER_PIO_3, 0x0a81f443 }, + { XFER_PIO_2, 0x0a81f454 }, + { XFER_PIO_1, 0x0ac1f465 }, + { XFER_PIO_0, 0x0ac1f48a }, + { 0, 0x0a81f443 } +}; + +static const struct hpt_clock hpt372_timings_66[] = { + { XFER_UDMA_6, 0x1c869c62 }, + { XFER_UDMA_5, 0x1cae9c62 }, + { XFER_UDMA_4, 0x1c8a9c62 }, + { XFER_UDMA_3, 0x1c8e9c62 }, + { XFER_UDMA_2, 0x1c929c62 }, + { XFER_UDMA_1, 0x1c9a9c62 }, + { XFER_UDMA_0, 0x1c829c62 }, + + { XFER_MW_DMA_2, 0x2c829c62 }, + { XFER_MW_DMA_1, 0x2c829c66 }, + { XFER_MW_DMA_0, 0x2c829d2e }, + + { XFER_PIO_4, 0x0c829c62 }, + { XFER_PIO_3, 0x0c829c84 }, + { XFER_PIO_2, 0x0c829ca6 }, + { XFER_PIO_1, 0x0d029d26 }, + { XFER_PIO_0, 0x0d029d5e }, + { 0, 0x0d029d26 } +}; + +static const struct hpt_clock hpt374_timings_33[] = { + { XFER_UDMA_6, 0x12808242 }, + { XFER_UDMA_5, 0x12848242 }, + { XFER_UDMA_4, 0x12ac8242 }, + { XFER_UDMA_3, 0x128c8242 }, + { XFER_UDMA_2, 0x120c8242 }, + { XFER_UDMA_1, 0x12148254 }, + { XFER_UDMA_0, 0x121882ea }, + + { XFER_MW_DMA_2, 0x22808242 }, + { XFER_MW_DMA_1, 0x22808254 }, + { XFER_MW_DMA_0, 0x228082ea }, + + { XFER_PIO_4, 0x0a81f442 }, + { XFER_PIO_3, 0x0a81f443 }, + { XFER_PIO_2, 0x0a81f454 }, + { XFER_PIO_1, 0x0ac1f465 }, + { XFER_PIO_0, 0x0ac1f48a }, + { 0, 0x06814e93 } +}; + +static const struct hpt_chip hpt370 = { + "HPT370", + 48, + { + hpt370_timings_33, + NULL, + NULL, + hpt370_timings_66 + } +}; + +static const struct hpt_chip hpt370a = { + "HPT370A", + 48, + { + hpt370a_timings_33, + NULL, + hpt370a_timings_50, + hpt370a_timings_66 + } +}; + +static const struct hpt_chip hpt372 = { + "HPT372", + 55, + { + hpt372_timings_33, + NULL, + hpt372_timings_50, + hpt372_timings_66 + } +}; + +static const struct hpt_chip hpt302 = { + "HPT302", + 66, + { + hpt372_timings_33, + NULL, + hpt372_timings_50, + hpt372_timings_66 + } +}; + +static const struct hpt_chip hpt371 = { + "HPT371", + 66, + { + hpt372_timings_33, + NULL, + hpt372_timings_50, + hpt372_timings_66 + } +}; + +static const struct hpt_chip hpt372a = { + "HPT372A", + 66, + { + hpt372_timings_33, + NULL, + hpt372_timings_50, + hpt372_timings_66 + } +}; + +static const struct hpt_chip hpt374 = { + "HPT374", + 48, + { + hpt374_timings_33, + NULL, + NULL, + NULL + } +}; + +/** + * hpt37x_find_mode - reset the hpt37x bus + * @ap: ATA port + * @speed: transfer mode + * + * Return the 32bit register programming information for this channel + * that matches the speed provided. + */ + +static u32 hpt37x_find_mode(struct ata_port *ap, int speed) +{ + struct hpt_clock *clocks = ap->host->private_data; + + while(clocks->xfer_speed) { + if (clocks->xfer_speed == speed) + return clocks->timing; + clocks++; + } + BUG(); + return 0xffffffffU; /* silence compiler warning */ +} + +static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr, const char *list[]) +{ + unsigned char model_num[40]; + char *s; + unsigned int len; + int i = 0; + + ata_id_string(dev->id, model_num, ATA_ID_PROD_OFS, + sizeof(model_num)); + s = &model_num[0]; + len = strnlen(s, sizeof(model_num)); + + /* ATAPI specifies that empty space is blank-filled; remove blanks */ + while ((len > 0) && (s[len - 1] == ' ')) { + len--; + s[len] = 0; + } + + while(list[i] != NULL) { + if (!strncmp(list[i], s, len)) { + printk(KERN_WARNING DRV_NAME ": %s is not supported for %s.\n", + modestr, list[i]); + return 1; + } + i++; + } + return 0; +} + +static const char *bad_ata33[] = { + "Maxtor 92720U8", "Maxtor 92040U6", "Maxtor 91360U4", "Maxtor 91020U3", "Maxtor 90845U3", "Maxtor 90650U2", + "Maxtor 91360D8", "Maxtor 91190D7", "Maxtor 91020D6", "Maxtor 90845D5", "Maxtor 90680D4", "Maxtor 90510D3", "Maxtor 90340D2", + "Maxtor 91152D8", "Maxtor 91008D7", "Maxtor 90845D6", "Maxtor 90840D6", "Maxtor 90720D5", "Maxtor 90648D5", "Maxtor 90576D4", + "Maxtor 90510D4", + "Maxtor 90432D3", "Maxtor 90288D2", "Maxtor 90256D2", + "Maxtor 91000D8", "Maxtor 90910D8", "Maxtor 90875D7", "Maxtor 90840D7", "Maxtor 90750D6", "Maxtor 90625D5", "Maxtor 90500D4", + "Maxtor 91728D8", "Maxtor 91512D7", "Maxtor 91303D6", "Maxtor 91080D5", "Maxtor 90845D4", "Maxtor 90680D4", "Maxtor 90648D3", "Maxtor 90432D2", + NULL +}; + +static const char *bad_ata100_5[] = { + "IBM-DTLA-307075", + "IBM-DTLA-307060", + "IBM-DTLA-307045", + "IBM-DTLA-307030", + "IBM-DTLA-307020", + "IBM-DTLA-307015", + "IBM-DTLA-305040", + "IBM-DTLA-305030", + "IBM-DTLA-305020", + "IC35L010AVER07-0", + "IC35L020AVER07-0", + "IC35L030AVER07-0", + "IC35L040AVER07-0", + "IC35L060AVER07-0", + "WDC AC310200R", + NULL +}; + +/** + * hpt370_filter - mode selection filter + * @ap: ATA interface + * @adev: ATA device + * + * Block UDMA on devices that cause trouble with this controller. + */ + +static unsigned long hpt370_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) +{ + if (adev->class != ATA_DEV_ATA) { + if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33)) + mask &= ~ATA_MASK_UDMA; + if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5)) + mask &= ~(0x1F << ATA_SHIFT_UDMA); + } + return ata_pci_default_filter(ap, adev, mask); +} + +/** + * hpt370a_filter - mode selection filter + * @ap: ATA interface + * @adev: ATA device + * + * Block UDMA on devices that cause trouble with this controller. + */ + +static unsigned long hpt370a_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) +{ + if (adev->class != ATA_DEV_ATA) { + if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5)) + mask &= ~ (0x1F << ATA_SHIFT_UDMA); + } + return ata_pci_default_filter(ap, adev, mask); +} + +/** + * hpt37x_pre_reset - reset the hpt37x bus + * @ap: ATA port to reset + * + * Perform the initial reset handling for the 370/372 and 374 func 0 + */ + +static int hpt37x_pre_reset(struct ata_port *ap) +{ + u8 scr2, ata66; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_byte(pdev, 0x5B, &scr2); + pci_write_config_byte(pdev, 0x5B, scr2 & ~0x01); + /* Cable register now active */ + pci_read_config_byte(pdev, 0x5A, &ata66); + /* Restore state */ + pci_write_config_byte(pdev, 0x5B, scr2); + + if (ata66 & (1 << ap->port_no)) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + + /* Reset the state machine */ + pci_write_config_byte(pdev, 0x50, 0x37); + pci_write_config_byte(pdev, 0x54, 0x37); + udelay(100); + + return ata_std_prereset(ap); +} + +/** + * hpt37x_error_handler - reset the hpt374 + * @ap: ATA port to reset + * + * Perform probe for HPT37x, except for HPT374 channel 2 + */ + +static void hpt37x_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, hpt37x_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +static int hpt374_pre_reset(struct ata_port *ap) +{ + u16 mcr3, mcr6; + u8 ata66; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + /* Do the extra channel work */ + pci_read_config_word(pdev, 0x52, &mcr3); + pci_read_config_word(pdev, 0x56, &mcr6); + /* Set bit 15 of 0x52 to enable TCBLID as input + Set bit 15 of 0x56 to enable FCBLID as input + */ + pci_write_config_word(pdev, 0x52, mcr3 | 0x8000); + pci_write_config_word(pdev, 0x56, mcr6 | 0x8000); + pci_read_config_byte(pdev, 0x5A, &ata66); + /* Reset TCBLID/FCBLID to output */ + pci_write_config_word(pdev, 0x52, mcr3); + pci_write_config_word(pdev, 0x56, mcr6); + + if (ata66 & (1 << ap->port_no)) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + + /* Reset the state machine */ + pci_write_config_byte(pdev, 0x50, 0x37); + pci_write_config_byte(pdev, 0x54, 0x37); + udelay(100); + + return ata_std_prereset(ap); +} + +/** + * hpt374_error_handler - reset the hpt374 + * @classes: + * + * The 374 cable detect is a little different due to the extra + * channels. The function 0 channels work like usual but function 1 + * is special + */ + +static void hpt374_error_handler(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (!(PCI_FUNC(pdev->devfn) & 1)) + hpt37x_error_handler(ap); + else + ata_bmdma_drive_eh(ap, hpt374_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * hpt370_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Perform PIO mode setup. + */ + +static void hpt370_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + fast &= ~0x02; + fast |= 0x01; + pci_write_config_byte(pdev, addr2, fast); + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt37x_find_mode(ap, adev->pio_mode); + mode &= ~0x8000000; /* No FIFO in PIO */ + mode &= ~0x30070000; /* Leave config bits alone */ + reg &= 0x30070000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt370_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * Set up the channel for MWDMA or UDMA modes. Much the same as with + * PIO, load the mode number and then set MWDMA or UDMA flag. + */ + +static void hpt370_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + fast &= ~0x02; + fast |= 0x01; + pci_write_config_byte(pdev, addr2, fast); + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt37x_find_mode(ap, adev->dma_mode); + mode |= 0x8000000; /* FIFO in MWDMA or UDMA */ + mode &= ~0xC0000000; /* Leave config bits alone */ + reg &= 0xC0000000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt370_bmdma_start - DMA engine begin + * @qc: ATA command + * + * The 370 and 370A want us to reset the DMA engine each time we + * use it. The 372 and later are fine. + */ + +static void hpt370_bmdma_start(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + pci_write_config_byte(pdev, 0x50 + 4 * ap->port_no, 0x37); + udelay(10); + ata_bmdma_start(qc); +} + +/** + * hpt370_bmdma_end - DMA engine stop + * @qc: ATA command + * + * Work around the HPT370 DMA engine. + */ + +static void hpt370_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 dma_stat = inb(ap->ioaddr.bmdma_addr + 2); + u8 dma_cmd; + unsigned long bmdma = ap->ioaddr.bmdma_addr; + + if (dma_stat & 0x01) { + udelay(20); + dma_stat = inb(bmdma + 2); + } + if (dma_stat & 0x01) { + /* Clear the engine */ + pci_write_config_byte(pdev, 0x50 + 4 * ap->port_no, 0x37); + udelay(10); + /* Stop DMA */ + dma_cmd = inb(bmdma ); + outb(dma_cmd & 0xFE, bmdma); + /* Clear Error */ + dma_stat = inb(bmdma + 2); + outb(dma_stat | 0x06 , bmdma + 2); + /* Clear the engine */ + pci_write_config_byte(pdev, 0x50 + 4 * ap->port_no, 0x37); + udelay(10); + } + ata_bmdma_stop(qc); +} + +/** + * hpt372_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Perform PIO mode setup. + */ + +static void hpt372_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + fast &= ~0x07; + pci_write_config_byte(pdev, addr2, fast); + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt37x_find_mode(ap, adev->pio_mode); + + printk("Find mode for %d reports %X\n", adev->pio_mode, mode); + mode &= ~0x80000000; /* No FIFO in PIO */ + mode &= ~0x30070000; /* Leave config bits alone */ + reg &= 0x30070000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt372_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * Set up the channel for MWDMA or UDMA modes. Much the same as with + * PIO, load the mode number and then set MWDMA or UDMA flag. + */ + +static void hpt372_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + fast &= ~0x07; + pci_write_config_byte(pdev, addr2, fast); + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt37x_find_mode(ap, adev->dma_mode); + printk("Find mode for DMA %d reports %X\n", adev->dma_mode, mode); + mode &= ~0xC0000000; /* Leave config bits alone */ + mode |= 0x80000000; /* FIFO in MWDMA or UDMA */ + reg &= 0xC0000000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt37x_bmdma_end - DMA engine stop + * @qc: ATA command + * + * Clean up after the HPT372 and later DMA engine + */ + +static void hpt37x_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int mscreg = 0x50 + 2 * ap->port_no; + u8 bwsr_stat, msc_stat; + + pci_read_config_byte(pdev, 0x6A, &bwsr_stat); + pci_read_config_byte(pdev, mscreg, &msc_stat); + if (bwsr_stat & (1 << ap->port_no)) + pci_write_config_byte(pdev, mscreg, msc_stat | 0x30); + ata_bmdma_stop(qc); +} + + +static struct scsi_host_template hpt37x_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +/* + * Configuration for HPT370 + */ + +static struct ata_port_operations hpt370_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt370_set_piomode, + .set_dmamode = hpt370_set_dmamode, + .mode_filter = hpt370_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt37x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = hpt370_bmdma_start, + .bmdma_stop = hpt370_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Configuration for HPT370A. Close to 370 but less filters + */ + +static struct ata_port_operations hpt370a_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt370_set_piomode, + .set_dmamode = hpt370_set_dmamode, + .mode_filter = hpt370a_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt37x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = hpt370_bmdma_start, + .bmdma_stop = hpt370_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Configuration for HPT372, HPT371, HPT302. Slightly different PIO + * and DMA mode setting functionality. + */ + +static struct ata_port_operations hpt372_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt372_set_piomode, + .set_dmamode = hpt372_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt37x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = hpt37x_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Configuration for HPT374. Mode setting works like 372 and friends + * but we have a different cable detection procedure. + */ + +static struct ata_port_operations hpt374_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt372_set_piomode, + .set_dmamode = hpt372_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt374_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = hpt37x_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * htp37x_clock_slot - Turn timing to PC clock entry + * @freq: Reported frequency timing + * @base: Base timing + * + * Turn the timing data intoa clock slot (0 for 33, 1 for 40, 2 for 50 + * and 3 for 66Mhz) + */ + +static int hpt37x_clock_slot(unsigned int freq, unsigned int base) +{ + unsigned int f = (base * freq) / 192; /* Mhz */ + if (f < 40) + return 0; /* 33Mhz slot */ + if (f < 45) + return 1; /* 40Mhz slot */ + if (f < 55) + return 2; /* 50Mhz slot */ + return 3; /* 60Mhz slot */ +} + +/** + * hpt37x_calibrate_dpll - Calibrate the DPLL loop + * @dev: PCI device + * + * Perform a calibration cycle on the HPT37x DPLL. Returns 1 if this + * succeeds + */ + +static int hpt37x_calibrate_dpll(struct pci_dev *dev) +{ + u8 reg5b; + u32 reg5c; + int tries; + + for(tries = 0; tries < 0x5000; tries++) { + udelay(50); + pci_read_config_byte(dev, 0x5b, ®5b); + if (reg5b & 0x80) { + /* See if it stays set */ + for(tries = 0; tries < 0x1000; tries ++) { + pci_read_config_byte(dev, 0x5b, ®5b); + /* Failed ? */ + if ((reg5b & 0x80) == 0) + return 0; + } + /* Turn off tuning, we have the DPLL set */ + pci_read_config_dword(dev, 0x5c, ®5c); + pci_write_config_dword(dev, 0x5c, reg5c & ~ 0x100); + return 1; + } + } + /* Never went stable */ + return 0; +} +/** + * hpt37x_init_one - Initialise an HPT37X/302 + * @dev: PCI device + * @id: Entry in match table + * + * Initialise an HPT37x device. There are some interesting complications + * here. Firstly the chip may report 366 and be one of several variants. + * Secondly all the timings depend on the clock for the chip which we must + * detect and look up + * + * This is the known chip mappings. It may be missing a couple of later + * releases. + * + * Chip version PCI Rev Notes + * HPT366 4 (HPT366) 0 Other driver + * HPT366 4 (HPT366) 1 Other driver + * HPT368 4 (HPT366) 2 Other driver + * HPT370 4 (HPT366) 3 UDMA100 + * HPT370A 4 (HPT366) 4 UDMA100 + * HPT372 4 (HPT366) 5 UDMA133 (1) + * HPT372N 4 (HPT366) 6 Other driver + * HPT372A 5 (HPT372) 1 UDMA133 (1) + * HPT372N 5 (HPT372) 2 Other driver + * HPT302 6 (HPT302) 1 UDMA133 + * HPT302N 6 (HPT302) 2 Other driver + * HPT371 7 (HPT371) * UDMA133 + * HPT374 8 (HPT374) * UDMA133 4 channel + * HPT372N 9 (HPT372N) * Other driver + * + * (1) UDMA133 support depends on the bus clock + */ + +static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + /* HPT370 - UDMA100 */ + static struct ata_port_info info_hpt370 = { + .sht = &hpt37x_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &hpt370_port_ops + }; + /* HPT370A - UDMA100 */ + static struct ata_port_info info_hpt370a = { + .sht = &hpt37x_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &hpt370a_port_ops + }; + /* HPT371, 372 and friends - UDMA133 */ + static struct ata_port_info info_hpt372 = { + .sht = &hpt37x_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &hpt372_port_ops + }; + /* HPT371, 372 and friends - UDMA100 at 50MHz clock */ + static struct ata_port_info info_hpt372_50 = { + .sht = &hpt37x_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &hpt372_port_ops + }; + /* HPT374 - UDMA133 */ + static struct ata_port_info info_hpt374 = { + .sht = &hpt37x_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &hpt374_port_ops + }; + + static const int MHz[4] = { 33, 40, 50, 66 }; + + struct ata_port_info *port_info[2]; + struct ata_port_info *port; + + u8 irqmask; + u32 class_rev; + u32 freq; + + const struct hpt_chip *chip_table; + int clock_slot; + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xFF; + + if (dev->device == PCI_DEVICE_ID_TTI_HPT366) { + /* May be a later chip in disguise. Check */ + /* Older chips are in the HPT366 driver. Ignore them */ + if (class_rev < 3) + return -ENODEV; + /* N series chips have their own driver. Ignore */ + if (class_rev == 6) + return -ENODEV; + + switch(class_rev) { + case 3: + port = &info_hpt370; + chip_table = &hpt370; + break; + case 4: + port = &info_hpt370a; + chip_table = &hpt370a; + break; + case 5: + port = &info_hpt372; + chip_table = &hpt372; + break; + default: + printk(KERN_ERR "pata_hpt37x: Unknown HPT366 subtype please report (%d).\n", class_rev); + return -ENODEV; + } + } else { + switch(dev->device) { + case PCI_DEVICE_ID_TTI_HPT372: + /* 372N if rev >= 2*/ + if (class_rev >= 2) + return -ENODEV; + port = &info_hpt372; + chip_table = &hpt372a; + break; + case PCI_DEVICE_ID_TTI_HPT302: + /* 302N if rev > 1 */ + if (class_rev > 1) + return -ENODEV; + port = &info_hpt372; + /* Check this */ + chip_table = &hpt302; + break; + case PCI_DEVICE_ID_TTI_HPT371: + port = &info_hpt372; + chip_table = &hpt371; + break; + case PCI_DEVICE_ID_TTI_HPT374: + chip_table = &hpt374; + port = &info_hpt374; + break; + default: + printk(KERN_ERR "pata_hpt37x: PCI table is bogus please report (%d).\n", dev->device); + return -ENODEV; + } + } + /* Ok so this is a chip we support */ + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4)); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78); + pci_write_config_byte(dev, PCI_MIN_GNT, 0x08); + pci_write_config_byte(dev, PCI_MAX_LAT, 0x08); + + pci_read_config_byte(dev, 0x5A, &irqmask); + irqmask &= ~0x10; + pci_write_config_byte(dev, 0x5a, irqmask); + + /* + * default to pci clock. make sure MA15/16 are set to output + * to prevent drives having problems with 40-pin cables. Needed + * for some drives such as IBM-DTLA which will not enter ready + * state on reset when PDIAG is a input. + */ + + pci_write_config_byte(dev, 0x5b, 0x23); + + pci_read_config_dword(dev, 0x70, &freq); + if ((freq >> 12) != 0xABCDE) { + int i; + u8 sr; + u32 total = 0; + + printk(KERN_WARNING "pata_hpt37x: BIOS has not set timing clocks.\n"); + + /* This is the process the HPT371 BIOS is reported to use */ + for(i = 0; i < 128; i++) { + pci_read_config_byte(dev, 0x78, &sr); + total += sr; + udelay(15); + } + freq = total / 128; + } + freq &= 0x1FF; + + /* + * Turn the frequency check into a band and then find a timing + * table to match it. + */ + + clock_slot = hpt37x_clock_slot(freq, chip_table->base); + if (chip_table->clocks[clock_slot] == NULL) { + /* + * We need to try PLL mode instead + */ + unsigned int f_low = (MHz[clock_slot] * chip_table->base) / 192; + unsigned int f_high = f_low + 2; + int adjust; + + for(adjust = 0; adjust < 8; adjust++) { + if (hpt37x_calibrate_dpll(dev)) + break; + /* See if it'll settle at a fractionally different clock */ + if ((adjust & 3) == 3) { + f_low --; + f_high ++; + } + pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low); + } + if (adjust == 8) { + printk(KERN_WARNING "hpt37x: DPLL did not stabilize.\n"); + return -ENODEV; + } + /* Check if this works for all cases */ + port->private_data = (void *)hpt370_timings_66; + + printk(KERN_INFO "hpt37x: Bus clock %dMHz, using DPLL.\n", MHz[clock_slot]); + } else { + port->private_data = (void *)chip_table->clocks[clock_slot]; + /* + * Perform a final fixup. The 371 and 372 clock determines + * if UDMA133 is available. + */ + + if (clock_slot == 2 && chip_table == &hpt372) { /* 50Mhz */ + printk(KERN_WARNING "pata_hpt37x: No UDMA133 support available with 50MHz bus clock.\n"); + if (port == &info_hpt372) + port = &info_hpt372_50; + else BUG(); + } + printk(KERN_INFO "hpt37x: %s: Bus clock %dMHz.\n", chip_table->name, MHz[clock_slot]); + } + port_info[0] = port_info[1] = port; + /* Now kick off ATA set up */ + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id hpt37x[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT366), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT371), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT372), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT374), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT302), }, + { 0, }, +}; + +static struct pci_driver hpt37x_pci_driver = { + .name = DRV_NAME, + .id_table = hpt37x, + .probe = hpt37x_init_one, + .remove = ata_pci_remove_one +}; + +static int __init hpt37x_init(void) +{ + return pci_register_driver(&hpt37x_pci_driver); +} + + +static void __exit hpt37x_exit(void) +{ + pci_unregister_driver(&hpt37x_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the Highpoint HPT37x/30x"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, hpt37x); +MODULE_VERSION(DRV_VERSION); + +module_init(hpt37x_init); +module_exit(hpt37x_exit); diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c new file mode 100644 index 000000000000..40fcda62c7a2 --- /dev/null +++ b/drivers/ata/pata_hpt3x2n.c @@ -0,0 +1,597 @@ +/* + * Libata driver for the highpoint 372N and 302N UDMA66 ATA controllers. + * + * This driver is heavily based upon: + * + * linux/drivers/ide/pci/hpt366.c Version 0.36 April 25, 2003 + * + * Copyright (C) 1999-2003 Andre Hedrick + * Portions Copyright (C) 2001 Sun Microsystems, Inc. + * Portions Copyright (C) 2003 Red Hat Inc + * + * + * TODO + * 371N + * Work out best PLL policy + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_hpt3x2n" +#define DRV_VERSION "0.3" + +enum { + HPT_PCI_FAST = (1 << 31), + PCI66 = (1 << 1), + USE_DPLL = (1 << 0) +}; + +struct hpt_clock { + u8 xfer_speed; + u32 timing; +}; + +struct hpt_chip { + const char *name; + struct hpt_clock *clocks[3]; +}; + +/* key for bus clock timings + * bit + * 0:3 data_high_time. inactive time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 4:8 data_low_time. active time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 9:12 cmd_high_time. inactive time of DIOW_/DIOR_ during task file + * register access. + * 13:17 cmd_low_time. active time of DIOW_/DIOR_ during task file + * register access. + * 18:21 udma_cycle_time. clock freq and clock cycles for UDMA xfer. + * during task file register access. + * 22:24 pre_high_time. time to initialize 1st cycle for PIO and MW DMA + * xfer. + * 25:27 cmd_pre_high_time. time to initialize 1st PIO cycle for task + * register access. + * 28 UDMA enable + * 29 DMA enable + * 30 PIO_MST enable. if set, the chip is in bus master mode during + * PIO. + * 31 FIFO enable. + */ + +/* 66MHz DPLL clocks */ + +static struct hpt_clock hpt3x2n_clocks[] = { + { XFER_UDMA_7, 0x1c869c62 }, + { XFER_UDMA_6, 0x1c869c62 }, + { XFER_UDMA_5, 0x1c8a9c62 }, + { XFER_UDMA_4, 0x1c8a9c62 }, + { XFER_UDMA_3, 0x1c8e9c62 }, + { XFER_UDMA_2, 0x1c929c62 }, + { XFER_UDMA_1, 0x1c9a9c62 }, + { XFER_UDMA_0, 0x1c829c62 }, + + { XFER_MW_DMA_2, 0x2c829c62 }, + { XFER_MW_DMA_1, 0x2c829c66 }, + { XFER_MW_DMA_0, 0x2c829d2c }, + + { XFER_PIO_4, 0x0c829c62 }, + { XFER_PIO_3, 0x0c829c84 }, + { XFER_PIO_2, 0x0c829ca6 }, + { XFER_PIO_1, 0x0d029d26 }, + { XFER_PIO_0, 0x0d029d5e }, + { 0, 0x0d029d5e } +}; + +/** + * hpt3x2n_find_mode - reset the hpt3x2n bus + * @ap: ATA port + * @speed: transfer mode + * + * Return the 32bit register programming information for this channel + * that matches the speed provided. For the moment the clocks table + * is hard coded but easy to change. This will be needed if we use + * different DPLLs + */ + +static u32 hpt3x2n_find_mode(struct ata_port *ap, int speed) +{ + struct hpt_clock *clocks = hpt3x2n_clocks; + + while(clocks->xfer_speed) { + if (clocks->xfer_speed == speed) + return clocks->timing; + clocks++; + } + BUG(); + return 0xffffffffU; /* silence compiler warning */ +} + +/** + * hpt3x2n_pre_reset - reset the hpt3x2n bus + * @ap: ATA port to reset + * + * Perform the initial reset handling for the 3x2n series controllers. + * Reset the hardware and state machine, obtain the cable type. + */ + +static int hpt3xn_pre_reset(struct ata_port *ap) +{ + u8 scr2, ata66; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_byte(pdev, 0x5B, &scr2); + pci_write_config_byte(pdev, 0x5B, scr2 & ~0x01); + /* Cable register now active */ + pci_read_config_byte(pdev, 0x5A, &ata66); + /* Restore state */ + pci_write_config_byte(pdev, 0x5B, scr2); + + if (ata66 & (1 << ap->port_no)) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + + /* Reset the state machine */ + pci_write_config_byte(pdev, 0x50, 0x37); + pci_write_config_byte(pdev, 0x54, 0x37); + udelay(100); + + return ata_std_prereset(ap); +} + +/** + * hpt3x2n_error_handler - probe the hpt3x2n bus + * @ap: ATA port to reset + * + * Perform the probe reset handling for the 3x2N + */ + +static void hpt3x2n_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, hpt3xn_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * hpt3x2n_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Perform PIO mode setup. + */ + +static void hpt3x2n_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + fast &= ~0x07; + pci_write_config_byte(pdev, addr2, fast); + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt3x2n_find_mode(ap, adev->pio_mode); + mode &= ~0x8000000; /* No FIFO in PIO */ + mode &= ~0x30070000; /* Leave config bits alone */ + reg &= 0x30070000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt3x2n_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * Set up the channel for MWDMA or UDMA modes. Much the same as with + * PIO, load the mode number and then set MWDMA or UDMA flag. + */ + +static void hpt3x2n_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 addr1, addr2; + u32 reg; + u32 mode; + u8 fast; + + addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); + addr2 = 0x51 + 4 * ap->port_no; + + /* Fast interrupt prediction disable, hold off interrupt disable */ + pci_read_config_byte(pdev, addr2, &fast); + fast &= ~0x07; + pci_write_config_byte(pdev, addr2, fast); + + pci_read_config_dword(pdev, addr1, ®); + mode = hpt3x2n_find_mode(ap, adev->dma_mode); + mode |= 0x8000000; /* FIFO in MWDMA or UDMA */ + mode &= ~0xC0000000; /* Leave config bits alone */ + reg &= 0xC0000000; /* Strip timing bits */ + pci_write_config_dword(pdev, addr1, reg | mode); +} + +/** + * hpt3x2n_bmdma_end - DMA engine stop + * @qc: ATA command + * + * Clean up after the HPT3x2n and later DMA engine + */ + +static void hpt3x2n_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int mscreg = 0x50 + 2 * ap->port_no; + u8 bwsr_stat, msc_stat; + + pci_read_config_byte(pdev, 0x6A, &bwsr_stat); + pci_read_config_byte(pdev, mscreg, &msc_stat); + if (bwsr_stat & (1 << ap->port_no)) + pci_write_config_byte(pdev, mscreg, msc_stat | 0x30); + ata_bmdma_stop(qc); +} + +/** + * hpt3x2n_set_clock - clock control + * @ap: ATA port + * @source: 0x21 or 0x23 for PLL or PCI sourced clock + * + * Switch the ATA bus clock between the PLL and PCI clock sources + * while correctly isolating the bus and resetting internal logic + * + * We must use the DPLL for + * - writing + * - second channel UDMA7 (SATA ports) or higher + * - 66MHz PCI + * + * or we will underclock the device and get reduced performance. + */ + +static void hpt3x2n_set_clock(struct ata_port *ap, int source) +{ + unsigned long bmdma = ap->ioaddr.bmdma_addr; + + /* Tristate the bus */ + outb(0x80, bmdma+0x73); + outb(0x80, bmdma+0x77); + + /* Switch clock and reset channels */ + outb(source, bmdma+0x7B); + outb(0xC0, bmdma+0x79); + + /* Reset state machines */ + outb(0x37, bmdma+0x70); + outb(0x37, bmdma+0x74); + + /* Complete reset */ + outb(0x00, bmdma+0x79); + + /* Reconnect channels to bus */ + outb(0x00, bmdma+0x73); + outb(0x00, bmdma+0x77); +} + +/* Check if our partner interface is busy */ + +static int hpt3x2n_pair_idle(struct ata_port *ap) +{ + struct ata_host *host = ap->host; + struct ata_port *pair = host->ports[ap->port_no ^ 1]; + + if (pair->hsm_task_state == HSM_ST_IDLE) + return 1; + return 0; +} + +static int hpt3x2n_use_dpll(struct ata_port *ap, int reading) +{ + long flags = (long)ap->host->private_data; + /* See if we should use the DPLL */ + if (reading == 0) + return USE_DPLL; /* Needed for write */ + if (flags & PCI66) + return USE_DPLL; /* Needed at 66Mhz */ + return 0; +} + +static unsigned int hpt3x2n_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_taskfile *tf = &qc->tf; + struct ata_port *ap = qc->ap; + int flags = (long)ap->host->private_data; + + if (hpt3x2n_pair_idle(ap)) { + int dpll = hpt3x2n_use_dpll(ap, (tf->flags & ATA_TFLAG_WRITE)); + if ((flags & USE_DPLL) != dpll) { + if (dpll == 1) + hpt3x2n_set_clock(ap, 0x21); + else + hpt3x2n_set_clock(ap, 0x23); + } + } + return ata_qc_issue_prot(qc); +} + +static struct scsi_host_template hpt3x2n_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +/* + * Configuration for HPT3x2n. + */ + +static struct ata_port_operations hpt3x2n_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt3x2n_set_piomode, + .set_dmamode = hpt3x2n_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt3x2n_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = hpt3x2n_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = hpt3x2n_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * hpt3xn_calibrate_dpll - Calibrate the DPLL loop + * @dev: PCI device + * + * Perform a calibration cycle on the HPT3xN DPLL. Returns 1 if this + * succeeds + */ + +static int hpt3xn_calibrate_dpll(struct pci_dev *dev) +{ + u8 reg5b; + u32 reg5c; + int tries; + + for(tries = 0; tries < 0x5000; tries++) { + udelay(50); + pci_read_config_byte(dev, 0x5b, ®5b); + if (reg5b & 0x80) { + /* See if it stays set */ + for(tries = 0; tries < 0x1000; tries ++) { + pci_read_config_byte(dev, 0x5b, ®5b); + /* Failed ? */ + if ((reg5b & 0x80) == 0) + return 0; + } + /* Turn off tuning, we have the DPLL set */ + pci_read_config_dword(dev, 0x5c, ®5c); + pci_write_config_dword(dev, 0x5c, reg5c & ~ 0x100); + return 1; + } + } + /* Never went stable */ + return 0; +} + +static int hpt3x2n_pci_clock(struct pci_dev *pdev) +{ + unsigned long freq; + u32 fcnt; + + pci_read_config_dword(pdev, 0x70/*CHECKME*/, &fcnt); + if ((fcnt >> 12) != 0xABCDE) { + printk(KERN_WARNING "hpt3xn: BIOS clock data not set.\n"); + return 33; /* Not BIOS set */ + } + fcnt &= 0x1FF; + + freq = (fcnt * 77) / 192; + + /* Clamp to bands */ + if (freq < 40) + return 33; + if (freq < 45) + return 40; + if (freq < 55) + return 50; + return 66; +} + +/** + * hpt3x2n_init_one - Initialise an HPT37X/302 + * @dev: PCI device + * @id: Entry in match table + * + * Initialise an HPT3x2n device. There are some interesting complications + * here. Firstly the chip may report 366 and be one of several variants. + * Secondly all the timings depend on the clock for the chip which we must + * detect and look up + * + * This is the known chip mappings. It may be missing a couple of later + * releases. + * + * Chip version PCI Rev Notes + * HPT372 4 (HPT366) 5 Other driver + * HPT372N 4 (HPT366) 6 UDMA133 + * HPT372 5 (HPT372) 1 Other driver + * HPT372N 5 (HPT372) 2 UDMA133 + * HPT302 6 (HPT302) * Other driver + * HPT302N 6 (HPT302) > 1 UDMA133 + * HPT371 7 (HPT371) * Other driver + * HPT371N 7 (HPT371) > 1 UDMA133 + * HPT374 8 (HPT374) * Other driver + * HPT372N 9 (HPT372N) * UDMA133 + * + * (1) UDMA133 support depends on the bus clock + * + * To pin down HPT371N + */ + +static int hpt3x2n_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + /* HPT372N and friends - UDMA133 */ + static struct ata_port_info info = { + .sht = &hpt3x2n_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &hpt3x2n_port_ops + }; + struct ata_port_info *port_info[2]; + struct ata_port_info *port = &info; + + u8 irqmask; + u32 class_rev; + + unsigned int pci_mhz; + unsigned int f_low, f_high; + int adjust; + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xFF; + + switch(dev->device) { + case PCI_DEVICE_ID_TTI_HPT366: + if (class_rev < 6) + return -ENODEV; + break; + case PCI_DEVICE_ID_TTI_HPT372: + /* 372N if rev >= 1*/ + if (class_rev == 0) + return -ENODEV; + break; + case PCI_DEVICE_ID_TTI_HPT302: + if (class_rev < 2) + return -ENODEV; + break; + case PCI_DEVICE_ID_TTI_HPT372N: + break; + default: + printk(KERN_ERR "pata_hpt3x2n: PCI table is bogus please report (%d).\n", dev->device); + return -ENODEV; + } + + /* Ok so this is a chip we support */ + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4)); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78); + pci_write_config_byte(dev, PCI_MIN_GNT, 0x08); + pci_write_config_byte(dev, PCI_MAX_LAT, 0x08); + + pci_read_config_byte(dev, 0x5A, &irqmask); + irqmask &= ~0x10; + pci_write_config_byte(dev, 0x5a, irqmask); + + /* Tune the PLL. HPT recommend using 75 for SATA, 66 for UDMA133 or + 50 for UDMA100. Right now we always use 66 */ + + pci_mhz = hpt3x2n_pci_clock(dev); + + f_low = (pci_mhz * 48) / 66; /* PCI Mhz for 66Mhz DPLL */ + f_high = f_low + 2; /* Tolerance */ + + pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low | 0x100); + /* PLL clock */ + pci_write_config_byte(dev, 0x5B, 0x21); + + /* Unlike the 37x we don't try jiggling the frequency */ + for(adjust = 0; adjust < 8; adjust++) { + if (hpt3xn_calibrate_dpll(dev)) + break; + pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low); + } + if (adjust == 8) + printk(KERN_WARNING "hpt3xn: DPLL did not stabilize.\n"); + + /* Set our private data up. We only need a few flags so we use + it directly */ + port->private_data = NULL; + if (pci_mhz > 60) + port->private_data = (void *)PCI66; + + /* Now kick off ATA set up */ + port_info[0] = port_info[1] = port; + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id hpt3x2n[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT366), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT372), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT302), }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT372N), }, + { 0, }, +}; + +static struct pci_driver hpt3x2n_pci_driver = { + .name = DRV_NAME, + .id_table = hpt3x2n, + .probe = hpt3x2n_init_one, + .remove = ata_pci_remove_one +}; + +static int __init hpt3x2n_init(void) +{ + return pci_register_driver(&hpt3x2n_pci_driver); +} + + +static void __exit hpt3x2n_exit(void) +{ + pci_unregister_driver(&hpt3x2n_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the Highpoint HPT3x2n/30x"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, hpt3x2n); +MODULE_VERSION(DRV_VERSION); + +module_init(hpt3x2n_init); +module_exit(hpt3x2n_exit); diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c new file mode 100644 index 000000000000..c93406ebf6ed --- /dev/null +++ b/drivers/ata/pata_hpt3x3.c @@ -0,0 +1,226 @@ +/* + * pata_hpt3x3 - HPT3x3 driver + * (c) Copyright 2005-2006 Red Hat + * + * Was pata_hpt34x but the naming was confusing as it supported the + * 343 and 363 so it has been renamed. + * + * Based on: + * linux/drivers/ide/pci/hpt34x.c Version 0.40 Sept 10, 2002 + * Copyright (C) 1998-2000 Andre Hedrick + * + * May be copied or modified under the terms of the GNU General Public + * License + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_hpt3x3" +#define DRV_VERSION "0.4.1" + +static int hpt3x3_probe_init(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * hpt3x3_probe_reset - reset the hpt3x3 bus + * @ap: ATA port to reset + * + * Perform the housekeeping when doing an ATA bus reeset. We just + * need to force the cable type. + */ + +static void hpt3x3_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, hpt3x3_probe_init, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * hpt3x3_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Set our PIO requirements. This is fairly simple on the HPT3x3 as + * all we have to do is clear the MWDMA and UDMA bits then load the + * mode number. + */ + +static void hpt3x3_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 r1, r2; + int dn = 2 * ap->port_no + adev->devno; + + pci_read_config_dword(pdev, 0x44, &r1); + pci_read_config_dword(pdev, 0x48, &r2); + /* Load the PIO timing number */ + r1 &= ~(7 << (3 * dn)); + r1 |= (adev->pio_mode - XFER_PIO_0) << (3 * dn); + r2 &= ~(0x11 << dn); /* Clear MWDMA and UDMA bits */ + + pci_write_config_dword(pdev, 0x44, r1); + pci_write_config_dword(pdev, 0x48, r2); +} + +/** + * hpt3x3_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * Set up the channel for MWDMA or UDMA modes. Much the same as with + * PIO, load the mode number and then set MWDMA or UDMA flag. + */ + +static void hpt3x3_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 r1, r2; + int dn = 2 * ap->port_no + adev->devno; + int mode_num = adev->dma_mode & 0x0F; + + pci_read_config_dword(pdev, 0x44, &r1); + pci_read_config_dword(pdev, 0x48, &r2); + /* Load the timing number */ + r1 &= ~(7 << (3 * dn)); + r1 |= (mode_num << (3 * dn)); + r2 &= ~(0x11 << dn); /* Clear MWDMA and UDMA bits */ + + if (adev->dma_mode >= XFER_UDMA_0) + r2 |= 0x01 << dn; /* Ultra mode */ + else + r2 |= 0x10 << dn; /* MWDMA */ + + pci_write_config_dword(pdev, 0x44, r1); + pci_write_config_dword(pdev, 0x48, r2); +} + +static struct scsi_host_template hpt3x3_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations hpt3x3_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = hpt3x3_set_piomode, + .set_dmamode = hpt3x3_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = hpt3x3_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * hpt3x3_init_one - Initialise an HPT343/363 + * @dev: PCI device + * @id: Entry in match table + * + * Perform basic initialisation. The chip has a quirk that it won't + * function unless it is at XX00. The old ATA driver touched this up + * but we leave it for pci quirks to do properly. + */ + +static int hpt3x3_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &hpt3x3_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x07, + .port_ops = &hpt3x3_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + u16 cmd; + + /* Initialize the board */ + pci_write_config_word(dev, 0x80, 0x00); + /* Check if it is a 343 or a 363. 363 has COMMAND_MEMORY set */ + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (cmd & PCI_COMMAND_MEMORY) + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF0); + else + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20); + + /* Now kick off ATA set up */ + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id hpt3x3[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT343), }, + { 0, }, +}; + +static struct pci_driver hpt3x3_pci_driver = { + .name = DRV_NAME, + .id_table = hpt3x3, + .probe = hpt3x3_init_one, + .remove = ata_pci_remove_one +}; + +static int __init hpt3x3_init(void) +{ + return pci_register_driver(&hpt3x3_pci_driver); +} + + +static void __exit hpt3x3_exit(void) +{ + pci_unregister_driver(&hpt3x3_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the Highpoint HPT343/363"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, hpt3x3); +MODULE_VERSION(DRV_VERSION); + +module_init(hpt3x3_init); +module_exit(hpt3x3_exit); diff --git a/drivers/ata/pata_isapnp.c b/drivers/ata/pata_isapnp.c new file mode 100644 index 000000000000..73948c8b7270 --- /dev/null +++ b/drivers/ata/pata_isapnp.c @@ -0,0 +1,156 @@ + +/* + * pata-isapnp.c - ISA PnP PATA controller driver. + * Copyright 2005/2006 Red Hat Inc , all rights reserved. + * + * Based in part on ide-pnp.c by Andrey Panin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_isapnp" +#define DRV_VERSION "0.1.5" + +static struct scsi_host_template isapnp_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations isapnp_port_ops = { + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ata_bmdma_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * isapnp_init_one - attach an isapnp interface + * @idev: PnP device + * @dev_id: matching detect line + * + * Register an ISA bus IDE interface. Such interfaces are PIO 0 and + * non shared IRQ. + */ + +static int isapnp_init_one(struct pnp_dev *idev, const struct pnp_device_id *dev_id) +{ + struct ata_probe_ent ae; + + if (pnp_port_valid(idev, 0) == 0) + return -ENODEV; + + /* FIXME: Should selected polled PIO here not fail */ + if (pnp_irq_valid(idev, 0) == 0) + return -ENODEV; + + memset(&ae, 0, sizeof(struct ata_probe_ent)); + INIT_LIST_HEAD(&ae.node); + ae.dev = &idev->dev; + ae.port_ops = &isapnp_port_ops; + ae.sht = &isapnp_sht; + ae.n_ports = 1; + ae.pio_mask = 1; /* ISA so PIO 0 cycles */ + ae.irq = pnp_irq(idev, 0); + ae.irq_flags = 0; + ae.port_flags = ATA_FLAG_SLAVE_POSS; + ae.port[0].cmd_addr = pnp_port_start(idev, 0); + + if (pnp_port_valid(idev, 1) == 0) { + ae.port[0].altstatus_addr = pnp_port_start(idev, 1); + ae.port[0].ctl_addr = pnp_port_start(idev, 1); + ae.port_flags |= ATA_FLAG_SRST; + } + ata_std_ports(&ae.port[0]); + + if (ata_device_add(&ae) == 0) + return -ENODEV; + return 0; +} + +/** + * isapnp_remove_one - unplug an isapnp interface + * @idev: PnP device + * + * Remove a previously configured PnP ATA port. Called only on module + * unload events as the core does not currently deal with ISAPnP docking. + */ + +static void isapnp_remove_one(struct pnp_dev *idev) +{ + struct device *dev = &idev->dev; + struct ata_host *host = dev_get_drvdata(dev); + + ata_host_remove(host); + dev_set_drvdata(dev, NULL); +} + +static struct pnp_device_id isapnp_devices[] = { + /* Generic ESDI/IDE/ATA compatible hard disk controller */ + {.id = "PNP0600", .driver_data = 0}, + {.id = ""} +}; + +static struct pnp_driver isapnp_driver = { + .name = DRV_NAME, + .id_table = isapnp_devices, + .probe = isapnp_init_one, + .remove = isapnp_remove_one, +}; + +static int __init isapnp_init(void) +{ + return pnp_register_driver(&isapnp_driver); +} + +static void __exit isapnp_exit(void) +{ + pnp_unregister_driver(&isapnp_driver); +} + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for ISA PnP ATA"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +module_init(isapnp_init); +module_exit(isapnp_exit); diff --git a/drivers/ata/pata_it8172.c b/drivers/ata/pata_it8172.c new file mode 100644 index 000000000000..f8367191fc7b --- /dev/null +++ b/drivers/ata/pata_it8172.c @@ -0,0 +1,288 @@ +/* + * pata_it8172.c - IT8172 PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * Based heavily on + * + * BRIEF MODULE DESCRIPTION + * IT8172 IDE controller support + * + * Copyright 2000 MontaVista Software Inc. + * Author: MontaVista Software, Inc. + * stevel@mvista.com or source@mvista.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + * TODO + * Check for errata + * See if we really need to force native mode + * PIO timings (also lacking in original) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_it8172" +#define DRV_VERSION "0.3.1" + +static int it8172_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits it8172_enable_bits[] = { + { 0x00, 0, 0x00, 0x00 }, + { 0x40, 1, 0x00, 0x01 } + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (ap->port_no && !pci_test_config_bits(pdev, &it8172_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +static void it8172_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, it8172_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * it8172_set_pio_timing - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called by both the pio and dma setup functions to set the controller + * timings for PIO transfers. We must load both the mode number and + * timing values into the controller. + */ + +static void it8172_set_pio_timing(struct ata_port *ap, struct ata_device *adev, int pio) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u16 reg40; + + pci_read_config_word(pdev, 0x40, ®40); + + /* + * FIX! The DIOR/DIOW pulse width and recovery times in port 0x44 + * are being left at the default values of 8 PCI clocks (242 nsec + * for a 33 MHz clock). These can be safely shortened at higher + * PIO modes. The DIOR/DIOW pulse width and recovery times only + * apply to PIO modes, not to the DMA modes. + */ + + /* + * Enable port 0x44. The IT8172G spec is confused; it calls + * this register the "Slave IDE Timing Register", but in fact, + * it controls timing for both master and slave drives. + */ + + reg40 |= 0x4000; + if (adev->devno) { + reg40 &= 0xC006; + if (pio > 1) + /* Enable prefetch and IORDY sample-point */ + reg40 |= 0x0060; + } else { + reg40 &= 0xC060; + if (pio > 1) + /* Enable prefetch and IORDY sample-point */ + reg40 |= 0x0006; + } + /* Write back the enables */ + pci_write_config_word(pdev, 0x40, reg40); +} + +/** + * it8172_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. We use a shared helper for this + * as the DMA setup must also adjust the PIO timing information. + */ + +static void it8172_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + it8172_set_pio_timing(ap, adev, adev->pio_mode - XFER_PIO_0); +} + +/** + * it8172_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the DMA mode setup. We must tune an appropriate PIO + * mode to match. + */ + +static void it8172_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int dn = (2 * ap->port_no) + adev->devno; + u8 reg48, reg4a; + int pio; + + static const int pio_map[] = { 1, 3, 4}; + /* + * Setting the DMA cycle time to 2 or 3 PCI clocks (60 and 91 nsec + * at 33 MHz PCI clock) seems to cause BadCRC errors during DMA + * transfers on some drives, even though both numbers meet the minimum + * ATAPI-4 spec of 73 and 54 nsec for UDMA 1 and 2 respectively. + * So the faster times are just commented out here. The good news is + * that the slower cycle time has very little affect on transfer + * performance. + */ + + pci_read_config_byte(pdev, 0x48, ®48); + pci_read_config_byte(pdev, 0x4A, ®4a); + + reg4a &= ~(3 << (4 * dn)); + + if (adev->dma_mode >= XFER_UDMA_0) { + reg48 |= 1 << dn; +#ifdef UDMA_TIMING_SET + reg4a |= ((adev->dma_mode - XFER_UDMA_0) << (4 * dn)); +#endif + pio = 4; + } else { + pio = pio_map[adev->dma_mode - XFER_MW_DMA_0]; + reg48 &= ~ (1 << dn); + } + pci_write_config_byte(pdev, 0x48, reg48); + pci_write_config_byte(pdev, 0x4A, reg4a); + it8172_set_pio_timing(ap, adev, pio); + +} + +static struct scsi_host_template it8172_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations it8172_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = it8172_set_piomode, + .set_dmamode = it8172_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = it8172_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int it8172_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &it8172_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x06, /* No MWDMA0 support */ + .udma_mask = 0x7, + .port_ops = &it8172_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + if ((!(PCI_FUNC(dev->devfn) & 1) || + (!((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))) + return -ENODEV; /* IT8172 is more than an IDE controller */ + + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id it8172[] = { + { PCI_DEVICE(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8172G), }, + { 0, }, +}; + +static struct pci_driver it8172_pci_driver = { + .name = DRV_NAME, + .id_table = it8172, + .probe = it8172_init_one, + .remove = ata_pci_remove_one +}; + +static int __init it8172_init(void) +{ + return pci_register_driver(&it8172_pci_driver); +} + + +static void __exit it8172_exit(void) +{ + pci_unregister_driver(&it8172_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for ITE IT8172"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, it8172); +MODULE_VERSION(DRV_VERSION); + +module_init(it8172_init); +module_exit(it8172_exit); diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c new file mode 100644 index 000000000000..c8a7798f8ce5 --- /dev/null +++ b/drivers/ata/pata_it821x.c @@ -0,0 +1,847 @@ +/* + * ata-it821x.c - IT821x PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * based upon + * + * it821x.c + * + * linux/drivers/ide/pci/it821x.c Version 0.09 December 2004 + * + * Copyright (C) 2004 Red Hat + * + * May be copied or modified under the terms of the GNU General Public License + * Based in part on the ITE vendor provided SCSI driver. + * + * Documentation available from + * http://www.ite.com.tw/pc/IT8212F_V04.pdf + * Some other documents are NDA. + * + * The ITE8212 isn't exactly a standard IDE controller. It has two + * modes. In pass through mode then it is an IDE controller. In its smart + * mode its actually quite a capable hardware raid controller disguised + * as an IDE controller. Smart mode only understands DMA read/write and + * identify, none of the fancier commands apply. The IT8211 is identical + * in other respects but lacks the raid mode. + * + * Errata: + * o Rev 0x10 also requires master/slave hold the same DMA timings and + * cannot do ATAPI MWDMA. + * o The identify data for raid volumes lacks CHS info (technically ok) + * but also fails to set the LBA28 and other bits. We fix these in + * the IDE probe quirk code. + * o If you write LBA48 sized I/O's (ie > 256 sector) in smart mode + * raid then the controller firmware dies + * o Smart mode without RAID doesn't clear all the necessary identify + * bits to reduce the command set to the one used + * + * This has a few impacts on the driver + * - In pass through mode we do all the work you would expect + * - In smart mode the clocking set up is done by the controller generally + * but we must watch the other limits and filter. + * - There are a few extra vendor commands that actually talk to the + * controller but only work PIO with no IRQ. + * + * Vendor areas of the identify block in smart mode are used for the + * timing and policy set up. Each HDD in raid mode also has a serial + * block on the disk. The hardware extra commands are get/set chip status, + * rebuild, get rebuild status. + * + * In Linux the driver supports pass through mode as if the device was + * just another IDE controller. If the smart mode is running then + * volumes are managed by the controller firmware and each IDE "disk" + * is a raid volume. Even more cute - the controller can do automated + * hotplug and rebuild. + * + * The pass through controller itself is a little demented. It has a + * flaw that it has a single set of PIO/MWDMA timings per channel so + * non UDMA devices restrict each others performance. It also has a + * single clock source per channel so mixed UDMA100/133 performance + * isn't perfect and we have to pick a clock. Thankfully none of this + * matters in smart mode. ATAPI DMA is not currently supported. + * + * It seems the smart mode is a win for RAID1/RAID10 but otherwise not. + * + * TODO + * - ATAPI and other speed filtering + * - Command filter in smart mode + * - RAID configuration ioctls + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define DRV_NAME "pata_it821x" +#define DRV_VERSION "0.3.2" + +struct it821x_dev +{ + unsigned int smart:1, /* Are we in smart raid mode */ + timing10:1; /* Rev 0x10 */ + u8 clock_mode; /* 0, ATA_50 or ATA_66 */ + u8 want[2][2]; /* Mode/Pri log for master slave */ + /* We need these for switching the clock when DMA goes on/off + The high byte is the 66Mhz timing */ + u16 pio[2]; /* Cached PIO values */ + u16 mwdma[2]; /* Cached MWDMA values */ + u16 udma[2]; /* Cached UDMA values (per drive) */ + u16 last_device; /* Master or slave loaded ? */ +}; + +#define ATA_66 0 +#define ATA_50 1 +#define ATA_ANY 2 + +#define UDMA_OFF 0 +#define MWDMA_OFF 0 + +/* + * We allow users to force the card into non raid mode without + * flashing the alternative BIOS. This is also neccessary right now + * for embedded platforms that cannot run a PC BIOS but are using this + * device. + */ + +static int it8212_noraid; + +/** + * it821x_pre_reset - probe + * @ap: ATA port + * + * Set the cable type + */ + +static int it821x_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +/** + * it821x_error_handler - probe/reset + * @ap: ATA port + * + * Set the cable type and trigger a probe + */ + +static void it821x_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, it821x_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * it821x_program - program the PIO/MWDMA registers + * @ap: ATA port + * @adev: Device to program + * @timing: Timing value (66Mhz in top 8bits, 50 in the low 8) + * + * Program the PIO/MWDMA timing for this channel according to the + * current clock. These share the same register so are managed by + * the DMA start/stop sequence as with the old driver. + */ + +static void it821x_program(struct ata_port *ap, struct ata_device *adev, u16 timing) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct it821x_dev *itdev = ap->private_data; + int channel = ap->port_no; + u8 conf; + + /* Program PIO/MWDMA timing bits */ + if (itdev->clock_mode == ATA_66) + conf = timing >> 8; + else + conf = timing & 0xFF; + pci_write_config_byte(pdev, 0x54 + 4 * channel, conf); +} + + +/** + * it821x_program_udma - program the UDMA registers + * @ap: ATA port + * @adev: ATA device to update + * @timing: Timing bits. Top 8 are for 66Mhz bottom for 50Mhz + * + * Program the UDMA timing for this drive according to the + * current clock. Handles the dual clocks and also knows about + * the errata on the 0x10 revision. The UDMA errata is partly handled + * here and partly in start_dma. + */ + +static void it821x_program_udma(struct ata_port *ap, struct ata_device *adev, u16 timing) +{ + struct it821x_dev *itdev = ap->private_data; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int channel = ap->port_no; + int unit = adev->devno; + u8 conf; + + /* Program UDMA timing bits */ + if (itdev->clock_mode == ATA_66) + conf = timing >> 8; + else + conf = timing & 0xFF; + if (itdev->timing10 == 0) + pci_write_config_byte(pdev, 0x56 + 4 * channel + unit, conf); + else { + /* Early revision must be programmed for both together */ + pci_write_config_byte(pdev, 0x56 + 4 * channel, conf); + pci_write_config_byte(pdev, 0x56 + 4 * channel + 1, conf); + } +} + +/** + * it821x_clock_strategy + * @ap: ATA interface + * @adev: ATA device being updated + * + * Select between the 50 and 66Mhz base clocks to get the best + * results for this interface. + */ + +static void it821x_clock_strategy(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct it821x_dev *itdev = ap->private_data; + u8 unit = adev->devno; + struct ata_device *pair = ata_dev_pair(adev); + + int clock, altclock; + u8 v; + int sel = 0; + + /* Look for the most wanted clocking */ + if (itdev->want[0][0] > itdev->want[1][0]) { + clock = itdev->want[0][1]; + altclock = itdev->want[1][1]; + } else { + clock = itdev->want[1][1]; + altclock = itdev->want[0][1]; + } + + /* Master doesn't care does the slave ? */ + if (clock == ATA_ANY) + clock = altclock; + + /* Nobody cares - keep the same clock */ + if (clock == ATA_ANY) + return; + /* No change */ + if (clock == itdev->clock_mode) + return; + + /* Load this into the controller */ + if (clock == ATA_66) + itdev->clock_mode = ATA_66; + else { + itdev->clock_mode = ATA_50; + sel = 1; + } + pci_read_config_byte(pdev, 0x50, &v); + v &= ~(1 << (1 + ap->port_no)); + v |= sel << (1 + ap->port_no); + pci_write_config_byte(pdev, 0x50, v); + + /* + * Reprogram the UDMA/PIO of the pair drive for the switch + * MWDMA will be dealt with by the dma switcher + */ + if (pair && itdev->udma[1-unit] != UDMA_OFF) { + it821x_program_udma(ap, pair, itdev->udma[1-unit]); + it821x_program(ap, pair, itdev->pio[1-unit]); + } + /* + * Reprogram the UDMA/PIO of our drive for the switch. + * MWDMA will be dealt with by the dma switcher + */ + if (itdev->udma[unit] != UDMA_OFF) { + it821x_program_udma(ap, adev, itdev->udma[unit]); + it821x_program(ap, adev, itdev->pio[unit]); + } +} + +/** + * it821x_passthru_set_piomode - set PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Configure for PIO mode. This is complicated as the register is + * shared by PIO and MWDMA and for both channels. + */ + +static void it821x_passthru_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + /* Spec says 89 ref driver uses 88 */ + static const u16 pio[] = { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 }; + static const u8 pio_want[] = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY }; + + struct it821x_dev *itdev = ap->private_data; + int unit = adev->devno; + int mode_wanted = adev->pio_mode - XFER_PIO_0; + + /* We prefer 66Mhz clock for PIO 0-3, don't care for PIO4 */ + itdev->want[unit][1] = pio_want[mode_wanted]; + itdev->want[unit][0] = 1; /* PIO is lowest priority */ + itdev->pio[unit] = pio[mode_wanted]; + it821x_clock_strategy(ap, adev); + it821x_program(ap, adev, itdev->pio[unit]); +} + +/** + * it821x_passthru_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Set up the DMA modes. The actions taken depend heavily on the mode + * to use. If UDMA is used as is hopefully the usual case then the + * timing register is private and we need only consider the clock. If + * we are using MWDMA then we have to manage the setting ourself as + * we switch devices and mode. + */ + +static void it821x_passthru_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const u16 dma[] = { 0x8866, 0x3222, 0x3121 }; + static const u8 mwdma_want[] = { ATA_ANY, ATA_66, ATA_ANY }; + static const u16 udma[] = { 0x4433, 0x4231, 0x3121, 0x2121, 0x1111, 0x2211, 0x1111 }; + static const u8 udma_want[] = { ATA_ANY, ATA_50, ATA_ANY, ATA_66, ATA_66, ATA_50, ATA_66 }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct it821x_dev *itdev = ap->private_data; + int channel = ap->port_no; + int unit = adev->devno; + u8 conf; + + if (adev->dma_mode >= XFER_UDMA_0) { + int mode_wanted = adev->dma_mode - XFER_UDMA_0; + + itdev->want[unit][1] = udma_want[mode_wanted]; + itdev->want[unit][0] = 3; /* UDMA is high priority */ + itdev->mwdma[unit] = MWDMA_OFF; + itdev->udma[unit] = udma[mode_wanted]; + if (mode_wanted >= 5) + itdev->udma[unit] |= 0x8080; /* UDMA 5/6 select on */ + + /* UDMA on. Again revision 0x10 must do the pair */ + pci_read_config_byte(pdev, 0x50, &conf); + if (itdev->timing10) + conf &= channel ? 0x9F: 0xE7; + else + conf &= ~ (1 << (3 + 2 * channel + unit)); + pci_write_config_byte(pdev, 0x50, conf); + it821x_clock_strategy(ap, adev); + it821x_program_udma(ap, adev, itdev->udma[unit]); + } else { + int mode_wanted = adev->dma_mode - XFER_MW_DMA_0; + + itdev->want[unit][1] = mwdma_want[mode_wanted]; + itdev->want[unit][0] = 2; /* MWDMA is low priority */ + itdev->mwdma[unit] = dma[mode_wanted]; + itdev->udma[unit] = UDMA_OFF; + + /* UDMA bits off - Revision 0x10 do them in pairs */ + pci_read_config_byte(pdev, 0x50, &conf); + if (itdev->timing10) + conf |= channel ? 0x60: 0x18; + else + conf |= 1 << (3 + 2 * channel + unit); + pci_write_config_byte(pdev, 0x50, conf); + it821x_clock_strategy(ap, adev); + } +} + +/** + * it821x_passthru_dma_start - DMA start callback + * @qc: Command in progress + * + * Usually drivers set the DMA timing at the point the set_dmamode call + * is made. IT821x however requires we load new timings on the + * transitions in some cases. + */ + +static void it821x_passthru_bmdma_start(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct it821x_dev *itdev = ap->private_data; + int unit = adev->devno; + + if (itdev->mwdma[unit] != MWDMA_OFF) + it821x_program(ap, adev, itdev->mwdma[unit]); + else if (itdev->udma[unit] != UDMA_OFF && itdev->timing10) + it821x_program_udma(ap, adev, itdev->udma[unit]); + ata_bmdma_start(qc); +} + +/** + * it821x_passthru_dma_stop - DMA stop callback + * @qc: ATA command + * + * We loaded new timings in dma_start, as a result we need to restore + * the PIO timings in dma_stop so that the next command issue gets the + * right clock values. + */ + +static void it821x_passthru_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct it821x_dev *itdev = ap->private_data; + int unit = adev->devno; + + ata_bmdma_stop(qc); + if (itdev->mwdma[unit] != MWDMA_OFF) + it821x_program(ap, adev, itdev->pio[unit]); +} + + +/** + * it821x_passthru_dev_select - Select master/slave + * @ap: ATA port + * @device: Device number (not pointer) + * + * Device selection hook. If neccessary perform clock switching + */ + +static void it821x_passthru_dev_select(struct ata_port *ap, + unsigned int device) +{ + struct it821x_dev *itdev = ap->private_data; + if (itdev && device != itdev->last_device) { + struct ata_device *adev = &ap->device[device]; + it821x_program(ap, adev, itdev->pio[adev->devno]); + itdev->last_device = device; + } + ata_std_dev_select(ap, device); +} + +/** + * it821x_smart_qc_issue_prot - wrap qc issue prot + * @qc: command + * + * Wrap the command issue sequence for the IT821x. We need to + * perform out own device selection timing loads before the + * usual happenings kick off + */ + +static unsigned int it821x_smart_qc_issue_prot(struct ata_queued_cmd *qc) +{ + switch(qc->tf.command) + { + /* Commands the firmware supports */ + case ATA_CMD_READ: + case ATA_CMD_READ_EXT: + case ATA_CMD_WRITE: + case ATA_CMD_WRITE_EXT: + case ATA_CMD_PIO_READ: + case ATA_CMD_PIO_READ_EXT: + case ATA_CMD_PIO_WRITE: + case ATA_CMD_PIO_WRITE_EXT: + case ATA_CMD_READ_MULTI: + case ATA_CMD_READ_MULTI_EXT: + case ATA_CMD_WRITE_MULTI: + case ATA_CMD_WRITE_MULTI_EXT: + case ATA_CMD_ID_ATA: + /* Arguably should just no-op this one */ + case ATA_CMD_SET_FEATURES: + return ata_qc_issue_prot(qc); + } + printk(KERN_DEBUG "it821x: can't process command 0x%02X\n", qc->tf.command); + return AC_ERR_INVALID; +} + +/** + * it821x_passthru_qc_issue_prot - wrap qc issue prot + * @qc: command + * + * Wrap the command issue sequence for the IT821x. We need to + * perform out own device selection timing loads before the + * usual happenings kick off + */ + +static unsigned int it821x_passthru_qc_issue_prot(struct ata_queued_cmd *qc) +{ + it821x_passthru_dev_select(qc->ap, qc->dev->devno); + return ata_qc_issue_prot(qc); +} + +/** + * it821x_smart_set_mode - mode setting + * @ap: interface to set up + * + * Use a non standard set_mode function. We don't want to be tuned. + * The BIOS configured everything. Our job is not to fiddle. We + * read the dma enabled bits from the PCI configuration of the device + * and respect them. + */ + +static void it821x_smart_set_mode(struct ata_port *ap) +{ + int dma_enabled = 0; + int i; + + /* Bits 5 and 6 indicate if DMA is active on master/slave */ + /* It is possible that BMDMA isn't allocated */ + if (ap->ioaddr.bmdma_addr) + dma_enabled = inb(ap->ioaddr.bmdma_addr + ATA_DMA_CMD); + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + if (ata_dev_enabled(dev)) { + /* We don't really care */ + dev->pio_mode = XFER_PIO_0; + dev->dma_mode = XFER_MW_DMA_0; + /* We do need the right mode information for DMA or PIO + and this comes from the current configuration flags */ + if (dma_enabled & (1 << (5 + i))) { + dev->xfer_mode = XFER_MW_DMA_0; + dev->xfer_shift = ATA_SHIFT_MWDMA; + dev->flags &= ~ATA_DFLAG_PIO; + } else { + dev->xfer_mode = XFER_PIO_0; + dev->xfer_shift = ATA_SHIFT_PIO; + dev->flags |= ATA_DFLAG_PIO; + } + } + } +} + +/** + * it821x_dev_config - Called each device identify + * @ap: ATA port + * @adev: Device that has just been identified + * + * Perform the initial setup needed for each device that is chip + * special. In our case we need to lock the sector count to avoid + * blowing the brains out of the firmware with large LBA48 requests + * + * FIXME: When FUA appears we need to block FUA too. And SMART and + * basically we need to filter commands for this chip. + */ + +static void it821x_dev_config(struct ata_port *ap, struct ata_device *adev) +{ + unsigned char model_num[40]; + char *s; + unsigned int len; + + /* This block ought to be a library routine as it is in several + drivers now */ + + ata_id_string(adev->id, model_num, ATA_ID_PROD_OFS, + sizeof(model_num)); + s = &model_num[0]; + len = strnlen(s, sizeof(model_num)); + + /* ATAPI specifies that empty space is blank-filled; remove blanks */ + while ((len > 0) && (s[len - 1] == ' ')) { + len--; + s[len] = 0; + } + + if (adev->max_sectors > 255) + adev->max_sectors = 255; + + if (strstr(model_num, "Integrated Technology Express")) { + /* RAID mode */ + printk(KERN_INFO "IT821x %sRAID%d volume", + adev->id[147]?"Bootable ":"", + adev->id[129]); + if (adev->id[129] != 1) + printk("(%dK stripe)", adev->id[146]); + printk(".\n"); + } +} + + +/** + * it821x_check_atapi_dma - ATAPI DMA handler + * @qc: Command we are about to issue + * + * Decide if this ATAPI command can be issued by DMA on this + * controller. Return 0 if it can be. + */ + +static int it821x_check_atapi_dma(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct it821x_dev *itdev = ap->private_data; + + /* No ATAPI DMA in smart mode */ + if (itdev->smart) + return -EOPNOTSUPP; + /* No ATAPI DMA on rev 10 */ + if (itdev->timing10) + return -EOPNOTSUPP; + /* Cool */ + return 0; +} + + +/** + * it821x_port_start - port setup + * @ap: ATA port being set up + * + * The it821x needs to maintain private data structures and also to + * use the standard PCI interface which lacks support for this + * functionality. We instead set up the private data on the port + * start hook, and tear it down on port stop + */ + +static int it821x_port_start(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct it821x_dev *itdev; + u8 conf; + + int ret = ata_port_start(ap); + if (ret < 0) + return ret; + + ap->private_data = kmalloc(sizeof(struct it821x_dev), GFP_KERNEL); + if (ap->private_data == NULL) { + ata_port_stop(ap); + return -ENOMEM; + } + + itdev = ap->private_data; + memset(itdev, 0, sizeof(struct it821x_dev)); + + pci_read_config_byte(pdev, 0x50, &conf); + + if (conf & 1) { + itdev->smart = 1; + /* Long I/O's although allowed in LBA48 space cause the + onboard firmware to enter the twighlight zone */ + /* No ATAPI DMA in this mode either */ + } + /* Pull the current clocks from 0x50 */ + if (conf & (1 << (1 + ap->port_no))) + itdev->clock_mode = ATA_50; + else + itdev->clock_mode = ATA_66; + + itdev->want[0][1] = ATA_ANY; + itdev->want[1][1] = ATA_ANY; + itdev->last_device = -1; + + pci_read_config_byte(pdev, PCI_REVISION_ID, &conf); + if (conf == 0x10) { + itdev->timing10 = 1; + /* Need to disable ATAPI DMA for this case */ + if (!itdev->smart) + printk(KERN_WARNING DRV_NAME": Revision 0x10, workarounds activated.\n"); + } + + return 0; +} + +/** + * it821x_port_stop - port shutdown + * @ap: ATA port being removed + * + * Release the private objects we added in it821x_port_start + */ + +static void it821x_port_stop(struct ata_port *ap) { + kfree(ap->private_data); + ap->private_data = NULL; /* We want an OOPS if we reuse this + too late! */ + ata_port_stop(ap); +} + +static struct scsi_host_template it821x_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + /* 255 sectors to begin with. This is locked in smart mode but not + in pass through */ + .max_sectors = 255, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations it821x_smart_port_ops = { + .set_mode = it821x_smart_set_mode, + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .mode_filter = ata_pci_default_filter, + + .check_status = ata_check_status, + .check_atapi_dma= it821x_check_atapi_dma, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + .dev_config = it821x_dev_config, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = it821x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = it821x_smart_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = it821x_port_start, + .port_stop = it821x_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations it821x_passthru_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = it821x_passthru_set_piomode, + .set_dmamode = it821x_passthru_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .check_atapi_dma= it821x_check_atapi_dma, + .dev_select = it821x_passthru_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = it821x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = it821x_passthru_bmdma_start, + .bmdma_stop = it821x_passthru_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = it821x_passthru_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_clear = ata_bmdma_irq_clear, + .irq_handler = ata_interrupt, + + .port_start = it821x_port_start, + .port_stop = it821x_port_stop, + .host_stop = ata_host_stop +}; + +static void __devinit it821x_disable_raid(struct pci_dev *pdev) +{ + /* Reset local CPU, and set BIOS not ready */ + pci_write_config_byte(pdev, 0x5E, 0x01); + + /* Set to bypass mode, and reset PCI bus */ + pci_write_config_byte(pdev, 0x50, 0x00); + pci_write_config_word(pdev, PCI_COMMAND, + PCI_COMMAND_PARITY | PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pci_write_config_word(pdev, 0x40, 0xA0F3); + + pci_write_config_dword(pdev,0x4C, 0x02040204); + pci_write_config_byte(pdev, 0x42, 0x36); + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x20); +} + + +static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + u8 conf; + + static struct ata_port_info info_smart = { + .sht = &it821x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &it821x_smart_port_ops + }; + static struct ata_port_info info_passthru = { + .sht = &it821x_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &it821x_passthru_port_ops + }; + static struct ata_port_info *port_info[2]; + + static char *mode[2] = { "pass through", "smart" }; + + /* Force the card into bypass mode if so requested */ + if (it8212_noraid) { + printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n"); + it821x_disable_raid(pdev); + } + pci_read_config_byte(pdev, 0x50, &conf); + conf &= 1; + + printk(KERN_INFO DRV_NAME ": controller in %s mode.\n", mode[conf]); + if (conf == 0) + port_info[0] = port_info[1] = &info_passthru; + else + port_info[0] = port_info[1] = &info_smart; + + return ata_pci_init_one(pdev, port_info, 2); +} + +static struct pci_device_id it821x[] = { + { PCI_DEVICE(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8211), }, + { PCI_DEVICE(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8212), }, + { 0, }, +}; + +static struct pci_driver it821x_pci_driver = { + .name = DRV_NAME, + .id_table = it821x, + .probe = it821x_init_one, + .remove = ata_pci_remove_one +}; + +static int __init it821x_init(void) +{ + return pci_register_driver(&it821x_pci_driver); +} + + +static void __exit it821x_exit(void) +{ + pci_unregister_driver(&it821x_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for the IT8211/IT8212 IDE RAID controller"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, it821x); +MODULE_VERSION(DRV_VERSION); + + +module_param_named(noraid, it8212_noraid, int, S_IRUGO); +MODULE_PARM_DESC(it8212_noraid, "Force card into bypass mode"); + +module_init(it821x_init); +module_exit(it821x_exit); diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c new file mode 100644 index 000000000000..6832a643a9eb --- /dev/null +++ b/drivers/ata/pata_jmicron.c @@ -0,0 +1,266 @@ +/* + * pata_jmicron.c - JMicron ATA driver for non AHCI mode. This drives the + * PATA port of the controller. The SATA ports are + * driven by AHCI in the usual configuration although + * this driver can handle other setups if we need it. + * + * (c) 2006 Red Hat + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_jmicron" +#define DRV_VERSION "0.1.2" + +typedef enum { + PORT_PATA0 = 0, + PORT_PATA1 = 1, + PORT_SATA = 2, +} port_type; + +/** + * jmicron_pre_reset - check for 40/80 pin + * @ap: Port + * + * Perform the PATA port setup we need. + + * On the Jmicron 361/363 there is a single PATA port that can be mapped + * either as primary or secondary (or neither). We don't do any policy + * and setup here. We assume that has been done by init_one and the + * BIOS. + */ + +static int jmicron_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 control; + u32 control5; + int port_mask = 1<< (4 * ap->port_no); + int port = ap->port_no; + port_type port_map[2]; + + /* Check if our port is enabled */ + pci_read_config_dword(pdev, 0x40, &control); + if ((control & port_mask) == 0) + return 0; + + /* There are two basic mappings. One has the two SATA ports merged + as master/slave and the secondary as PATA, the other has only the + SATA port mapped */ + if (control & (1 << 23)) { + port_map[0] = PORT_SATA; + port_map[1] = PORT_PATA0; + } else { + port_map[0] = PORT_SATA; + port_map[1] = PORT_SATA; + } + + /* The 365/366 may have this bit set to map the second PATA port + as the internal primary channel */ + pci_read_config_dword(pdev, 0x80, &control5); + if (control5 & (1<<24)) + port_map[0] = PORT_PATA1; + + /* The two ports may then be logically swapped by the firmware */ + if (control & (1 << 22)) + port = port ^ 1; + + /* + * Now we know which physical port we are talking about we can + * actually do our cable checking etc. Thankfully we don't need + * to do the plumbing for other cases. + */ + switch (port_map[port]) + { + case PORT_PATA0: + if (control & (1 << 5)) + return 0; + if (control & (1 << 3)) /* 40/80 pin primary */ + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + break; + case PORT_PATA1: + /* Bit 21 is set if the port is enabled */ + if ((control5 & (1 << 21)) == 0) + return 0; + if (control5 & (1 << 19)) /* 40/80 pin secondary */ + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + break; + case PORT_SATA: + ap->cbl = ATA_CBL_SATA; + break; + } + return ata_std_prereset(ap); +} + +/** + * jmicron_error_handler - Setup and error handler + * @ap: Port to handle + * + * LOCKING: + * None (inherited from caller). + */ + +static void jmicron_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, jmicron_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/* No PIO or DMA methods needed for this device */ + +static struct scsi_host_template jmicron_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + /* Special handling needed if you have sector or LBA48 limits */ + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + /* Use standard CHS mapping rules */ + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations jmicron_ops = { + .port_disable = ata_port_disable, + + /* Task file is PCI ATA format, use helpers */ + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = jmicron_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + /* BMDMA handling is PCI ATA format, use helpers */ + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + /* Timeout handling. Special recovery hooks here */ + .eng_timeout = ata_eng_timeout, + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + /* Generic PATA PCI ATA helpers */ + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + + +/** + * jmicron_init_one - Register Jmicron ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in jmicron_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int jmicron_init_one (struct pci_dev *pdev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &jmicron_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + + .port_ops = &jmicron_ops, + }; + struct ata_port_info *port_info[2] = { &info, &info }; + + u32 reg; + + if (id->driver_data != 368) { + /* Put the controller into AHCI mode in case the AHCI driver + has not yet been loaded. This can be done with either + function present */ + + /* FIXME: We may want a way to override this in future */ + pci_write_config_byte(pdev, 0x41, 0xa1); + } + + /* PATA controller is fn 1, AHCI is fn 0 */ + if (PCI_FUNC(pdev->devfn) != 1) + return -ENODEV; + + if ( id->driver_data == 365 || id->driver_data == 366) { + /* The 365/66 have two PATA channels, redirect the second */ + pci_read_config_dword(pdev, 0x80, ®); + reg |= (1 << 24); /* IDE1 to PATA IDE secondary */ + pci_write_config_dword(pdev, 0x80, reg); + } + + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id jmicron_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361), 361}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363), 363}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365), 365}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366), 366}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368), 368}, + { } /* terminate list */ +}; + +static struct pci_driver jmicron_pci_driver = { + .name = DRV_NAME, + .id_table = jmicron_pci_tbl, + .probe = jmicron_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init jmicron_init(void) +{ + return pci_register_driver(&jmicron_pci_driver); +} + +static void __exit jmicron_exit(void) +{ + pci_unregister_driver(&jmicron_pci_driver); +} + +module_init(jmicron_init); +module_exit(jmicron_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for Jmicron PATA ports"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, jmicron_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c new file mode 100644 index 000000000000..eb510660b424 --- /dev/null +++ b/drivers/ata/pata_legacy.c @@ -0,0 +1,949 @@ +/* + * pata-legacy.c - Legacy port PATA/SATA controller driver. + * Copyright 2005/2006 Red Hat , all rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * An ATA driver for the legacy ATA ports. + * + * Data Sources: + * Opti 82C465/82C611 support: Data sheets at opti-inc.com + * HT6560 series: + * Promise 20230/20620: + * http://www.ryston.cz/petr/vlb/pdc20230b.html + * http://www.ryston.cz/petr/vlb/pdc20230c.html + * http://www.ryston.cz/petr/vlb/pdc20630.html + * + * Unsupported but docs exist: + * Appian/Adaptec AIC25VL01/Cirrus Logic PD7220 + * Winbond W83759A + * + * This driver handles legacy (that is "ISA/VLB side") IDE ports found + * on PC class systems. There are three hybrid devices that are exceptions + * The Cyrix 5510/5520 where a pre SFF ATA device is on the bridge and + * the MPIIX where the tuning is PCI side but the IDE is "ISA side". + * + * Specific support is included for the ht6560a/ht6560b/opti82c611a/ + * opti82c465mv/promise 20230c/20630 + * + * Use the autospeed and pio_mask options with: + * Appian ADI/2 aka CLPD7220 or AIC25VL01. + * Use the jumpers, autospeed and set pio_mask to the mode on the jumpers with + * Goldstar GM82C711, PIC-1288A-125, UMC 82C871F, Winbond W83759, + * Winbond W83759A, Promise PDC20230-B + * + * For now use autospeed and pio_mask as above with the W83759A. This may + * change. + * + * TODO + * Merge existing pata_qdi driver + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_legacy" +#define DRV_VERSION "0.5.3" + +#define NR_HOST 6 + +static int legacy_port[NR_HOST] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 }; +static int legacy_irq[NR_HOST] = { 15, 14, 11, 10, 8, 12 }; + +struct legacy_data { + unsigned long timing; + u8 clock[2]; + u8 last; + int fast; + struct platform_device *platform_dev; + +}; + +static struct legacy_data legacy_data[NR_HOST]; +static struct ata_host *legacy_host[NR_HOST]; +static int nr_legacy_host; + + +static int probe_all; /* Set to check all ISA port ranges */ +static int ht6560a; /* HT 6560A on primary 1, secondary 2, both 3 */ +static int ht6560b; /* HT 6560A on primary 1, secondary 2, both 3 */ +static int opti82c611a; /* Opti82c611A on primary 1, secondary 2, both 3 */ +static int opti82c46x; /* Opti 82c465MV present (pri/sec autodetect) */ +static int autospeed; /* Chip present which snoops speed changes */ +static int pio_mask = 0x1F; /* PIO range for autospeed devices */ + +/** + * legacy_set_mode - mode setting + * @ap: IDE interface + * + * Use a non standard set_mode function. We don't want to be tuned. + * + * The BIOS configured everything. Our job is not to fiddle. Just use + * whatever PIO the hardware is using and leave it at that. When we + * get some kind of nice user driven API for control then we can + * expand on this as per hdparm in the base kernel. + */ + +static void legacy_set_mode(struct ata_port *ap) +{ + int i; + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + if (ata_dev_enabled(dev)) { + dev->pio_mode = XFER_PIO_0; + dev->xfer_mode = XFER_PIO_0; + dev->xfer_shift = ATA_SHIFT_PIO; + dev->flags |= ATA_DFLAG_PIO; + } + } +} + +static struct scsi_host_template legacy_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +/* + * These ops are used if the user indicates the hardware + * snoops the commands to decide on the mode and handles the + * mode selection "magically" itself. Several legacy controllers + * do this. The mode range can be set if it is not 0x1F by setting + * pio_mask as well. + */ + +static struct ata_port_operations simple_port_ops = { + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ata_bmdma_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer_noirq, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations legacy_port_ops = { + .set_mode = legacy_set_mode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer_noirq, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Promise 20230C and 20620 support + * + * This controller supports PIO0 to PIO2. We set PIO timings conservatively to + * allow for 50MHz Vesa Local Bus. The 20620 DMA support is weird being DMA to + * controller and PIO'd to the host and not supported. + */ + +static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + int tries = 5; + int pio = adev->pio_mode - XFER_PIO_0; + u8 rt; + unsigned long flags; + + /* Safe as UP only. Force I/Os to occur together */ + + local_irq_save(flags); + + /* Unlock the control interface */ + do + { + inb(0x1F5); + outb(inb(0x1F2) | 0x80, 0x1F2); + inb(0x1F2); + inb(0x3F6); + inb(0x3F6); + inb(0x1F2); + inb(0x1F2); + } + while((inb(0x1F2) & 0x80) && --tries); + + local_irq_restore(flags); + + outb(inb(0x1F4) & 0x07, 0x1F4); + + rt = inb(0x1F3); + rt &= 0x07 << (3 * adev->devno); + if (pio) + rt |= (1 + 3 * pio) << (3 * adev->devno); + + udelay(100); + outb(inb(0x1F2) | 0x01, 0x1F2); + udelay(100); + inb(0x1F5); + +} + +static void pdc_data_xfer_vlb(struct ata_device *adev, unsigned char *buf, unsigned int buflen, int write_data) +{ + struct ata_port *ap = adev->ap; + int slop = buflen & 3; + unsigned long flags; + + if (ata_id_has_dword_io(adev->id)) { + local_irq_save(flags); + + /* Perform the 32bit I/O synchronization sequence */ + inb(ap->ioaddr.nsect_addr); + inb(ap->ioaddr.nsect_addr); + inb(ap->ioaddr.nsect_addr); + + /* Now the data */ + + if (write_data) + outsl(ap->ioaddr.data_addr, buf, buflen >> 2); + else + insl(ap->ioaddr.data_addr, buf, buflen >> 2); + + if (unlikely(slop)) { + u32 pad; + if (write_data) { + memcpy(&pad, buf + buflen - slop, slop); + outl(le32_to_cpu(pad), ap->ioaddr.data_addr); + } else { + pad = cpu_to_le16(inl(ap->ioaddr.data_addr)); + memcpy(buf + buflen - slop, &pad, slop); + } + } + local_irq_restore(flags); + } + else + ata_pio_data_xfer_noirq(adev, buf, buflen, write_data); +} + +static struct ata_port_operations pdc20230_port_ops = { + .set_piomode = pdc20230_set_piomode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = pdc_data_xfer_vlb, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Holtek 6560A support + * + * This controller supports PIO0 to PIO2 (no IORDY even though higher timings + * can be loaded). + */ + +static void ht6560a_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + u8 active, recover; + struct ata_timing t; + + /* Get the timing data in cycles. For now play safe at 50Mhz */ + ata_timing_compute(adev, adev->pio_mode, &t, 20000, 1000); + + active = FIT(t.active, 2, 15); + recover = FIT(t.recover, 4, 15); + + inb(0x3E6); + inb(0x3E6); + inb(0x3E6); + inb(0x3E6); + + outb(recover << 4 | active, ap->ioaddr.device_addr); + inb(ap->ioaddr.status_addr); +} + +static struct ata_port_operations ht6560a_port_ops = { + .set_piomode = ht6560a_set_piomode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, /* Check vlb/noirq */ + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Holtek 6560B support + * + * This controller supports PIO0 to PIO4. We honour the BIOS/jumper FIFO setting + * unless we see an ATAPI device in which case we force it off. + * + * FIXME: need to implement 2nd channel support. + */ + +static void ht6560b_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + u8 active, recover; + struct ata_timing t; + + /* Get the timing data in cycles. For now play safe at 50Mhz */ + ata_timing_compute(adev, adev->pio_mode, &t, 20000, 1000); + + active = FIT(t.active, 2, 15); + recover = FIT(t.recover, 2, 16); + recover &= 0x15; + + inb(0x3E6); + inb(0x3E6); + inb(0x3E6); + inb(0x3E6); + + outb(recover << 4 | active, ap->ioaddr.device_addr); + + if (adev->class != ATA_DEV_ATA) { + u8 rconf = inb(0x3E6); + if (rconf & 0x24) { + rconf &= ~ 0x24; + outb(rconf, 0x3E6); + } + } + inb(ap->ioaddr.status_addr); +} + +static struct ata_port_operations ht6560b_port_ops = { + .set_piomode = ht6560b_set_piomode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, /* FIXME: Check 32bit and noirq */ + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Opti core chipset helpers + */ + +/** + * opti_syscfg - read OPTI chipset configuration + * @reg: Configuration register to read + * + * Returns the value of an OPTI system board configuration register. + */ + +static u8 opti_syscfg(u8 reg) +{ + unsigned long flags; + u8 r; + + /* Uniprocessor chipset and must force cycles adjancent */ + local_irq_save(flags); + outb(reg, 0x22); + r = inb(0x24); + local_irq_restore(flags); + return r; +} + +/* + * Opti 82C611A + * + * This controller supports PIO0 to PIO3. + */ + +static void opti82c611a_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + u8 active, recover, setup; + struct ata_timing t; + struct ata_device *pair = ata_dev_pair(adev); + int clock; + int khz[4] = { 50000, 40000, 33000, 25000 }; + u8 rc; + + /* Enter configuration mode */ + inw(ap->ioaddr.error_addr); + inw(ap->ioaddr.error_addr); + outb(3, ap->ioaddr.nsect_addr); + + /* Read VLB clock strapping */ + clock = 1000000000 / khz[inb(ap->ioaddr.lbah_addr) & 0x03]; + + /* Get the timing data in cycles */ + ata_timing_compute(adev, adev->pio_mode, &t, clock, 1000); + + /* Setup timing is shared */ + if (pair) { + struct ata_timing tp; + ata_timing_compute(pair, pair->pio_mode, &tp, clock, 1000); + + ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP); + } + + active = FIT(t.active, 2, 17) - 2; + recover = FIT(t.recover, 1, 16) - 1; + setup = FIT(t.setup, 1, 4) - 1; + + /* Select the right timing bank for write timing */ + rc = inb(ap->ioaddr.lbal_addr); + rc &= 0x7F; + rc |= (adev->devno << 7); + outb(rc, ap->ioaddr.lbal_addr); + + /* Write the timings */ + outb(active << 4 | recover, ap->ioaddr.error_addr); + + /* Select the right bank for read timings, also + load the shared timings for address */ + rc = inb(ap->ioaddr.device_addr); + rc &= 0xC0; + rc |= adev->devno; /* Index select */ + rc |= (setup << 4) | 0x04; + outb(rc, ap->ioaddr.device_addr); + + /* Load the read timings */ + outb(active << 4 | recover, ap->ioaddr.data_addr); + + /* Ensure the timing register mode is right */ + rc = inb (ap->ioaddr.lbal_addr); + rc &= 0x73; + rc |= 0x84; + outb(rc, ap->ioaddr.lbal_addr); + + /* Exit command mode */ + outb(0x83, ap->ioaddr.nsect_addr); +} + + +static struct ata_port_operations opti82c611a_port_ops = { + .set_piomode = opti82c611a_set_piomode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/* + * Opti 82C465MV + * + * This controller supports PIO0 to PIO3. Unlike the 611A the MVB + * version is dual channel but doesn't have a lot of unique registers. + */ + +static void opti82c46x_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + u8 active, recover, setup; + struct ata_timing t; + struct ata_device *pair = ata_dev_pair(adev); + int clock; + int khz[4] = { 50000, 40000, 33000, 25000 }; + u8 rc; + u8 sysclk; + + /* Get the clock */ + sysclk = opti_syscfg(0xAC) & 0xC0; /* BIOS set */ + + /* Enter configuration mode */ + inw(ap->ioaddr.error_addr); + inw(ap->ioaddr.error_addr); + outb(3, ap->ioaddr.nsect_addr); + + /* Read VLB clock strapping */ + clock = 1000000000 / khz[sysclk]; + + /* Get the timing data in cycles */ + ata_timing_compute(adev, adev->pio_mode, &t, clock, 1000); + + /* Setup timing is shared */ + if (pair) { + struct ata_timing tp; + ata_timing_compute(pair, pair->pio_mode, &tp, clock, 1000); + + ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP); + } + + active = FIT(t.active, 2, 17) - 2; + recover = FIT(t.recover, 1, 16) - 1; + setup = FIT(t.setup, 1, 4) - 1; + + /* Select the right timing bank for write timing */ + rc = inb(ap->ioaddr.lbal_addr); + rc &= 0x7F; + rc |= (adev->devno << 7); + outb(rc, ap->ioaddr.lbal_addr); + + /* Write the timings */ + outb(active << 4 | recover, ap->ioaddr.error_addr); + + /* Select the right bank for read timings, also + load the shared timings for address */ + rc = inb(ap->ioaddr.device_addr); + rc &= 0xC0; + rc |= adev->devno; /* Index select */ + rc |= (setup << 4) | 0x04; + outb(rc, ap->ioaddr.device_addr); + + /* Load the read timings */ + outb(active << 4 | recover, ap->ioaddr.data_addr); + + /* Ensure the timing register mode is right */ + rc = inb (ap->ioaddr.lbal_addr); + rc &= 0x73; + rc |= 0x84; + outb(rc, ap->ioaddr.lbal_addr); + + /* Exit command mode */ + outb(0x83, ap->ioaddr.nsect_addr); + + /* We need to know this for quad device on the MVB */ + ap->host->private_data = ap; +} + +/** + * opt82c465mv_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings. The + * MVB has a single set of timing registers and these are shared + * across channels. As there are two registers we really ought to + * track the last two used values as a sort of register window. For + * now we just reload on a channel switch. On the single channel + * setup this condition never fires so we do nothing extra. + * + * FIXME: dual channel needs ->serialize support + */ + +static unsigned int opti82c46x_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + + /* If timings are set and for the wrong channel (2nd test is + due to a libata shortcoming and will eventually go I hope) */ + if (ap->host->private_data != ap->host + && ap->host->private_data != NULL) + opti82c46x_set_piomode(ap, adev); + + return ata_qc_issue_prot(qc); +} + +static struct ata_port_operations opti82c46x_port_ops = { + .set_piomode = opti82c46x_set_piomode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .qc_prep = ata_qc_prep, + .qc_issue = opti82c46x_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + + +/** + * legacy_init_one - attach a legacy interface + * @port: port number + * @io: I/O port start + * @ctrl: control port + * @irq: interrupt line + * + * Register an ISA bus IDE interface. Such interfaces are PIO and we + * assume do not support IRQ sharing. + */ + +static __init int legacy_init_one(int port, unsigned long io, unsigned long ctrl, int irq) +{ + struct legacy_data *ld = &legacy_data[nr_legacy_host]; + struct ata_probe_ent ae; + struct platform_device *pdev; + int ret = -EBUSY; + struct ata_port_operations *ops = &legacy_port_ops; + int pio_modes = pio_mask; + u32 mask = (1 << port); + + if (request_region(io, 8, "pata_legacy") == NULL) + return -EBUSY; + if (request_region(ctrl, 1, "pata_legacy") == NULL) + goto fail_io; + + pdev = platform_device_register_simple(DRV_NAME, nr_legacy_host, NULL, 0); + if (pdev == NULL) + goto fail_dev; + + if (ht6560a & mask) { + ops = &ht6560a_port_ops; + pio_modes = 0x07; + } + if (ht6560b & mask) { + ops = &ht6560b_port_ops; + pio_modes = 0x1F; + } + if (opti82c611a & mask) { + ops = &opti82c611a_port_ops; + pio_modes = 0x0F; + } + if (opti82c46x & mask) { + ops = &opti82c46x_port_ops; + pio_modes = 0x0F; + } + + /* Probe for automatically detectable controllers */ + + if (io == 0x1F0 && ops == &legacy_port_ops) { + unsigned long flags; + + local_irq_save(flags); + + /* Probes */ + inb(0x1F5); + outb(inb(0x1F2) | 0x80, 0x1F2); + inb(0x1F2); + inb(0x3F6); + inb(0x3F6); + inb(0x1F2); + inb(0x1F2); + + if ((inb(0x1F2) & 0x80) == 0) { + /* PDC20230c or 20630 ? */ + printk(KERN_INFO "PDC20230-C/20630 VLB ATA controller detected.\n"); + pio_modes = 0x07; + ops = &pdc20230_port_ops; + udelay(100); + inb(0x1F5); + } else { + outb(0x55, 0x1F2); + inb(0x1F2); + inb(0x1F2); + if (inb(0x1F2) == 0x00) { + printk(KERN_INFO "PDC20230-B VLB ATA controller detected.\n"); + } + } + local_irq_restore(flags); + } + + + /* Chip does mode setting by command snooping */ + if (ops == &legacy_port_ops && (autospeed & mask)) + ops = &simple_port_ops; + memset(&ae, 0, sizeof(struct ata_probe_ent)); + INIT_LIST_HEAD(&ae.node); + ae.dev = &pdev->dev; + ae.port_ops = ops; + ae.sht = &legacy_sht; + ae.n_ports = 1; + ae.pio_mask = pio_modes; + ae.irq = irq; + ae.irq_flags = 0; + ae.port_flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST; + ae.port[0].cmd_addr = io; + ae.port[0].altstatus_addr = ctrl; + ae.port[0].ctl_addr = ctrl; + ata_std_ports(&ae.port[0]); + ae.private_data = ld; + + ret = ata_device_add(&ae); + if (ret == 0) { + ret = -ENODEV; + goto fail; + } + legacy_host[nr_legacy_host++] = dev_get_drvdata(&pdev->dev); + ld->platform_dev = pdev; + return 0; + +fail: + platform_device_unregister(pdev); +fail_dev: + release_region(ctrl, 1); +fail_io: + release_region(io, 8); + return ret; +} + +/** + * legacy_check_special_cases - ATA special cases + * @p: PCI device to check + * @master: set this if we find an ATA master + * @master: set this if we find an ATA secondary + * + * A small number of vendors implemented early PCI ATA interfaces on bridge logic + * without the ATA interface being PCI visible. Where we have a matching PCI driver + * we must skip the relevant device here. If we don't know about it then the legacy + * driver is the right driver anyway. + */ + +static void legacy_check_special_cases(struct pci_dev *p, int *primary, int *secondary) +{ + /* Cyrix CS5510 pre SFF MWDMA ATA on the bridge */ + if (p->vendor == 0x1078 && p->device == 0x0000) { + *primary = *secondary = 1; + return; + } + /* Cyrix CS5520 pre SFF MWDMA ATA on the bridge */ + if (p->vendor == 0x1078 && p->device == 0x0002) { + *primary = *secondary = 1; + return; + } + /* Intel MPIIX - PIO ATA on non PCI side of bridge */ + if (p->vendor == 0x8086 && p->device == 0x1234) { + u16 r; + pci_read_config_word(p, 0x6C, &r); + if (r & 0x8000) { /* ATA port enabled */ + if (r & 0x4000) + *secondary = 1; + else + *primary = 1; + } + return; + } +} + + +/** + * legacy_init - attach legacy interfaces + * + * Attach legacy IDE interfaces by scanning the usual IRQ/port suspects. + * Right now we do not scan the ide0 and ide1 address but should do so + * for non PCI systems or systems with no PCI IDE legacy mode devices. + * If you fix that note there are special cases to consider like VLB + * drivers and CS5510/20. + */ + +static __init int legacy_init(void) +{ + int i; + int ct = 0; + int primary = 0; + int secondary = 0; + int last_port = NR_HOST; + + struct pci_dev *p = NULL; + + for_each_pci_dev(p) { + int r; + /* Check for any overlap of the system ATA mappings. Native mode controllers + stuck on these addresses or some devices in 'raid' mode won't be found by + the storage class test */ + for (r = 0; r < 6; r++) { + if (pci_resource_start(p, r) == 0x1f0) + primary = 1; + if (pci_resource_start(p, r) == 0x170) + secondary = 1; + } + /* Check for special cases */ + legacy_check_special_cases(p, &primary, &secondary); + + /* If PCI bus is present then don't probe for tertiary legacy ports */ + if (probe_all == 0) + last_port = 2; + } + + /* If an OPTI 82C46X is present find out where the channels are */ + if (opti82c46x) { + static const char *optis[4] = { + "3/463MV", "5MV", + "5MVA", "5MVB" + }; + u8 chans = 1; + u8 ctrl = (opti_syscfg(0x30) & 0xC0) >> 6; + + opti82c46x = 3; /* Assume master and slave first */ + printk(KERN_INFO DRV_NAME ": Opti 82C46%s chipset support.\n", optis[ctrl]); + if (ctrl == 3) + chans = (opti_syscfg(0x3F) & 0x20) ? 2 : 1; + ctrl = opti_syscfg(0xAC); + /* Check enabled and this port is the 465MV port. On the + MVB we may have two channels */ + if (ctrl & 8) { + if (ctrl & 4) + opti82c46x = 2; /* Slave */ + else + opti82c46x = 1; /* Master */ + if (chans == 2) + opti82c46x = 3; /* Master and Slave */ + } /* Slave only */ + else if (chans == 1) + opti82c46x = 1; + } + + for (i = 0; i < last_port; i++) { + /* Skip primary if we have seen a PCI one */ + if (i == 0 && primary == 1) + continue; + /* Skip secondary if we have seen a PCI one */ + if (i == 1 && secondary == 1) + continue; + if (legacy_init_one(i, legacy_port[i], + legacy_port[i] + 0x0206, + legacy_irq[i]) == 0) + ct++; + } + if (ct != 0) + return 0; + return -ENODEV; +} + +static __exit void legacy_exit(void) +{ + int i; + + for (i = 0; i < nr_legacy_host; i++) { + struct legacy_data *ld = &legacy_data[i]; + struct ata_port *ap =legacy_host[i]->ports[0]; + unsigned long io = ap->ioaddr.cmd_addr; + unsigned long ctrl = ap->ioaddr.ctl_addr; + ata_host_remove(legacy_host[i]); + platform_device_unregister(ld->platform_dev); + if (ld->timing) + release_region(ld->timing, 2); + release_region(io, 8); + release_region(ctrl, 1); + } +} + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for legacy ATA"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +module_param(probe_all, int, 0); +module_param(autospeed, int, 0); +module_param(ht6560a, int, 0); +module_param(ht6560b, int, 0); +module_param(opti82c611a, int, 0); +module_param(opti82c46x, int, 0); +module_param(pio_mask, int, 0); + +module_init(legacy_init); +module_exit(legacy_exit); + diff --git a/drivers/ata/pata_mpiix.c b/drivers/ata/pata_mpiix.c new file mode 100644 index 000000000000..1958c4ed09a8 --- /dev/null +++ b/drivers/ata/pata_mpiix.c @@ -0,0 +1,313 @@ +/* + * pata_mpiix.c - Intel MPIIX PATA for new ATA layer + * (C) 2005-2006 Red Hat Inc + * Alan Cox + * + * The MPIIX is different enough to the PIIX4 and friends that we give it + * a separate driver. The old ide/pci code handles this by just not tuning + * MPIIX at all. + * + * The MPIIX also differs in another important way from the majority of PIIX + * devices. The chip is a bridge (pardon the pun) between the old world of + * ISA IDE and PCI IDE. Although the ATA timings are PCI configured the actual + * IDE controller is not decoded in PCI space and the chip does not claim to + * be IDE class PCI. This requires slightly non-standard probe logic compared + * with PCI IDE and also that we do not disable the device when our driver is + * unloaded (as it has many other functions). + * + * The driver conciously keeps this logic internally to avoid pushing quirky + * PATA history into the clean libata layer. + * + * Thinkpad specific note: If you boot an MPIIX using thinkpad with a PCMCIA + * hard disk present this driver will not detect it. This is not a bug. In this + * configuration the secondary port of the MPIIX is disabled and the addresses + * are decoded by the PCMCIA bridge and therefore are for a generic IDE driver + * to operate. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_mpiix" +#define DRV_VERSION "0.7.1" + +enum { + IDETIM = 0x6C, /* IDE control register */ + IORDY = (1 << 1), + PPE = (1 << 2), + FTIM = (1 << 0), + ENABLED = (1 << 15), + SECONDARY = (1 << 14) +}; + +static int mpiix_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static const struct pci_bits mpiix_enable_bits[] = { + { 0x6D, 1, 0x80, 0x80 }, + { 0x6F, 1, 0x80, 0x80 } + }; + + if (!pci_test_config_bits(pdev, &mpiix_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * mpiix_error_handler - probe reset + * @ap: ATA port + * + * Perform the ATA probe and bus reset sequence plus specific handling + * for this hardware. The MPIIX has the enable bits in a different place + * to PIIX4 and friends. As a pure PIO device it has no cable detect + */ + +static void mpiix_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, mpiix_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * mpiix_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. The MPIIX allows us to program the + * IORDY sample point (2-5 clocks), recovery 1-4 clocks and whether + * prefetching or iordy are used. + * + * This would get very ugly because we can only program timing for one + * device at a time, the other gets PIO0. Fortunately libata calls + * our qc_issue_prot command before a command is issued so we can + * flip the timings back and forth to reduce the pain. + */ + +static void mpiix_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + int control = 0; + int pio = adev->pio_mode - XFER_PIO_0; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u16 idetim; + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + pci_read_config_word(pdev, IDETIM, &idetim); + /* Mask the IORDY/TIME/PPE0 bank for this device */ + if (adev->class == ATA_DEV_ATA) + control |= PPE; /* PPE enable for disk */ + if (ata_pio_need_iordy(adev)) + control |= IORDY; /* IORDY */ + if (pio > 0) + control |= FTIM; /* This drive is on the fast timing bank */ + + /* Mask out timing and clear both TIME bank selects */ + idetim &= 0xCCEE; + idetim &= ~(0x07 << (2 * adev->devno)); + idetim |= (control << (2 * adev->devno)); + + idetim |= (timings[pio][0] << 12) | (timings[pio][1] << 8); + pci_write_config_word(pdev, IDETIM, idetim); + + /* We use ap->private_data as a pointer to the device currently + loaded for timing */ + ap->private_data = adev; +} + +/** + * mpiix_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings if + * neccessary. Our logic also clears TIME0/TIME1 for the other device so + * that, even if we get this wrong, cycles to the other device will + * be made PIO0. + */ + +static unsigned int mpiix_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + + /* If modes have been configured and the channel data is not loaded + then load it. We have to check if pio_mode is set as the core code + does not set adev->pio_mode to XFER_PIO_0 while probing as would be + logical */ + + if (adev->pio_mode && adev != ap->private_data) + mpiix_set_piomode(ap, adev); + + return ata_qc_issue_prot(qc); +} + +static struct scsi_host_template mpiix_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations mpiix_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = mpiix_set_piomode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = mpiix_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = mpiix_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int mpiix_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + /* Single threaded by the PCI probe logic */ + static struct ata_probe_ent probe[2]; + static int printed_version; + u16 idetim; + int enabled; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &dev->dev, "version " DRV_VERSION "\n"); + + /* MPIIX has many functions which can be turned on or off according + to other devices present. Make sure IDE is enabled before we try + and use it */ + + pci_read_config_word(dev, IDETIM, &idetim); + if (!(idetim & ENABLED)) + return -ENODEV; + + /* We do our own plumbing to avoid leaking special cases for whacko + ancient hardware into the core code. There are two issues to + worry about. #1 The chip is a bridge so if in legacy mode and + without BARs set fools the setup. #2 If you pci_disable_device + the MPIIX your box goes castors up */ + + INIT_LIST_HEAD(&probe[0].node); + probe[0].dev = pci_dev_to_dev(dev); + probe[0].port_ops = &mpiix_port_ops; + probe[0].sht = &mpiix_sht; + probe[0].pio_mask = 0x1F; + probe[0].irq = 14; + probe[0].irq_flags = SA_SHIRQ; + probe[0].port_flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST; + probe[0].n_ports = 1; + probe[0].port[0].cmd_addr = 0x1F0; + probe[0].port[0].ctl_addr = 0x3F6; + probe[0].port[0].altstatus_addr = 0x3F6; + + /* The secondary lurks at different addresses but is otherwise + the same beastie */ + + INIT_LIST_HEAD(&probe[1].node); + probe[1] = probe[0]; + probe[1].irq = 15; + probe[1].port[0].cmd_addr = 0x170; + probe[1].port[0].ctl_addr = 0x376; + probe[1].port[0].altstatus_addr = 0x376; + + /* Let libata fill in the port details */ + ata_std_ports(&probe[0].port[0]); + ata_std_ports(&probe[1].port[0]); + + /* Now add the port that is active */ + enabled = (idetim & SECONDARY) ? 1 : 0; + + if (ata_device_add(&probe[enabled])) + return 0; + return -ENODEV; +} + +/** + * mpiix_remove_one - device unload + * @pdev: PCI device being removed + * + * Handle an unplug/unload event for a PCI device. Unload the + * PCI driver but do not use the default handler as we *MUST NOT* + * disable the device as it has other functions. + */ + +static void __devexit mpiix_remove_one(struct pci_dev *pdev) +{ + struct device *dev = pci_dev_to_dev(pdev); + struct ata_host *host = dev_get_drvdata(dev); + + ata_host_remove(host); + dev_set_drvdata(dev, NULL); +} + + + +static const struct pci_device_id mpiix[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX), }, + { 0, }, +}; + +static struct pci_driver mpiix_pci_driver = { + .name = DRV_NAME, + .id_table = mpiix, + .probe = mpiix_init_one, + .remove = mpiix_remove_one +}; + +static int __init mpiix_init(void) +{ + return pci_register_driver(&mpiix_pci_driver); +} + + +static void __exit mpiix_exit(void) +{ + pci_unregister_driver(&mpiix_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Intel MPIIX"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, mpiix); +MODULE_VERSION(DRV_VERSION); + +module_init(mpiix_init); +module_exit(mpiix_exit); diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c new file mode 100644 index 000000000000..e3a85778cd4f --- /dev/null +++ b/drivers/ata/pata_netcell.c @@ -0,0 +1,175 @@ +/* + * pata_netcell.c - Netcell PATA driver + * + * (c) 2006 Red Hat + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_netcell" +#define DRV_VERSION "0.1.5" + +/** + * netcell_probe_init - check for 40/80 pin + * @ap: Port + * + * Cables are handled by the RAID controller. Report 80 pin. + */ + +static int netcell_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + +/** + * netcell_probe_reset - Probe specified port on PATA host controller + * @ap: Port to probe + * + * LOCKING: + * None (inherited from caller). + */ + +static void netcell_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, netcell_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/* No PIO or DMA methods needed for this device */ + +static struct scsi_host_template netcell_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + /* Special handling needed if you have sector or LBA48 limits */ + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + /* Use standard CHS mapping rules */ + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations netcell_ops = { + .port_disable = ata_port_disable, + + /* Task file is PCI ATA format, use helpers */ + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = netcell_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + /* BMDMA handling is PCI ATA format, use helpers */ + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + /* Timeout handling. Special recovery hooks here */ + .eng_timeout = ata_eng_timeout, + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + /* Generic PATA PCI ATA helpers */ + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + + +/** + * netcell_init_one - Register Netcell ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in netcell_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int netcell_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + static struct ata_port_info info = { + .sht = &netcell_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + /* Actually we don't really care about these as the + firmware deals with it */ + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = 0x3f, /* UDMA 133 */ + .port_ops = &netcell_ops, + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, + "version " DRV_VERSION "\n"); + + /* Any chip specific setup/optimisation/messages here */ + ata_pci_clear_simplex(pdev); + + /* And let the library code do the work */ + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id netcell_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NETCELL, PCI_DEVICE_ID_REVOLUTION), }, + { } /* terminate list */ +}; + +static struct pci_driver netcell_pci_driver = { + .name = DRV_NAME, + .id_table = netcell_pci_tbl, + .probe = netcell_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init netcell_init(void) +{ + return pci_register_driver(&netcell_pci_driver); +} + +static void __exit netcell_exit(void) +{ + pci_unregister_driver(&netcell_pci_driver); +} + +module_init(netcell_init); +module_exit(netcell_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for Netcell PATA RAID"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, netcell_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_ns87410.c b/drivers/ata/pata_ns87410.c new file mode 100644 index 000000000000..93d6646d2954 --- /dev/null +++ b/drivers/ata/pata_ns87410.c @@ -0,0 +1,236 @@ +/* + * pata_ns87410.c - National Semiconductor 87410 PATA for new ATA layer + * (C) 2006 Red Hat Inc + * Alan Cox + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_ns87410" +#define DRV_VERSION "0.4.2" + +/** + * ns87410_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int ns87410_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static const struct pci_bits ns87410_enable_bits[] = { + { 0x43, 1, 0x08, 0x08 }, + { 0x47, 1, 0x08, 0x08 } + }; + + if (!pci_test_config_bits(pdev, &ns87410_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * ns87410_error_handler - probe reset + * @ap: ATA port + * + * Perform the ATA probe and bus reset sequence plus specific handling + * for this hardware. The MPIIX has the enable bits in a different place + * to PIIX4 and friends. As a pure PIO device it has no cable detect + */ + +static void ns87410_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, ns87410_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * ns87410_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program timing data. This is kept per channel not per device, + * and only affects the data port. + */ + +static void ns87410_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int port = 0x40 + 4 * ap->port_no; + u8 idetcr, idefr; + struct ata_timing at; + + static const u8 activebits[15] = { + 0, 1, 2, 3, 4, + 5, 5, 6, 6, 6, + 6, 7, 7, 7, 7 + }; + + static const u8 recoverbits[12] = { + 0, 1, 2, 3, 4, 5, 6, 6, 7, 7, 7, 7 + }; + + pci_read_config_byte(pdev, port + 3, &idefr); + + if (ata_pio_need_iordy(adev)) + idefr |= 0x04; /* IORDY enable */ + else + idefr &= ~0x04; + + if (ata_timing_compute(adev, adev->pio_mode, &at, 30303, 1) < 0) { + dev_printk(KERN_ERR, &pdev->dev, "unknown mode %d.\n", adev->pio_mode); + return; + } + + at.active = FIT(at.active, 2, 16) - 2; + at.setup = FIT(at.setup, 1, 4) - 1; + at.recover = FIT(at.recover, 1, 12) - 1; + + idetcr = (at.setup << 6) | (recoverbits[at.recover] << 3) | activebits[at.active]; + + pci_write_config_byte(pdev, port, idetcr); + pci_write_config_byte(pdev, port + 3, idefr); + /* We use ap->private_data as a pointer to the device currently + loaded for timing */ + ap->private_data = adev; +} + +/** + * ns87410_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings if + * neccessary. + */ + +static unsigned int ns87410_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + + /* If modes have been configured and the channel data is not loaded + then load it. We have to check if pio_mode is set as the core code + does not set adev->pio_mode to XFER_PIO_0 while probing as would be + logical */ + + if (adev->pio_mode && adev != ap->private_data) + ns87410_set_piomode(ap, adev); + + return ata_qc_issue_prot(qc); +} + +static struct scsi_host_template ns87410_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations ns87410_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = ns87410_set_piomode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ns87410_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = ns87410_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int ns87410_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &ns87410_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x0F, + .port_ops = &ns87410_port_ops + }; + static struct ata_port_info *port_info[2] = {&info, &info}; + return ata_pci_init_one(dev, port_info, 2); +} + +static const struct pci_device_id ns87410[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87410), }, + { 0, }, +}; + +static struct pci_driver ns87410_pci_driver = { + .name = DRV_NAME, + .id_table = ns87410, + .probe = ns87410_init_one, + .remove = ata_pci_remove_one +}; + +static int __init ns87410_init(void) +{ + return pci_register_driver(&ns87410_pci_driver); +} + + +static void __exit ns87410_exit(void) +{ + pci_unregister_driver(&ns87410_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Nat Semi 87410"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ns87410); +MODULE_VERSION(DRV_VERSION); + +module_init(ns87410_init); +module_exit(ns87410_exit); diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c new file mode 100644 index 000000000000..04c618a2664b --- /dev/null +++ b/drivers/ata/pata_oldpiix.c @@ -0,0 +1,339 @@ +/* + * pata_oldpiix.c - Intel PATA/SATA controllers + * + * (C) 2005 Red Hat + * + * Some parts based on ata_piix.c by Jeff Garzik and others. + * + * Early PIIX differs significantly from the later PIIX as it lacks + * SITRE and the slave timing registers. This means that you have to + * set timing per channel, or be clever. Libata tells us whenever it + * does drive selection and we use this to reload the timings. + * + * Because of these behaviour differences PIIX gets its own driver module. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_oldpiix" +#define DRV_VERSION "0.5.1" + +/** + * oldpiix_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int oldpiix_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static const struct pci_bits oldpiix_enable_bits[] = { + { 0x41U, 1U, 0x80UL, 0x80UL }, /* port 0 */ + { 0x43U, 1U, 0x80UL, 0x80UL }, /* port 1 */ + }; + + if (!pci_test_config_bits(pdev, &oldpiix_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * oldpiix_pata_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * @classes: + * + * LOCKING: + * None (inherited from caller). + */ + +static void oldpiix_pata_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, oldpiix_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * oldpiix_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: um + * + * Set PIO mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void oldpiix_set_piomode (struct ata_port *ap, struct ata_device *adev) +{ + unsigned int pio = adev->pio_mode - XFER_PIO_0; + struct pci_dev *dev = to_pci_dev(ap->host->dev); + unsigned int idetm_port= ap->port_no ? 0x42 : 0x40; + u16 idetm_data; + int control = 0; + + /* + * See Intel Document 298600-004 for the timing programing rules + * for PIIX/ICH. Note that the early PIIX does not have the slave + * timing port at 0x44. + */ + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + if (pio > 2) + control |= 1; /* TIME1 enable */ + if (ata_pio_need_iordy(adev)) + control |= 2; /* IE IORDY */ + + /* Intel specifies that the PPE functionality is for disk only */ + if (adev->class == ATA_DEV_ATA) + control |= 4; /* PPE enable */ + + pci_read_config_word(dev, idetm_port, &idetm_data); + + /* Enable PPE, IE and TIME as appropriate. Clear the other + drive timing bits */ + if (adev->devno == 0) { + idetm_data &= 0xCCE0; + idetm_data |= control; + } else { + idetm_data &= 0xCC0E; + idetm_data |= (control << 4); + } + idetm_data |= (timings[pio][0] << 12) | + (timings[pio][1] << 8); + pci_write_config_word(dev, idetm_port, idetm_data); + + /* Track which port is configured */ + ap->private_data = adev; +} + +/** + * oldpiix_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * @isich: True if the device is an ICH and has IOCFG registers + * + * Set MWDMA mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void oldpiix_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *dev = to_pci_dev(ap->host->dev); + u8 idetm_port = ap->port_no ? 0x42 : 0x40; + u16 idetm_data; + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + /* + * MWDMA is driven by the PIO timings. We must also enable + * IORDY unconditionally along with TIME1. PPE has already + * been set when the PIO timing was set. + */ + + unsigned int mwdma = adev->dma_mode - XFER_MW_DMA_0; + unsigned int control; + const unsigned int needed_pio[3] = { + XFER_PIO_0, XFER_PIO_3, XFER_PIO_4 + }; + int pio = needed_pio[mwdma] - XFER_PIO_0; + + pci_read_config_word(dev, idetm_port, &idetm_data); + + control = 3; /* IORDY|TIME0 */ + /* Intel specifies that the PPE functionality is for disk only */ + if (adev->class == ATA_DEV_ATA) + control |= 4; /* PPE enable */ + + /* If the drive MWDMA is faster than it can do PIO then + we must force PIO into PIO0 */ + + if (adev->pio_mode < needed_pio[mwdma]) + /* Enable DMA timing only */ + control |= 8; /* PIO cycles in PIO0 */ + + /* Mask out the relevant control and timing bits we will load. Also + clear the other drive TIME register as a precaution */ + if (adev->devno == 0) { + idetm_data &= 0xCCE0; + idetm_data |= control; + } else { + idetm_data &= 0xCC0E; + idetm_data |= (control << 4); + } + idetm_data |= (timings[pio][0] << 12) | (timings[pio][1] << 8); + pci_write_config_word(dev, idetm_port, idetm_data); + + /* Track which port is configured */ + ap->private_data = adev; +} + +/** + * oldpiix_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings if + * neccessary. Our logic also clears TIME0/TIME1 for the other device so + * that, even if we get this wrong, cycles to the other device will + * be made PIO0. + */ + +static unsigned int oldpiix_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + + if (adev != ap->private_data) { + if (adev->dma_mode) + oldpiix_set_dmamode(ap, adev); + else if (adev->pio_mode) + oldpiix_set_piomode(ap, adev); + } + return ata_qc_issue_prot(qc); +} + + +static struct scsi_host_template oldpiix_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations oldpiix_pata_ops = { + .port_disable = ata_port_disable, + .set_piomode = oldpiix_set_piomode, + .set_dmamode = oldpiix_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = oldpiix_pata_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = oldpiix_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + + +/** + * oldpiix_init_one - Register PIIX ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in oldpiix_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. We probe for combined mode (sigh), + * and then hand over control to libata, for it to do the rest. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int oldpiix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + static struct ata_port_info info = { + .sht = &oldpiix_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma1-2 */ + .port_ops = &oldpiix_pata_ops, + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, + "version " DRV_VERSION "\n"); + + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id oldpiix_pci_tbl[] = { + { PCI_DEVICE(0x8086, 0x1230), }, + { } /* terminate list */ +}; + +static struct pci_driver oldpiix_pci_driver = { + .name = DRV_NAME, + .id_table = oldpiix_pci_tbl, + .probe = oldpiix_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init oldpiix_init(void) +{ + return pci_register_driver(&oldpiix_pci_driver); +} + +static void __exit oldpiix_exit(void) +{ + pci_unregister_driver(&oldpiix_pci_driver); +} + + +module_init(oldpiix_init); +module_exit(oldpiix_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for early PIIX series controllers"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, oldpiix_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_opti.c b/drivers/ata/pata_opti.c new file mode 100644 index 000000000000..c3d01325e0e2 --- /dev/null +++ b/drivers/ata/pata_opti.c @@ -0,0 +1,292 @@ +/* + * pata_opti.c - ATI PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * Based on + * linux/drivers/ide/pci/opti621.c Version 0.7 Sept 10, 2002 + * + * Copyright (C) 1996-1998 Linus Torvalds & authors (see below) + * + * Authors: + * Jaromir Koutek , + * Jan Harkes , + * Mark Lord + * Some parts of code are from ali14xx.c and from rz1000.c. + * + * Also consulted the FreeBSD prototype driver by Kevin Day to try + * and resolve some confusions. Further documentation can be found in + * Ralf Brown's interrupt list + * + * If you have other variants of the Opti range (Viper/Vendetta) please + * try this driver with those PCI idents and report back. For the later + * chips see the pata_optidma driver + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_opti" +#define DRV_VERSION "0.2.4" + +enum { + READ_REG = 0, /* index of Read cycle timing register */ + WRITE_REG = 1, /* index of Write cycle timing register */ + CNTRL_REG = 3, /* index of Control register */ + STRAP_REG = 5, /* index of Strap register */ + MISC_REG = 6 /* index of Miscellaneous register */ +}; + +/** + * opti_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int opti_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static const struct pci_bits opti_enable_bits[] = { + { 0x45, 1, 0x80, 0x00 }, + { 0x40, 1, 0x08, 0x00 } + }; + + if (!pci_test_config_bits(pdev, &opti_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * opti_probe_reset - probe reset + * @ap: ATA port + * + * Perform the ATA probe and bus reset sequence plus specific handling + * for this hardware. The Opti needs little handling - we have no UDMA66 + * capability that needs cable detection. All we must do is check the port + * is enabled. + */ + +static void opti_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, opti_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * opti_write_reg - control register setup + * @ap: ATA port + * @value: value + * @reg: control register number + * + * The Opti uses magic 'trapdoor' register accesses to do configuration + * rather than using PCI space as other controllers do. The double inw + * on the error register activates configuration mode. We can then write + * the control register + */ + +static void opti_write_reg(struct ata_port *ap, u8 val, int reg) +{ + unsigned long regio = ap->ioaddr.cmd_addr; + + /* These 3 unlock the control register access */ + inw(regio + 1); + inw(regio + 1); + outb(3, regio + 2); + + /* Do the I/O */ + outb(val, regio + reg); + + /* Relock */ + outb(0x83, regio + 2); +} + +#if 0 +/** + * opti_read_reg - control register read + * @ap: ATA port + * @reg: control register number + * + * The Opti uses magic 'trapdoor' register accesses to do configuration + * rather than using PCI space as other controllers do. The double inw + * on the error register activates configuration mode. We can then read + * the control register + */ + +static u8 opti_read_reg(struct ata_port *ap, int reg) +{ + unsigned long regio = ap->ioaddr.cmd_addr; + u8 ret; + inw(regio + 1); + inw(regio + 1); + outb(3, regio + 2); + ret = inb(regio + reg); + outb(0x83, regio + 2); +} +#endif + +/** + * opti_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. Timing numbers are taken from + * the FreeBSD driver then pre computed to keep the code clean. There + * are two tables depending on the hardware clock speed. + */ + +static void opti_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct ata_device *pair = ata_dev_pair(adev); + int clock; + int pio = adev->pio_mode - XFER_PIO_0; + unsigned long regio = ap->ioaddr.cmd_addr; + u8 addr; + + /* Address table precomputed with prefetch off and a DCLK of 2 */ + static const u8 addr_timing[2][5] = { + { 0x30, 0x20, 0x20, 0x10, 0x10 }, + { 0x20, 0x20, 0x10, 0x10, 0x10 } + }; + static const u8 data_rec_timing[2][5] = { + { 0x6B, 0x56, 0x42, 0x32, 0x31 }, + { 0x58, 0x44, 0x32, 0x22, 0x21 } + }; + + outb(0xff, regio + 5); + clock = inw(regio + 5) & 1; + + /* + * As with many controllers the address setup time is shared + * and must suit both devices if present. + */ + + addr = addr_timing[clock][pio]; + if (pair) { + /* Hardware constraint */ + u8 pair_addr = addr_timing[clock][pair->pio_mode - XFER_PIO_0]; + if (pair_addr > addr) + addr = pair_addr; + } + + /* Commence primary programming sequence */ + opti_write_reg(ap, adev->devno, MISC_REG); + opti_write_reg(ap, data_rec_timing[clock][pio], READ_REG); + opti_write_reg(ap, data_rec_timing[clock][pio], WRITE_REG); + opti_write_reg(ap, addr, MISC_REG); + + /* Programming sequence complete, override strapping */ + opti_write_reg(ap, 0x85, CNTRL_REG); +} + +static struct scsi_host_template opti_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations opti_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = opti_set_piomode, +/* .set_dmamode = opti_set_dmamode, */ + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = opti_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int opti_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &opti_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .port_ops = &opti_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + static int printed_version; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &dev->dev, "version " DRV_VERSION "\n"); + + return ata_pci_init_one(dev, port_info, 2); +} + +static const struct pci_device_id opti[] = { + { PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1}, + { 0, }, +}; + +static struct pci_driver opti_pci_driver = { + .name = DRV_NAME, + .id_table = opti, + .probe = opti_init_one, + .remove = ata_pci_remove_one +}; + +static int __init opti_init(void) +{ + return pci_register_driver(&opti_pci_driver); +} + + +static void __exit opti_exit(void) +{ + pci_unregister_driver(&opti_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Opti 621/621X"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, opti); +MODULE_VERSION(DRV_VERSION); + +module_init(opti_init); +module_exit(opti_exit); diff --git a/drivers/ata/pata_optidma.c b/drivers/ata/pata_optidma.c new file mode 100644 index 000000000000..800aea7b9444 --- /dev/null +++ b/drivers/ata/pata_optidma.c @@ -0,0 +1,547 @@ +/* + * pata_optidma.c - Opti DMA PATA for new ATA layer + * (C) 2006 Red Hat Inc + * Alan Cox + * + * The Opti DMA controllers are related to the older PIO PCI controllers + * and indeed the VLB ones. The main differences are that the timing + * numbers are now based off PCI clocks not VLB and differ, and that + * MWDMA is supported. + * + * This driver should support Viper-N+, FireStar, FireStar Plus. + * + * These devices support virtual DMA for read (aka the CS5520). Later + * chips support UDMA33, but only if the rest of the board logic does, + * so you have to get this right. We don't support the virtual DMA + * but we do handle UDMA. + * + * Bits that are worth knowing + * Most control registers are shadowed into I/O registers + * 0x1F5 bit 0 tells you if the PCI/VLB clock is 33 or 25Mhz + * Virtual DMA registers *move* between rev 0x02 and rev 0x10 + * UDMA requires a 66MHz FSB + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_optidma" +#define DRV_VERSION "0.2.1" + +enum { + READ_REG = 0, /* index of Read cycle timing register */ + WRITE_REG = 1, /* index of Write cycle timing register */ + CNTRL_REG = 3, /* index of Control register */ + STRAP_REG = 5, /* index of Strap register */ + MISC_REG = 6 /* index of Miscellaneous register */ +}; + +static int pci_clock; /* 0 = 33 1 = 25 */ + +/** + * optidma_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int optidma_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static const struct pci_bits optidma_enable_bits = { + 0x40, 1, 0x08, 0x00 + }; + + if (ap->port_no && !pci_test_config_bits(pdev, &optidma_enable_bits)) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * optidma_probe_reset - probe reset + * @ap: ATA port + * + * Perform the ATA probe and bus reset sequence plus specific handling + * for this hardware. The Opti needs little handling - we have no UDMA66 + * capability that needs cable detection. All we must do is check the port + * is enabled. + */ + +static void optidma_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, optidma_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * optidma_unlock - unlock control registers + * @ap: ATA port + * + * Unlock the control register block for this adapter. Registers must not + * be unlocked in a situation where libata might look at them. + */ + +static void optidma_unlock(struct ata_port *ap) +{ + unsigned long regio = ap->ioaddr.cmd_addr; + + /* These 3 unlock the control register access */ + inw(regio + 1); + inw(regio + 1); + outb(3, regio + 2); +} + +/** + * optidma_lock - issue temporary relock + * @ap: ATA port + * + * Re-lock the configuration register settings. + */ + +static void optidma_lock(struct ata_port *ap) +{ + unsigned long regio = ap->ioaddr.cmd_addr; + + /* Relock */ + outb(0x83, regio + 2); +} + +/** + * optidma_set_mode - set mode data + * @ap: ATA interface + * @adev: ATA device + * @mode: Mode to set + * + * Called to do the DMA or PIO mode setup. Timing numbers are all + * pre computed to keep the code clean. There are two tables depending + * on the hardware clock speed. + * + * WARNING: While we do this the IDE registers vanish. If we take an + * IRQ here we depend on the host set locking to avoid catastrophe. + */ + +static void optidma_set_mode(struct ata_port *ap, struct ata_device *adev, u8 mode) +{ + struct ata_device *pair = ata_dev_pair(adev); + int pio = adev->pio_mode - XFER_PIO_0; + int dma = adev->dma_mode - XFER_MW_DMA_0; + unsigned long regio = ap->ioaddr.cmd_addr; + u8 addr; + + /* Address table precomputed with a DCLK of 2 */ + static const u8 addr_timing[2][5] = { + { 0x30, 0x20, 0x20, 0x10, 0x10 }, + { 0x20, 0x20, 0x10, 0x10, 0x10 } + }; + static const u8 data_rec_timing[2][5] = { + { 0x59, 0x46, 0x30, 0x20, 0x20 }, + { 0x46, 0x32, 0x20, 0x20, 0x10 } + }; + static const u8 dma_data_rec_timing[2][3] = { + { 0x76, 0x20, 0x20 }, + { 0x54, 0x20, 0x10 } + }; + + /* Switch from IDE to control mode */ + optidma_unlock(ap); + + + /* + * As with many controllers the address setup time is shared + * and must suit both devices if present. FIXME: Check if we + * need to look at slowest of PIO/DMA mode of either device + */ + + if (mode >= XFER_MW_DMA_0) + addr = 0; + else + addr = addr_timing[pci_clock][pio]; + + if (pair) { + u8 pair_addr; + /* Hardware constraint */ + if (pair->dma_mode) + pair_addr = 0; + else + pair_addr = addr_timing[pci_clock][pair->pio_mode - XFER_PIO_0]; + if (pair_addr > addr) + addr = pair_addr; + } + + /* Commence primary programming sequence */ + /* First we load the device number into the timing select */ + outb(adev->devno, regio + MISC_REG); + /* Now we load the data timings into read data/write data */ + if (mode < XFER_MW_DMA_0) { + outb(data_rec_timing[pci_clock][pio], regio + READ_REG); + outb(data_rec_timing[pci_clock][pio], regio + WRITE_REG); + } else if (mode < XFER_UDMA_0) { + outb(dma_data_rec_timing[pci_clock][dma], regio + READ_REG); + outb(dma_data_rec_timing[pci_clock][dma], regio + WRITE_REG); + } + /* Finally we load the address setup into the misc register */ + outb(addr | adev->devno, regio + MISC_REG); + + /* Programming sequence complete, timing 0 dev 0, timing 1 dev 1 */ + outb(0x85, regio + CNTRL_REG); + + /* Switch back to IDE mode */ + optidma_lock(ap); + + /* Note: at this point our programming is incomplete. We are + not supposed to program PCI 0x43 "things we hacked onto the chip" + until we've done both sets of PIO/DMA timings */ +} + +/** + * optiplus_set_mode - DMA setup for Firestar Plus + * @ap: ATA port + * @adev: device + * @mode: desired mode + * + * The Firestar plus has additional UDMA functionality for UDMA0-2 and + * requires we do some additional work. Because the base work we must do + * is mostly shared we wrap the Firestar setup functionality in this + * one + */ + +static void optiplus_set_mode(struct ata_port *ap, struct ata_device *adev, u8 mode) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 udcfg; + u8 udslave; + int dev2 = 2 * adev->devno; + int unit = 2 * ap->port_no + adev->devno; + int udma = mode - XFER_UDMA_0; + + pci_read_config_byte(pdev, 0x44, &udcfg); + if (mode <= XFER_UDMA_0) { + udcfg &= ~(1 << unit); + optidma_set_mode(ap, adev, adev->dma_mode); + } else { + udcfg |= (1 << unit); + if (ap->port_no) { + pci_read_config_byte(pdev, 0x45, &udslave); + udslave &= ~(0x03 << dev2); + udslave |= (udma << dev2); + pci_write_config_byte(pdev, 0x45, udslave); + } else { + udcfg &= ~(0x30 << dev2); + udcfg |= (udma << dev2); + } + } + pci_write_config_byte(pdev, 0x44, udcfg); +} + +/** + * optidma_set_pio_mode - PIO setup callback + * @ap: ATA port + * @adev: Device + * + * The libata core provides separate functions for handling PIO and + * DMA programming. The architecture of the Firestar makes it easier + * for us to have a common function so we provide wrappers + */ + +static void optidma_set_pio_mode(struct ata_port *ap, struct ata_device *adev) +{ + optidma_set_mode(ap, adev, adev->pio_mode); +} + +/** + * optidma_set_dma_mode - DMA setup callback + * @ap: ATA port + * @adev: Device + * + * The libata core provides separate functions for handling PIO and + * DMA programming. The architecture of the Firestar makes it easier + * for us to have a common function so we provide wrappers + */ + +static void optidma_set_dma_mode(struct ata_port *ap, struct ata_device *adev) +{ + optidma_set_mode(ap, adev, adev->dma_mode); +} + +/** + * optiplus_set_pio_mode - PIO setup callback + * @ap: ATA port + * @adev: Device + * + * The libata core provides separate functions for handling PIO and + * DMA programming. The architecture of the Firestar makes it easier + * for us to have a common function so we provide wrappers + */ + +static void optiplus_set_pio_mode(struct ata_port *ap, struct ata_device *adev) +{ + optiplus_set_mode(ap, adev, adev->pio_mode); +} + +/** + * optiplus_set_dma_mode - DMA setup callback + * @ap: ATA port + * @adev: Device + * + * The libata core provides separate functions for handling PIO and + * DMA programming. The architecture of the Firestar makes it easier + * for us to have a common function so we provide wrappers + */ + +static void optiplus_set_dma_mode(struct ata_port *ap, struct ata_device *adev) +{ + optiplus_set_mode(ap, adev, adev->dma_mode); +} + +/** + * optidma_make_bits - PCI setup helper + * @adev: ATA device + * + * Turn the ATA device setup into PCI configuration bits + * for register 0x43 and return the two bits needed. + */ + +static u8 optidma_make_bits43(struct ata_device *adev) +{ + static const u8 bits43[5] = { + 0, 0, 0, 1, 2 + }; + if (!ata_dev_enabled(adev)) + return 0; + if (adev->dma_mode) + return adev->dma_mode - XFER_MW_DMA_0; + return bits43[adev->pio_mode - XFER_PIO_0]; +} + +/** + * optidma_post_set_mode - finalize PCI setup + * @ap: port to set up + * + * Finalise the configuration by writing the nibble of extra bits + * of data into the chip. + */ + +static void optidma_post_set_mode(struct ata_port *ap) +{ + u8 r; + int nybble = 4 * ap->port_no; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_byte(pdev, 0x43, &r); + + r &= (0x0F << nybble); + r |= (optidma_make_bits43(&ap->device[0]) + + (optidma_make_bits43(&ap->device[0]) << 2)) << nybble; + + pci_write_config_byte(pdev, 0x43, r); +} + +static struct scsi_host_template optidma_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations optidma_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = optidma_set_pio_mode, + .set_dmamode = optidma_set_dma_mode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + .error_handler = optidma_error_handler, + .post_set_mode = optidma_post_set_mode, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations optiplus_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = optiplus_set_pio_mode, + .set_dmamode = optiplus_set_dma_mode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + .error_handler = optidma_error_handler, + .post_set_mode = optidma_post_set_mode, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * optiplus_with_udma - Look for UDMA capable setup + * @pdev; ATA controller + */ + +static int optiplus_with_udma(struct pci_dev *pdev) +{ + u8 r; + int ret = 0; + int ioport = 0x22; + struct pci_dev *dev1; + + /* Find function 1 */ + dev1 = pci_get_device(0x1045, 0xC701, NULL); + if(dev1 == NULL) + return 0; + + /* Rev must be >= 0x10 */ + pci_read_config_byte(dev1, 0x08, &r); + if (r < 0x10) + goto done_nomsg; + /* Read the chipset system configuration to check our mode */ + pci_read_config_byte(dev1, 0x5F, &r); + ioport |= (r << 8); + outb(0x10, ioport); + /* Must be 66Mhz sync */ + if ((inb(ioport + 2) & 1) == 0) + goto done; + + /* Check the ATA arbitration/timing is suitable */ + pci_read_config_byte(pdev, 0x42, &r); + if ((r & 0x36) != 0x36) + goto done; + pci_read_config_byte(dev1, 0x52, &r); + if (r & 0x80) /* IDEDIR disabled */ + ret = 1; +done: + printk(KERN_WARNING "UDMA not supported in this configuration.\n"); +done_nomsg: /* Wrong chip revision */ + pci_dev_put(dev1); + return ret; +} + +static int optidma_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info_82c700 = { + .sht = &optidma_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &optidma_port_ops + }; + static struct ata_port_info info_82c700_udma = { + .sht = &optidma_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x07, + .port_ops = &optiplus_port_ops + }; + static struct ata_port_info *port_info[2]; + struct ata_port_info *info = &info_82c700; + static int printed_version; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &dev->dev, "version " DRV_VERSION "\n"); + + /* Fixed location chipset magic */ + inw(0x1F1); + inw(0x1F1); + pci_clock = inb(0x1F5) & 1; /* 0 = 33Mhz, 1 = 25Mhz */ + + if (optiplus_with_udma(dev)) + info = &info_82c700_udma; + + port_info[0] = port_info[1] = info; + return ata_pci_init_one(dev, port_info, 2); +} + +static const struct pci_device_id optidma[] = { + { PCI_DEVICE(0x1045, 0xD568), }, /* Opti 82C700 */ + { 0, }, +}; + +static struct pci_driver optidma_pci_driver = { + .name = DRV_NAME, + .id_table = optidma, + .probe = optidma_init_one, + .remove = ata_pci_remove_one +}; + +static int __init optidma_init(void) +{ + return pci_register_driver(&optidma_pci_driver); +} + + +static void __exit optidma_exit(void) +{ + pci_unregister_driver(&optidma_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Opti Firestar/Firestar Plus"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, optidma); +MODULE_VERSION(DRV_VERSION); + +module_init(optidma_init); +module_exit(optidma_exit); diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c new file mode 100644 index 000000000000..2abe0a3bbb92 --- /dev/null +++ b/drivers/ata/pata_pcmcia.c @@ -0,0 +1,393 @@ +/* + * pata_pcmcia.c - PCMCIA PATA controller driver. + * Copyright 2005-2006 Red Hat Inc , all rights reserved. + * PCMCIA ident update Copyright 2006 Marcin Juszkiewicz + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Heavily based upon ide-cs.c + * The initial developer of the original code is David A. Hinds + * . Portions created by David A. Hinds + * are Copyright (C) 1999 David A. Hinds. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +#define DRV_NAME "pata_pcmcia" +#define DRV_VERSION "0.2.9" + +/* + * Private data structure to glue stuff together + */ + +struct ata_pcmcia_info { + struct pcmcia_device *pdev; + int ndev; + dev_node_t node; +}; + +static struct scsi_host_template pcmcia_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations pcmcia_port_ops = { + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ata_bmdma_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer_noirq, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +#define CS_CHECK(fn, ret) \ +do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) + +/** + * pcmcia_init_one - attach a PCMCIA interface + * @pdev: pcmcia device + * + * Register a PCMCIA IDE interface. Such interfaces are PIO 0 and + * shared IRQ. + */ + +static int pcmcia_init_one(struct pcmcia_device *pdev) +{ + struct ata_probe_ent ae; + struct ata_pcmcia_info *info; + tuple_t tuple; + struct { + unsigned short buf[128]; + cisparse_t parse; + config_info_t conf; + cistpl_cftable_entry_t dflt; + } *stk = NULL; + cistpl_cftable_entry_t *cfg; + int pass, last_ret = 0, last_fn = 0, is_kme = 0, ret = -ENOMEM; + unsigned long io_base, ctl_base; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return -ENOMEM; + + /* Glue stuff together. FIXME: We may be able to get rid of info with care */ + info->pdev = pdev; + pdev->priv = info; + + /* Set up attributes in order to probe card and get resources */ + pdev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + pdev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; + pdev->io.IOAddrLines = 3; + pdev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; + pdev->irq.IRQInfo1 = IRQ_LEVEL_ID; + pdev->conf.Attributes = CONF_ENABLE_IRQ; + pdev->conf.IntType = INT_MEMORY_AND_IO; + + /* Allocate resoure probing structures */ + + stk = kzalloc(sizeof(*stk), GFP_KERNEL); + if (!stk) + goto out1; + + cfg = &stk->parse.cftable_entry; + + /* Tuples we are walking */ + tuple.TupleData = (cisdata_t *)&stk->buf; + tuple.TupleOffset = 0; + tuple.TupleDataMax = 255; + tuple.Attributes = 0; + tuple.DesiredTuple = CISTPL_CONFIG; + + CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(pdev, &tuple)); + CS_CHECK(GetTupleData, pcmcia_get_tuple_data(pdev, &tuple)); + CS_CHECK(ParseTuple, pcmcia_parse_tuple(pdev, &tuple, &stk->parse)); + pdev->conf.ConfigBase = stk->parse.config.base; + pdev->conf.Present = stk->parse.config.rmask[0]; + + /* See if we have a manufacturer identifier. Use it to set is_kme for + vendor quirks */ + tuple.DesiredTuple = CISTPL_MANFID; + if (!pcmcia_get_first_tuple(pdev, &tuple) && !pcmcia_get_tuple_data(pdev, &tuple) && !pcmcia_parse_tuple(pdev, &tuple, &stk->parse)) + is_kme = ((stk->parse.manfid.manf == MANFID_KME) && ((stk->parse.manfid.card == PRODID_KME_KXLC005_A) || (stk->parse.manfid.card == PRODID_KME_KXLC005_B))); + + /* Not sure if this is right... look up the current Vcc */ + CS_CHECK(GetConfigurationInfo, pcmcia_get_configuration_info(pdev, &stk->conf)); +/* link->conf.Vcc = stk->conf.Vcc; */ + + pass = io_base = ctl_base = 0; + tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; + tuple.Attributes = 0; + CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(pdev, &tuple)); + + /* Now munch the resources looking for a suitable set */ + while (1) { + if (pcmcia_get_tuple_data(pdev, &tuple) != 0) + goto next_entry; + if (pcmcia_parse_tuple(pdev, &tuple, &stk->parse) != 0) + goto next_entry; + /* Check for matching Vcc, unless we're desperate */ + if (!pass) { + if (cfg->vcc.present & (1 << CISTPL_POWER_VNOM)) { + if (stk->conf.Vcc != cfg->vcc.param[CISTPL_POWER_VNOM] / 10000) + goto next_entry; + } else if (stk->dflt.vcc.present & (1 << CISTPL_POWER_VNOM)) { + if (stk->conf.Vcc != stk->dflt.vcc.param[CISTPL_POWER_VNOM] / 10000) + goto next_entry; + } + } + + if (cfg->vpp1.present & (1 << CISTPL_POWER_VNOM)) + pdev->conf.Vpp = cfg->vpp1.param[CISTPL_POWER_VNOM] / 10000; + else if (stk->dflt.vpp1.present & (1 << CISTPL_POWER_VNOM)) + pdev->conf.Vpp = stk->dflt.vpp1.param[CISTPL_POWER_VNOM] / 10000; + + if ((cfg->io.nwin > 0) || (stk->dflt.io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &stk->dflt.io; + pdev->conf.ConfigIndex = cfg->index; + pdev->io.BasePort1 = io->win[0].base; + pdev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + if (!(io->flags & CISTPL_IO_16BIT)) + pdev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + if (io->nwin == 2) { + pdev->io.NumPorts1 = 8; + pdev->io.BasePort2 = io->win[1].base; + pdev->io.NumPorts2 = (is_kme) ? 2 : 1; + if (pcmcia_request_io(pdev, &pdev->io) != 0) + goto next_entry; + io_base = pdev->io.BasePort1; + ctl_base = pdev->io.BasePort2; + } else if ((io->nwin == 1) && (io->win[0].len >= 16)) { + pdev->io.NumPorts1 = io->win[0].len; + pdev->io.NumPorts2 = 0; + if (pcmcia_request_io(pdev, &pdev->io) != 0) + goto next_entry; + io_base = pdev->io.BasePort1; + ctl_base = pdev->io.BasePort1 + 0x0e; + } else goto next_entry; + /* If we've got this far, we're done */ + break; + } +next_entry: + if (cfg->flags & CISTPL_CFTABLE_DEFAULT) + memcpy(&stk->dflt, cfg, sizeof(stk->dflt)); + if (pass) { + CS_CHECK(GetNextTuple, pcmcia_get_next_tuple(pdev, &tuple)); + } else if (pcmcia_get_next_tuple(pdev, &tuple) != 0) { + CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(pdev, &tuple)); + memset(&stk->dflt, 0, sizeof(stk->dflt)); + pass++; + } + } + + CS_CHECK(RequestIRQ, pcmcia_request_irq(pdev, &pdev->irq)); + CS_CHECK(RequestConfiguration, pcmcia_request_configuration(pdev, &pdev->conf)); + + /* Success. Disable the IRQ nIEN line, do quirks */ + outb(0x02, ctl_base); + if (is_kme) + outb(0x81, ctl_base + 0x01); + + /* FIXME: Could be more ports at base + 0x10 but we only deal with + one right now */ + if (pdev->io.NumPorts1 >= 0x20) + printk(KERN_WARNING DRV_NAME ": second channel not yet supported.\n"); + + /* + * Having done the PCMCIA plumbing the ATA side is relatively + * sane. + */ + + memset(&ae, 0, sizeof(struct ata_probe_ent)); + INIT_LIST_HEAD(&ae.node); + ae.dev = &pdev->dev; + ae.port_ops = &pcmcia_port_ops; + ae.sht = &pcmcia_sht; + ae.n_ports = 1; + ae.pio_mask = 1; /* ISA so PIO 0 cycles */ + ae.irq = pdev->irq.AssignedIRQ; + ae.irq_flags = SA_SHIRQ; + ae.port_flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST; + ae.port[0].cmd_addr = io_base; + ae.port[0].altstatus_addr = ctl_base; + ae.port[0].ctl_addr = ctl_base; + ata_std_ports(&ae.port[0]); + + if (ata_device_add(&ae) == 0) + goto failed; + + info->ndev = 1; + kfree(stk); + return 0; + +cs_failed: + cs_error(pdev, last_fn, last_ret); +failed: + kfree(stk); + info->ndev = 0; + pcmcia_disable_device(pdev); +out1: + kfree(info); + return ret; +} + +/** + * pcmcia_remove_one - unplug an pcmcia interface + * @pdev: pcmcia device + * + * A PCMCIA ATA device has been unplugged. Perform the needed + * cleanup. Also called on module unload for any active devices. + */ + +static void pcmcia_remove_one(struct pcmcia_device *pdev) +{ + struct ata_pcmcia_info *info = pdev->priv; + struct device *dev = &pdev->dev; + + if (info != NULL) { + /* If we have attached the device to the ATA layer, detach it */ + if (info->ndev) { + struct ata_host *host = dev_get_drvdata(dev); + ata_host_remove(host); + dev_set_drvdata(dev, NULL); + } + info->ndev = 0; + pdev->priv = NULL; + } + pcmcia_disable_device(pdev); + kfree(info); +} + +static struct pcmcia_device_id pcmcia_devices[] = { + PCMCIA_DEVICE_FUNC_ID(4), + PCMCIA_DEVICE_MANF_CARD(0x0007, 0x0000), /* Hitachi */ + PCMCIA_DEVICE_MANF_CARD(0x0032, 0x0704), + PCMCIA_DEVICE_MANF_CARD(0x0045, 0x0401), + PCMCIA_DEVICE_MANF_CARD(0x0098, 0x0000), /* Toshiba */ + PCMCIA_DEVICE_MANF_CARD(0x00a4, 0x002d), + PCMCIA_DEVICE_MANF_CARD(0x00ce, 0x0000), /* Samsung */ + PCMCIA_DEVICE_MANF_CARD(0x0319, 0x0000), /* Hitachi */ + PCMCIA_DEVICE_MANF_CARD(0x2080, 0x0001), + PCMCIA_DEVICE_MANF_CARD(0x4e01, 0x0200), /* Lexar */ + PCMCIA_DEVICE_PROD_ID123("Caravelle", "PSC-IDE ", "PSC000", 0x8c36137c, 0xd0693ab8, 0x2768a9f0), + PCMCIA_DEVICE_PROD_ID123("CDROM", "IDE", "MCD-601p", 0x1b9179ca, 0xede88951, 0x0d902f74), + PCMCIA_DEVICE_PROD_ID123("PCMCIA", "IDE CARD", "F1", 0x281f1c5d, 0x1907960c, 0xf7fde8b9), + PCMCIA_DEVICE_PROD_ID12("ARGOSY", "CD-ROM", 0x78f308dc, 0x66536591), + PCMCIA_DEVICE_PROD_ID12("ARGOSY", "PnPIDE", 0x78f308dc, 0x0c694728), + PCMCIA_DEVICE_PROD_ID12("CNF CD-M", "CD-ROM", 0x7d93b852, 0x66536591), + PCMCIA_DEVICE_PROD_ID12("Creative Technology Ltd.", "PCMCIA CD-ROM Interface Card", 0xff8c8a45, 0xfe8020c4), + PCMCIA_DEVICE_PROD_ID12("Digital Equipment Corporation.", "Digital Mobile Media CD-ROM", 0x17692a66, 0xef1dcbde), + PCMCIA_DEVICE_PROD_ID12("EXP", "CD+GAME", 0x6f58c983, 0x63c13aaf), + PCMCIA_DEVICE_PROD_ID12("EXP ", "CD-ROM", 0x0a5c52fd, 0x66536591), + PCMCIA_DEVICE_PROD_ID12("EXP ", "PnPIDE", 0x0a5c52fd, 0x0c694728), + PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e), + PCMCIA_DEVICE_PROD_ID12("HITACHI", "FLASH", 0xf4f43949, 0x9eb86aae), + PCMCIA_DEVICE_PROD_ID12("HITACHI", "microdrive", 0xf4f43949, 0xa6d76178), + PCMCIA_DEVICE_PROD_ID12("IBM", "microdrive", 0xb569a6e5, 0xa6d76178), + PCMCIA_DEVICE_PROD_ID12("IBM", "IBM17JSSFP20", 0xb569a6e5, 0xf2508753), + PCMCIA_DEVICE_PROD_ID12("IO DATA", "CBIDE2 ", 0x547e66dc, 0x8671043b), + PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDE", 0x547e66dc, 0x5c5ab149), + PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDEII", 0x547e66dc, 0xb3662674), + PCMCIA_DEVICE_PROD_ID12("LOOKMEET", "CBIDE2 ", 0xe37be2b5, 0x8671043b), + PCMCIA_DEVICE_PROD_ID12("M-Systems", "CF500", 0x7ed2ad87, 0x7a13045c), + PCMCIA_DEVICE_PROD_ID2("NinjaATA-", 0xebe0bd79), + PCMCIA_DEVICE_PROD_ID12("PCMCIA", "CD-ROM", 0x281f1c5d, 0x66536591), + PCMCIA_DEVICE_PROD_ID12("PCMCIA", "PnPIDE", 0x281f1c5d, 0x0c694728), + PCMCIA_DEVICE_PROD_ID12("SHUTTLE TECHNOLOGY LTD.", "PCCARD-IDE/ATAPI Adapter", 0x4a3f0ba0, 0x322560e1), + PCMCIA_DEVICE_PROD_ID12("SEAGATE", "ST1", 0x87c1b330, 0xe1f30883), + PCMCIA_DEVICE_PROD_ID12("SAMSUNG", "04/05/06", 0x43d74cb4, 0x6a22777d), + PCMCIA_DEVICE_PROD_ID12("SMI VENDOR", "SMI PRODUCT", 0x30896c92, 0x703cc5f6), + PCMCIA_DEVICE_PROD_ID12("TOSHIBA", "MK2001MPL", 0xb4585a1a, 0x3489e003), + PCMCIA_DEVICE_PROD_ID12("WIT", "IDE16", 0x244e5994, 0x3e232852), + PCMCIA_DEVICE_PROD_ID1("STI Flash", 0xe4a13209), + PCMCIA_DEVICE_PROD_ID12("STI", "Flash 5.0", 0xbf2df18d, 0x8cb57a0e), + PCMCIA_MFC_DEVICE_PROD_ID12(1, "SanDisk", "ConnectPlus", 0x7a954bd9, 0x74be00c6), + PCMCIA_DEVICE_NULL, +}; + +MODULE_DEVICE_TABLE(pcmcia, pcmcia_devices); + +static struct pcmcia_driver pcmcia_driver = { + .owner = THIS_MODULE, + .drv = { + .name = DRV_NAME, + }, + .id_table = pcmcia_devices, + .probe = pcmcia_init_one, + .remove = pcmcia_remove_one, +}; + +static int __init pcmcia_init(void) +{ + return pcmcia_register_driver(&pcmcia_driver); +} + +static void __exit pcmcia_exit(void) +{ + pcmcia_unregister_driver(&pcmcia_driver); +} + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for PCMCIA ATA"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +module_init(pcmcia_init); +module_exit(pcmcia_exit); diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c new file mode 100644 index 000000000000..56b8c1ee2937 --- /dev/null +++ b/drivers/ata/pata_pdc2027x.c @@ -0,0 +1,869 @@ +/* + * Promise PATA TX2/TX4/TX2000/133 IDE driver for pdc20268 to pdc20277. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Ported to libata by: + * Albert Lee IBM Corporation + * + * Copyright (C) 1998-2002 Andre Hedrick + * Portions Copyright (C) 1999 Promise Technology, Inc. + * + * Author: Frank Tiernan (frankt@promise.com) + * Released under terms of General Public License + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware information only available under NDA. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_pdc2027x" +#define DRV_VERSION "0.74-ac3" +#undef PDC_DEBUG + +#ifdef PDC_DEBUG +#define PDPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#else +#define PDPRINTK(fmt, args...) +#endif + +enum { + PDC_UDMA_100 = 0, + PDC_UDMA_133 = 1, + + PDC_100_MHZ = 100000000, + PDC_133_MHZ = 133333333, + + PDC_SYS_CTL = 0x1100, + PDC_ATA_CTL = 0x1104, + PDC_GLOBAL_CTL = 0x1108, + PDC_CTCR0 = 0x110C, + PDC_CTCR1 = 0x1110, + PDC_BYTE_COUNT = 0x1120, + PDC_PLL_CTL = 0x1202, +}; + +static int pdc2027x_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); +static void pdc2027x_remove_one(struct pci_dev *pdev); +static void pdc2027x_error_handler(struct ata_port *ap); +static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev); +static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev); +static void pdc2027x_post_set_mode(struct ata_port *ap); +static int pdc2027x_check_atapi_dma(struct ata_queued_cmd *qc); + +/* + * ATA Timing Tables based on 133MHz controller clock. + * These tables are only used when the controller is in 133MHz clock. + * If the controller is in 100MHz clock, the ASIC hardware will + * set the timing registers automatically when "set feature" command + * is issued to the device. However, if the controller clock is 133MHz, + * the following tables must be used. + */ +static struct pdc2027x_pio_timing { + u8 value0, value1, value2; +} pdc2027x_pio_timing_tbl [] = { + { 0xfb, 0x2b, 0xac }, /* PIO mode 0 */ + { 0x46, 0x29, 0xa4 }, /* PIO mode 1 */ + { 0x23, 0x26, 0x64 }, /* PIO mode 2 */ + { 0x27, 0x0d, 0x35 }, /* PIO mode 3, IORDY on, Prefetch off */ + { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */ +}; + +static struct pdc2027x_mdma_timing { + u8 value0, value1; +} pdc2027x_mdma_timing_tbl [] = { + { 0xdf, 0x5f }, /* MDMA mode 0 */ + { 0x6b, 0x27 }, /* MDMA mode 1 */ + { 0x69, 0x25 }, /* MDMA mode 2 */ +}; + +static struct pdc2027x_udma_timing { + u8 value0, value1, value2; +} pdc2027x_udma_timing_tbl [] = { + { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */ + { 0x3a, 0x0a, 0xd0 }, /* UDMA mode 1 */ + { 0x2a, 0x07, 0xcd }, /* UDMA mode 2 */ + { 0x1a, 0x05, 0xcd }, /* UDMA mode 3 */ + { 0x1a, 0x03, 0xcd }, /* UDMA mode 4 */ + { 0x1a, 0x02, 0xcb }, /* UDMA mode 5 */ + { 0x1a, 0x01, 0xcb }, /* UDMA mode 6 */ +}; + +static const struct pci_device_id pdc2027x_pci_tbl[] = { + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_100 }, + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20269, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_133 }, + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20270, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_100 }, + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20271, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_133 }, + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20275, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_133 }, + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20276, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_133 }, + { PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20277, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PDC_UDMA_133 }, + { } /* terminate list */ +}; + +static struct pci_driver pdc2027x_pci_driver = { + .name = DRV_NAME, + .id_table = pdc2027x_pci_tbl, + .probe = pdc2027x_init_one, + .remove = __devexit_p(pdc2027x_remove_one), +}; + +static struct scsi_host_template pdc2027x_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations pdc2027x_pata100_ops = { + .port_disable = ata_port_disable, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .check_atapi_dma = pdc2027x_check_atapi_dma, + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_mmio_data_xfer, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = pdc2027x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_pci_host_stop, +}; + +static struct ata_port_operations pdc2027x_pata133_ops = { + .port_disable = ata_port_disable, + .set_piomode = pdc2027x_set_piomode, + .set_dmamode = pdc2027x_set_dmamode, + .post_set_mode = pdc2027x_post_set_mode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .check_atapi_dma = pdc2027x_check_atapi_dma, + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_mmio_data_xfer, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = pdc2027x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_pci_host_stop, +}; + +static struct ata_port_info pdc2027x_port_info[] = { + /* PDC_UDMA_100 */ + { + .sht = &pdc2027x_sht, + .flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SLAVE_POSS | + ATA_FLAG_MMIO, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = ATA_UDMA5, /* udma0-5 */ + .port_ops = &pdc2027x_pata100_ops, + }, + /* PDC_UDMA_133 */ + { + .sht = &pdc2027x_sht, + .flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SLAVE_POSS | + ATA_FLAG_MMIO, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = ATA_UDMA6, /* udma0-6 */ + .port_ops = &pdc2027x_pata133_ops, + }, +}; + +MODULE_AUTHOR("Andre Hedrick, Frank Tiernan, Albert Lee"); +MODULE_DESCRIPTION("libata driver module for Promise PDC20268 to PDC20277"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, pdc2027x_pci_tbl); + +/** + * port_mmio - Get the MMIO address of PDC2027x extended registers + * @ap: Port + * @offset: offset from mmio base + */ +static inline void* port_mmio(struct ata_port *ap, unsigned int offset) +{ + return ap->host->mmio_base + ap->port_no * 0x100 + offset; +} + +/** + * dev_mmio - Get the MMIO address of PDC2027x extended registers + * @ap: Port + * @adev: device + * @offset: offset from mmio base + */ +static inline void* dev_mmio(struct ata_port *ap, struct ata_device *adev, unsigned int offset) +{ + u8 adj = (adev->devno) ? 0x08 : 0x00; + return port_mmio(ap, offset) + adj; +} + +/** + * pdc2027x_pata_cbl_detect - Probe host controller cable detect info + * @ap: Port for which cable detect info is desired + * + * Read 80c cable indicator from Promise extended register. + * This register is latched when the system is reset. + * + * LOCKING: + * None (inherited from caller). + */ +static void pdc2027x_cbl_detect(struct ata_port *ap) +{ + u32 cgcr; + + /* check cable detect results */ + cgcr = readl(port_mmio(ap, PDC_GLOBAL_CTL)); + if (cgcr & (1 << 26)) + goto cbl40; + + PDPRINTK("No cable or 80-conductor cable on port %d\n", ap->port_no); + + ap->cbl = ATA_CBL_PATA80; + return; + +cbl40: + printk(KERN_INFO DRV_NAME ": 40-conductor cable detected on port %d\n", ap->port_no); + ap->cbl = ATA_CBL_PATA40; + ap->udma_mask &= ATA_UDMA_MASK_40C; +} + +/** + * pdc2027x_port_enabled - Check PDC ATA control register to see whether the port is enabled. + * @ap: Port to check + */ +static inline int pdc2027x_port_enabled(struct ata_port *ap) +{ + return readb(port_mmio(ap, PDC_ATA_CTL)) & 0x02; +} + +/** + * pdc2027x_prereset - prereset for PATA host controller + * @ap: Target port + * + * Probeinit including cable detection. + * + * LOCKING: + * None (inherited from caller). + */ + +static int pdc2027x_prereset(struct ata_port *ap) +{ + /* Check whether port enabled */ + if (!pdc2027x_port_enabled(ap)) { + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + pdc2027x_cbl_detect(ap); + return ata_std_prereset(ap); +} + +/** + * pdc2027x_error_handler - Perform reset on PATA port and classify + * @ap: Port to reset + * + * Reset PATA phy and classify attached devices. + * + * LOCKING: + * None (inherited from caller). + */ + +static void pdc2027x_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, pdc2027x_prereset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * pdc2027x_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port to configure + * @adev: um + * @pio: PIO mode, 0 - 4 + * + * Set PIO mode for device. + * + * LOCKING: + * None (inherited from caller). + */ + +static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + unsigned int pio = adev->pio_mode - XFER_PIO_0; + u32 ctcr0, ctcr1; + + PDPRINTK("adev->pio_mode[%X]\n", adev->pio_mode); + + /* Sanity check */ + if (pio > 4) { + printk(KERN_ERR DRV_NAME ": Unknown pio mode [%d] ignored\n", pio); + return; + + } + + /* Set the PIO timing registers using value table for 133MHz */ + PDPRINTK("Set pio regs... \n"); + + ctcr0 = readl(dev_mmio(ap, adev, PDC_CTCR0)); + ctcr0 &= 0xffff0000; + ctcr0 |= pdc2027x_pio_timing_tbl[pio].value0 | + (pdc2027x_pio_timing_tbl[pio].value1 << 8); + writel(ctcr0, dev_mmio(ap, adev, PDC_CTCR0)); + + ctcr1 = readl(dev_mmio(ap, adev, PDC_CTCR1)); + ctcr1 &= 0x00ffffff; + ctcr1 |= (pdc2027x_pio_timing_tbl[pio].value2 << 24); + writel(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); + + PDPRINTK("Set pio regs done\n"); + + PDPRINTK("Set to pio mode[%u] \n", pio); +} + +/** + * pdc2027x_set_dmamode - Initialize host controller PATA UDMA timings + * @ap: Port to configure + * @adev: um + * @udma: udma mode, XFER_UDMA_0 to XFER_UDMA_6 + * + * Set UDMA mode for device. + * + * LOCKING: + * None (inherited from caller). + */ +static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + unsigned int dma_mode = adev->dma_mode; + u32 ctcr0, ctcr1; + + if ((dma_mode >= XFER_UDMA_0) && + (dma_mode <= XFER_UDMA_6)) { + /* Set the UDMA timing registers with value table for 133MHz */ + unsigned int udma_mode = dma_mode & 0x07; + + if (dma_mode == XFER_UDMA_2) { + /* + * Turn off tHOLD. + * If tHOLD is '1', the hardware will add half clock for data hold time. + * This code segment seems to be no effect. tHOLD will be overwritten below. + */ + ctcr1 = readl(dev_mmio(ap, adev, PDC_CTCR1)); + writel(ctcr1 & ~(1 << 7), dev_mmio(ap, adev, PDC_CTCR1)); + } + + PDPRINTK("Set udma regs... \n"); + + ctcr1 = readl(dev_mmio(ap, adev, PDC_CTCR1)); + ctcr1 &= 0xff000000; + ctcr1 |= pdc2027x_udma_timing_tbl[udma_mode].value0 | + (pdc2027x_udma_timing_tbl[udma_mode].value1 << 8) | + (pdc2027x_udma_timing_tbl[udma_mode].value2 << 16); + writel(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); + + PDPRINTK("Set udma regs done\n"); + + PDPRINTK("Set to udma mode[%u] \n", udma_mode); + + } else if ((dma_mode >= XFER_MW_DMA_0) && + (dma_mode <= XFER_MW_DMA_2)) { + /* Set the MDMA timing registers with value table for 133MHz */ + unsigned int mdma_mode = dma_mode & 0x07; + + PDPRINTK("Set mdma regs... \n"); + ctcr0 = readl(dev_mmio(ap, adev, PDC_CTCR0)); + + ctcr0 &= 0x0000ffff; + ctcr0 |= (pdc2027x_mdma_timing_tbl[mdma_mode].value0 << 16) | + (pdc2027x_mdma_timing_tbl[mdma_mode].value1 << 24); + + writel(ctcr0, dev_mmio(ap, adev, PDC_CTCR0)); + PDPRINTK("Set mdma regs done\n"); + + PDPRINTK("Set to mdma mode[%u] \n", mdma_mode); + } else { + printk(KERN_ERR DRV_NAME ": Unknown dma mode [%u] ignored\n", dma_mode); + } +} + +/** + * pdc2027x_post_set_mode - Set the timing registers back to correct values. + * @ap: Port to configure + * + * The pdc2027x hardware will look at "SET FEATURES" and change the timing registers + * automatically. The values set by the hardware might be incorrect, under 133Mhz PLL. + * This function overwrites the possibly incorrect values set by the hardware to be correct. + */ +static void pdc2027x_post_set_mode(struct ata_port *ap) +{ + int i; + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + + if (ata_dev_enabled(dev)) { + + pdc2027x_set_piomode(ap, dev); + + /* + * Enable prefetch if the device support PIO only. + */ + if (dev->xfer_shift == ATA_SHIFT_PIO) { + u32 ctcr1 = readl(dev_mmio(ap, dev, PDC_CTCR1)); + ctcr1 |= (1 << 25); + writel(ctcr1, dev_mmio(ap, dev, PDC_CTCR1)); + + PDPRINTK("Turn on prefetch\n"); + } else { + pdc2027x_set_dmamode(ap, dev); + } + } + } +} + +/** + * pdc2027x_check_atapi_dma - Check whether ATAPI DMA can be supported for this command + * @qc: Metadata associated with taskfile to check + * + * LOCKING: + * None (inherited from caller). + * + * RETURNS: 0 when ATAPI DMA can be used + * 1 otherwise + */ +static int pdc2027x_check_atapi_dma(struct ata_queued_cmd *qc) +{ + struct scsi_cmnd *cmd = qc->scsicmd; + u8 *scsicmd = cmd->cmnd; + int rc = 1; /* atapi dma off by default */ + + /* + * This workaround is from Promise's GPL driver. + * If ATAPI DMA is used for commands not in the + * following white list, say MODE_SENSE and REQUEST_SENSE, + * pdc2027x might hit the irq lost problem. + */ + switch (scsicmd[0]) { + case READ_10: + case WRITE_10: + case READ_12: + case WRITE_12: + case READ_6: + case WRITE_6: + case 0xad: /* READ_DVD_STRUCTURE */ + case 0xbe: /* READ_CD */ + /* ATAPI DMA is ok */ + rc = 0; + break; + default: + ; + } + + return rc; +} + +/** + * pdc_read_counter - Read the ctr counter + * @probe_ent: for the port address + */ + +static long pdc_read_counter(struct ata_probe_ent *probe_ent) +{ + long counter; + int retry = 1; + u32 bccrl, bccrh, bccrlv, bccrhv; + +retry: + bccrl = readl(probe_ent->mmio_base + PDC_BYTE_COUNT) & 0xffff; + bccrh = readl(probe_ent->mmio_base + PDC_BYTE_COUNT + 0x100) & 0xffff; + rmb(); + + /* Read the counter values again for verification */ + bccrlv = readl(probe_ent->mmio_base + PDC_BYTE_COUNT) & 0xffff; + bccrhv = readl(probe_ent->mmio_base + PDC_BYTE_COUNT + 0x100) & 0xffff; + rmb(); + + counter = (bccrh << 15) | bccrl; + + PDPRINTK("bccrh [%X] bccrl [%X]\n", bccrh, bccrl); + PDPRINTK("bccrhv[%X] bccrlv[%X]\n", bccrhv, bccrlv); + + /* + * The 30-bit decreasing counter are read by 2 pieces. + * Incorrect value may be read when both bccrh and bccrl are changing. + * Ex. When 7900 decrease to 78FF, wrong value 7800 might be read. + */ + if (retry && !(bccrh == bccrhv && bccrl >= bccrlv)) { + retry--; + PDPRINTK("rereading counter\n"); + goto retry; + } + + return counter; +} + +/** + * adjust_pll - Adjust the PLL input clock in Hz. + * + * @pdc_controller: controller specific information + * @probe_ent: For the port address + * @pll_clock: The input of PLL in HZ + */ +static void pdc_adjust_pll(struct ata_probe_ent *probe_ent, long pll_clock, unsigned int board_idx) +{ + + u16 pll_ctl; + long pll_clock_khz = pll_clock / 1000; + long pout_required = board_idx? PDC_133_MHZ:PDC_100_MHZ; + long ratio = pout_required / pll_clock_khz; + int F, R; + + /* Sanity check */ + if (unlikely(pll_clock_khz < 5000L || pll_clock_khz > 70000L)) { + printk(KERN_ERR DRV_NAME ": Invalid PLL input clock %ldkHz, give up!\n", pll_clock_khz); + return; + } + +#ifdef PDC_DEBUG + PDPRINTK("pout_required is %ld\n", pout_required); + + /* Show the current clock value of PLL control register + * (maybe already configured by the firmware) + */ + pll_ctl = readw(probe_ent->mmio_base + PDC_PLL_CTL); + + PDPRINTK("pll_ctl[%X]\n", pll_ctl); +#endif + + /* + * Calculate the ratio of F, R and OD + * POUT = (F + 2) / (( R + 2) * NO) + */ + if (ratio < 8600L) { /* 8.6x */ + /* Using NO = 0x01, R = 0x0D */ + R = 0x0d; + } else if (ratio < 12900L) { /* 12.9x */ + /* Using NO = 0x01, R = 0x08 */ + R = 0x08; + } else if (ratio < 16100L) { /* 16.1x */ + /* Using NO = 0x01, R = 0x06 */ + R = 0x06; + } else if (ratio < 64000L) { /* 64x */ + R = 0x00; + } else { + /* Invalid ratio */ + printk(KERN_ERR DRV_NAME ": Invalid ratio %ld, give up!\n", ratio); + return; + } + + F = (ratio * (R+2)) / 1000 - 2; + + if (unlikely(F < 0 || F > 127)) { + /* Invalid F */ + printk(KERN_ERR DRV_NAME ": F[%d] invalid!\n", F); + return; + } + + PDPRINTK("F[%d] R[%d] ratio*1000[%ld]\n", F, R, ratio); + + pll_ctl = (R << 8) | F; + + PDPRINTK("Writing pll_ctl[%X]\n", pll_ctl); + + writew(pll_ctl, probe_ent->mmio_base + PDC_PLL_CTL); + readw(probe_ent->mmio_base + PDC_PLL_CTL); /* flush */ + + /* Wait the PLL circuit to be stable */ + mdelay(30); + +#ifdef PDC_DEBUG + /* + * Show the current clock value of PLL control register + * (maybe configured by the firmware) + */ + pll_ctl = readw(probe_ent->mmio_base + PDC_PLL_CTL); + + PDPRINTK("pll_ctl[%X]\n", pll_ctl); +#endif + + return; +} + +/** + * detect_pll_input_clock - Detect the PLL input clock in Hz. + * @probe_ent: for the port address + * Ex. 16949000 on 33MHz PCI bus for pdc20275. + * Half of the PCI clock. + */ +static long pdc_detect_pll_input_clock(struct ata_probe_ent *probe_ent) +{ + u32 scr; + long start_count, end_count; + long pll_clock; + + /* Read current counter value */ + start_count = pdc_read_counter(probe_ent); + + /* Start the test mode */ + scr = readl(probe_ent->mmio_base + PDC_SYS_CTL); + PDPRINTK("scr[%X]\n", scr); + writel(scr | (0x01 << 14), probe_ent->mmio_base + PDC_SYS_CTL); + readl(probe_ent->mmio_base + PDC_SYS_CTL); /* flush */ + + /* Let the counter run for 100 ms. */ + mdelay(100); + + /* Read the counter values again */ + end_count = pdc_read_counter(probe_ent); + + /* Stop the test mode */ + scr = readl(probe_ent->mmio_base + PDC_SYS_CTL); + PDPRINTK("scr[%X]\n", scr); + writel(scr & ~(0x01 << 14), probe_ent->mmio_base + PDC_SYS_CTL); + readl(probe_ent->mmio_base + PDC_SYS_CTL); /* flush */ + + /* calculate the input clock in Hz */ + pll_clock = (start_count - end_count) * 10; + + PDPRINTK("start[%ld] end[%ld] \n", start_count, end_count); + PDPRINTK("PLL input clock[%ld]Hz\n", pll_clock); + + return pll_clock; +} + +/** + * pdc_hardware_init - Initialize the hardware. + * @pdev: instance of pci_dev found + * @pdc_controller: controller specific information + * @pe: for the port address + */ +static int pdc_hardware_init(struct pci_dev *pdev, struct ata_probe_ent *pe, unsigned int board_idx) +{ + long pll_clock; + + /* + * Detect PLL input clock rate. + * On some system, where PCI bus is running at non-standard clock rate. + * Ex. 25MHz or 40MHz, we have to adjust the cycle_time. + * The pdc20275 controller employs PLL circuit to help correct timing registers setting. + */ + pll_clock = pdc_detect_pll_input_clock(pe); + + if (pll_clock < 0) /* counter overflow? Try again. */ + pll_clock = pdc_detect_pll_input_clock(pe); + + dev_printk(KERN_INFO, &pdev->dev, "PLL input clock %ld kHz\n", pll_clock/1000); + + /* Adjust PLL control register */ + pdc_adjust_pll(pe, pll_clock, board_idx); + + return 0; +} + +/** + * pdc_ata_setup_port - setup the mmio address + * @port: ata ioports to setup + * @base: base address + */ +static void pdc_ata_setup_port(struct ata_ioports *port, unsigned long base) +{ + port->cmd_addr = + port->data_addr = base; + port->feature_addr = + port->error_addr = base + 0x05; + port->nsect_addr = base + 0x0a; + port->lbal_addr = base + 0x0f; + port->lbam_addr = base + 0x10; + port->lbah_addr = base + 0x15; + port->device_addr = base + 0x1a; + port->command_addr = + port->status_addr = base + 0x1f; + port->altstatus_addr = + port->ctl_addr = base + 0x81a; +} + +/** + * pdc2027x_init_one - PCI probe function + * Called when an instance of PCI adapter is inserted. + * This function checks whether the hardware is supported, + * initialize hardware and register an instance of ata_host to + * libata by providing struct ata_probe_ent and ata_device_add(). + * (implements struct pci_driver.probe() ) + * + * @pdev: instance of pci_dev found + * @ent: matching entry in the id_tbl[] + */ +static int __devinit pdc2027x_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + unsigned int board_idx = (unsigned int) ent->driver_data; + + struct ata_probe_ent *probe_ent = NULL; + unsigned long base; + void *mmio_base; + int rc; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); + + rc = pci_enable_device(pdev); + if (rc) + return rc; + + rc = pci_request_regions(pdev, DRV_NAME); + if (rc) + goto err_out; + + rc = pci_set_dma_mask(pdev, ATA_DMA_MASK); + if (rc) + goto err_out_regions; + + rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK); + if (rc) + goto err_out_regions; + + /* Prepare the probe entry */ + probe_ent = kzalloc(sizeof(*probe_ent), GFP_KERNEL); + if (probe_ent == NULL) { + rc = -ENOMEM; + goto err_out_regions; + } + + probe_ent->dev = pci_dev_to_dev(pdev); + INIT_LIST_HEAD(&probe_ent->node); + + mmio_base = pci_iomap(pdev, 5, 0); + if (!mmio_base) { + rc = -ENOMEM; + goto err_out_free_ent; + } + + base = (unsigned long) mmio_base; + + probe_ent->sht = pdc2027x_port_info[board_idx].sht; + probe_ent->port_flags = pdc2027x_port_info[board_idx].flags; + probe_ent->pio_mask = pdc2027x_port_info[board_idx].pio_mask; + probe_ent->mwdma_mask = pdc2027x_port_info[board_idx].mwdma_mask; + probe_ent->udma_mask = pdc2027x_port_info[board_idx].udma_mask; + probe_ent->port_ops = pdc2027x_port_info[board_idx].port_ops; + + probe_ent->irq = pdev->irq; + probe_ent->irq_flags = SA_SHIRQ; + probe_ent->mmio_base = mmio_base; + + pdc_ata_setup_port(&probe_ent->port[0], base + 0x17c0); + probe_ent->port[0].bmdma_addr = base + 0x1000; + pdc_ata_setup_port(&probe_ent->port[1], base + 0x15c0); + probe_ent->port[1].bmdma_addr = base + 0x1008; + + probe_ent->n_ports = 2; + + pci_set_master(pdev); + //pci_enable_intx(pdev); + + /* initialize adapter */ + if (pdc_hardware_init(pdev, probe_ent, board_idx) != 0) + goto err_out_free_ent; + + ata_device_add(probe_ent); + kfree(probe_ent); + + return 0; + +err_out_free_ent: + kfree(probe_ent); +err_out_regions: + pci_release_regions(pdev); +err_out: + pci_disable_device(pdev); + return rc; +} + +/** + * pdc2027x_remove_one - Called to remove a single instance of the + * adapter. + * + * @dev: The PCI device to remove. + * FIXME: module load/unload not working yet + */ +static void __devexit pdc2027x_remove_one(struct pci_dev *pdev) +{ + ata_pci_remove_one(pdev); +} + +/** + * pdc2027x_init - Called after this module is loaded into the kernel. + */ +static int __init pdc2027x_init(void) +{ + return pci_module_init(&pdc2027x_pci_driver); +} + +/** + * pdc2027x_exit - Called before this module unloaded from the kernel + */ +static void __exit pdc2027x_exit(void) +{ + pci_unregister_driver(&pdc2027x_pci_driver); +} + +module_init(pdc2027x_init); +module_exit(pdc2027x_exit); diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c new file mode 100644 index 000000000000..6cb52b0e7696 --- /dev/null +++ b/drivers/ata/pata_pdc202xx_old.c @@ -0,0 +1,423 @@ +/* + * pata_pdc202xx_old.c - Promise PDC202xx PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * Based in part on linux/drivers/ide/pci/pdc202xx_old.c + * + * First cut with LBA48/ATAPI + * + * TODO: + * Channel interlock/reset on both required ? + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_pdc202xx_old" +#define DRV_VERSION "0.2.1" + +/** + * pdc2024x_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int pdc2024x_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + + +static void pdc2024x_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, pdc2024x_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + + +static int pdc2026x_pre_reset(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u16 cis; + + pci_read_config_word(pdev, 0x50, &cis); + if (cis & (1 << (10 + ap->port_no))) + ap->cbl = ATA_CBL_PATA80; + else + ap->cbl = ATA_CBL_PATA40; + + return ata_std_prereset(ap); +} + +static void pdc2026x_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, pdc2026x_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * pdc_configure_piomode - set chip PIO timing + * @ap: ATA interface + * @adev: ATA device + * @pio: PIO mode + * + * Called to do the PIO mode setup. Our timing registers are shared + * so a configure_dmamode call will undo any work we do here and vice + * versa + */ + +static void pdc_configure_piomode(struct ata_port *ap, struct ata_device *adev, int pio) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int port = 0x60 + 4 * ap->port_no + 2 * adev->devno; + static u16 pio_timing[5] = { + 0x0913, 0x050C , 0x0308, 0x0206, 0x0104 + }; + u8 r_ap, r_bp; + + pci_read_config_byte(pdev, port, &r_ap); + pci_read_config_byte(pdev, port + 1, &r_bp); + r_ap &= ~0x3F; /* Preserve ERRDY_EN, SYNC_IN */ + r_bp &= ~0x07; + r_ap |= (pio_timing[pio] >> 8); + r_bp |= (pio_timing[pio] & 0xFF); + + if (ata_pio_need_iordy(adev)) + r_ap |= 0x20; /* IORDY enable */ + if (adev->class == ATA_DEV_ATA) + r_ap |= 0x10; /* FIFO enable */ + pci_write_config_byte(pdev, port, r_ap); + pci_write_config_byte(pdev, port + 1, r_bp); +} + +/** + * pdc_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. Our timing registers are shared + * but we want to set the PIO timing by default. + */ + +static void pdc_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + pdc_configure_piomode(ap, adev, adev->pio_mode - XFER_PIO_0); +} + +/** + * pdc_configure_dmamode - set DMA mode in chip + * @ap: ATA interface + * @adev: ATA device + * + * Load DMA cycle times into the chip ready for a DMA transfer + * to occur. + */ + +static void pdc_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int port = 0x60 + 4 * ap->port_no + 2 * adev->devno; + static u8 udma_timing[6][2] = { + { 0x60, 0x03 }, /* 33 Mhz Clock */ + { 0x40, 0x02 }, + { 0x20, 0x01 }, + { 0x40, 0x02 }, /* 66 Mhz Clock */ + { 0x20, 0x01 }, + { 0x20, 0x01 } + }; + u8 r_bp, r_cp; + + pci_read_config_byte(pdev, port + 1, &r_bp); + pci_read_config_byte(pdev, port + 2, &r_cp); + + r_bp &= ~0xF0; + r_cp &= ~0x0F; + + if (adev->dma_mode >= XFER_UDMA_0) { + int speed = adev->dma_mode - XFER_UDMA_0; + r_bp |= udma_timing[speed][0]; + r_cp |= udma_timing[speed][1]; + + } else { + int speed = adev->dma_mode - XFER_MW_DMA_0; + r_bp |= 0x60; + r_cp |= (5 - speed); + } + pci_write_config_byte(pdev, port + 1, r_bp); + pci_write_config_byte(pdev, port + 2, r_cp); + +} + +/** + * pdc2026x_bmdma_start - DMA engine begin + * @qc: ATA command + * + * In UDMA3 or higher we have to clock switch for the duration of the + * DMA transfer sequence. + */ + +static void pdc2026x_bmdma_start(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct ata_taskfile *tf = &qc->tf; + int sel66 = ap->port_no ? 0x08: 0x02; + + unsigned long master = ap->host->ports[0]->ioaddr.bmdma_addr; + unsigned long clock = master + 0x11; + unsigned long atapi_reg = master + 0x20 + (4 * ap->port_no); + + u32 len; + + /* Check we keep host level locking here */ + if (adev->dma_mode >= XFER_UDMA_2) + outb(inb(clock) | sel66, clock); + else + outb(inb(clock) & ~sel66, clock); + + /* The DMA clocks may have been trashed by a reset. FIXME: make conditional + and move to qc_issue ? */ + pdc_set_dmamode(ap, qc->dev); + + /* Cases the state machine will not complete correctly without help */ + if ((tf->flags & ATA_TFLAG_LBA48) || tf->protocol == ATA_PROT_ATAPI_DMA) + { + if (tf->flags & ATA_TFLAG_LBA48) + len = qc->nsect * 512; + else + len = qc->nbytes; + + if (tf->flags & ATA_TFLAG_WRITE) + len |= 0x06000000; + else + len |= 0x05000000; + + outl(len, atapi_reg); + } + + /* Activate DMA */ + ata_bmdma_start(qc); +} + +/** + * pdc2026x_bmdma_end - DMA engine stop + * @qc: ATA command + * + * After a DMA completes we need to put the clock back to 33MHz for + * PIO timings. + */ + +static void pdc2026x_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct ata_taskfile *tf = &qc->tf; + + int sel66 = ap->port_no ? 0x08: 0x02; + /* The clock bits are in the same register for both channels */ + unsigned long master = ap->host->ports[0]->ioaddr.bmdma_addr; + unsigned long clock = master + 0x11; + unsigned long atapi_reg = master + 0x20 + (4 * ap->port_no); + + /* Cases the state machine will not complete correctly */ + if (tf->protocol == ATA_PROT_ATAPI_DMA || ( tf->flags & ATA_TFLAG_LBA48)) { + outl(0, atapi_reg); + outb(inb(clock) & ~sel66, clock); + } + /* Check we keep host level locking here */ + /* Flip back to 33Mhz for PIO */ + if (adev->dma_mode >= XFER_UDMA_2) + outb(inb(clock) & ~sel66, clock); + + ata_bmdma_stop(qc); +} + +/** + * pdc2026x_dev_config - device setup hook + * @ap: ATA port + * @adev: newly found device + * + * Perform chip specific early setup. We need to lock the transfer + * sizes to 8bit to avoid making the state engine on the 2026x cards + * barf. + */ + +static void pdc2026x_dev_config(struct ata_port *ap, struct ata_device *adev) +{ + adev->max_sectors = 256; +} + +static struct scsi_host_template pdc_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations pdc2024x_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = pdc_set_piomode, + .set_dmamode = pdc_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = pdc2024x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations pdc2026x_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = pdc_set_piomode, + .set_dmamode = pdc_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + .dev_config = pdc2026x_dev_config, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = pdc2026x_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = pdc2026x_bmdma_start, + .bmdma_stop = pdc2026x_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int pdc_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info[3] = { + { + .sht = &pdc_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA2, + .port_ops = &pdc2024x_port_ops + }, + { + .sht = &pdc_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA4, + .port_ops = &pdc2026x_port_ops + }, + { + .sht = &pdc_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA5, + .port_ops = &pdc2026x_port_ops + } + + }; + static struct ata_port_info *port_info[2]; + + port_info[0] = port_info[1] = &info[id->driver_data]; + + if (dev->device == PCI_DEVICE_ID_PROMISE_20265) { + struct pci_dev *bridge = dev->bus->self; + /* Don't grab anything behind a Promise I2O RAID */ + if (bridge && bridge->vendor == PCI_VENDOR_ID_INTEL) { + if( bridge->device == PCI_DEVICE_ID_INTEL_I960) + return -ENODEV; + if( bridge->device == PCI_DEVICE_ID_INTEL_I960RM) + return -ENODEV; + } + } + return ata_pci_init_one(dev, port_info, 2); +} + +static struct pci_device_id pdc[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20246), 0}, + { PCI_DEVICE(PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20262), 1}, + { PCI_DEVICE(PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20263), 1}, + { PCI_DEVICE(PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20265), 2}, + { PCI_DEVICE(PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20267), 2}, + { 0, }, +}; + +static struct pci_driver pdc_pci_driver = { + .name = DRV_NAME, + .id_table = pdc, + .probe = pdc_init_one, + .remove = ata_pci_remove_one +}; + +static int __init pdc_init(void) +{ + return pci_register_driver(&pdc_pci_driver); +} + + +static void __exit pdc_exit(void) +{ + pci_unregister_driver(&pdc_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Promise 2024x and 20262-20267"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, pdc); +MODULE_VERSION(DRV_VERSION); + +module_init(pdc_init); +module_exit(pdc_exit); diff --git a/drivers/ata/pata_qdi.c b/drivers/ata/pata_qdi.c new file mode 100644 index 000000000000..ededb40e084d --- /dev/null +++ b/drivers/ata/pata_qdi.c @@ -0,0 +1,403 @@ +/* + * pata_qdi.c - QDI VLB ATA controllers + * (C) 2006 Red Hat + * + * This driver mostly exists as a proof of concept for non PCI devices under + * libata. While the QDI6580 was 'neat' in 1993 it is no longer terribly + * useful. + * + * Tuning code written from the documentation at + * http://www.ryston.cz/petr/vlb/qd6500.html + * http://www.ryston.cz/petr/vlb/qd6580.html + * + * Probe code based on drivers/ide/legacy/qd65xx.c + * Rewritten from the work of Colten Edwards by + * Samuel Thibault + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_qdi" +#define DRV_VERSION "0.2.4" + +#define NR_HOST 4 /* Two 6580s */ + +struct qdi_data { + unsigned long timing; + u8 clock[2]; + u8 last; + int fast; + struct platform_device *platform_dev; + +}; + +static struct ata_host *qdi_host[NR_HOST]; +static struct qdi_data qdi_data[NR_HOST]; +static int nr_qdi_host; + +#ifdef MODULE +static int probe_qdi = 1; +#else +static int probe_qdi; +#endif + +static void qdi6500_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct ata_timing t; + struct qdi_data *qdi = ap->host->private_data; + int active, recovery; + u8 timing; + + /* Get the timing data in cycles */ + ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000); + + if (qdi->fast) { + active = 8 - FIT(t.active, 1, 8); + recovery = 18 - FIT(t.recover, 3, 18); + } else { + active = 9 - FIT(t.active, 2, 9); + recovery = 15 - FIT(t.recover, 0, 15); + } + timing = (recovery << 4) | active | 0x08; + + qdi->clock[adev->devno] = timing; + + outb(timing, qdi->timing); +} + +static void qdi6580_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + struct ata_timing t; + struct qdi_data *qdi = ap->host->private_data; + int active, recovery; + u8 timing; + + /* Get the timing data in cycles */ + ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000); + + if (qdi->fast) { + active = 8 - FIT(t.active, 1, 8); + recovery = 18 - FIT(t.recover, 3, 18); + } else { + active = 9 - FIT(t.active, 2, 9); + recovery = 15 - FIT(t.recover, 0, 15); + } + timing = (recovery << 4) | active | 0x08; + + qdi->clock[adev->devno] = timing; + + outb(timing, qdi->timing); + + /* Clear the FIFO */ + if (adev->class != ATA_DEV_ATA) + outb(0x5F, (qdi->timing & 0xFFF0) + 3); +} + +/** + * qdi_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings. + */ + +static unsigned int qdi_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct qdi_data *qdi = ap->host->private_data; + + if (qdi->clock[adev->devno] != qdi->last) { + if (adev->pio_mode) { + qdi->last = qdi->clock[adev->devno]; + outb(qdi->clock[adev->devno], qdi->timing); + } + } + return ata_qc_issue_prot(qc); +} + +static void qdi_data_xfer(struct ata_device *adev, unsigned char *buf, unsigned int buflen, int write_data) +{ + struct ata_port *ap = adev->ap; + int slop = buflen & 3; + + if (ata_id_has_dword_io(adev->id)) { + if (write_data) + outsl(ap->ioaddr.data_addr, buf, buflen >> 2); + else + insl(ap->ioaddr.data_addr, buf, buflen >> 2); + + if (unlikely(slop)) { + u32 pad; + if (write_data) { + memcpy(&pad, buf + buflen - slop, slop); + outl(le32_to_cpu(pad), ap->ioaddr.data_addr); + } else { + pad = cpu_to_le16(inl(ap->ioaddr.data_addr)); + memcpy(buf + buflen - slop, &pad, slop); + } + } + } else + ata_pio_data_xfer(adev, buf, buflen, write_data); +} + +static struct scsi_host_template qdi_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations qdi6500_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = qdi6500_set_piomode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ata_bmdma_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = qdi_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = qdi_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations qdi6580_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = qdi6580_set_piomode, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ata_bmdma_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .qc_prep = ata_qc_prep, + .qc_issue = qdi_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = qdi_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * qdi_init_one - attach a qdi interface + * @type: Type to display + * @io: I/O port start + * @irq: interrupt line + * @fast: True if on a > 33Mhz VLB + * + * Register an ISA bus IDE interface. Such interfaces are PIO and we + * assume do not support IRQ sharing. + */ + +static __init int qdi_init_one(unsigned long port, int type, unsigned long io, int irq, int fast) +{ + struct ata_probe_ent ae; + struct platform_device *pdev; + int ret; + + unsigned long ctrl = io + 0x206; + + /* + * Fill in a probe structure first of all + */ + + pdev = platform_device_register_simple(DRV_NAME, nr_qdi_host, NULL, 0); + if (pdev == NULL) + return -ENOMEM; + + memset(&ae, 0, sizeof(struct ata_probe_ent)); + INIT_LIST_HEAD(&ae.node); + ae.dev = &pdev->dev; + + if (type == 6580) { + ae.port_ops = &qdi6580_port_ops; + ae.pio_mask = 0x1F; + } else { + ae.port_ops = &qdi6500_port_ops; + ae.pio_mask = 0x07; /* Actually PIO3 !IORDY is possible */ + } + + ae.sht = &qdi_sht; + ae.n_ports = 1; + ae.irq = irq; + ae.irq_flags = 0; + ae.port_flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST; + ae.port[0].cmd_addr = io; + ae.port[0].altstatus_addr = ctrl; + ae.port[0].ctl_addr = ctrl; + ata_std_ports(&ae.port[0]); + + /* + * Hook in a private data structure per channel + */ + ae.private_data = &qdi_data[nr_qdi_host]; + + qdi_data[nr_qdi_host].timing = port; + qdi_data[nr_qdi_host].fast = fast; + qdi_data[nr_qdi_host].platform_dev = pdev; + + printk(KERN_INFO DRV_NAME": qd%d at 0x%lx.\n", type, io); + ret = ata_device_add(&ae); + if (ret == 0) { + platform_device_unregister(pdev); + return -ENODEV; + } + + qdi_host[nr_qdi_host++] = dev_get_drvdata(&pdev->dev); + return 0; +} + +/** + * qdi_init - attach qdi interfaces + * + * Attach qdi IDE interfaces by scanning the ports it may occupy. + */ + +static __init int qdi_init(void) +{ + unsigned long flags; + static const unsigned long qd_port[2] = { 0x30, 0xB0 }; + static const unsigned long ide_port[2] = { 0x170, 0x1F0 }; + static const int ide_irq[2] = { 14, 15 }; + + int ct = 0; + int i; + + if (probe_qdi == 0) + return -ENODEV; + + /* + * Check each possible QD65xx base address + */ + + for (i = 0; i < 2; i++) { + unsigned long port = qd_port[i]; + u8 r, res; + + + if (request_region(port, 2, "pata_qdi")) { + /* Check for a card */ + local_irq_save(flags); + r = inb_p(port); + outb_p(0x19, port); + res = inb_p(port); + outb_p(r, port); + local_irq_restore(flags); + + /* Fail */ + if (res == 0x19) + { + release_region(port, 2); + continue; + } + + /* Passes the presence test */ + r = inb_p(port + 1); /* Check port agrees with port set */ + if ((r & 2) >> 1 != i) { + release_region(port, 2); + continue; + } + + /* Check card type */ + if ((r & 0xF0) == 0xC0) { + /* QD6500: single channel */ + if (r & 8) { + /* Disabled ? */ + release_region(port, 2); + continue; + } + ct += qdi_init_one(port, 6500, ide_port[r & 0x01], ide_irq[r & 0x01], r & 0x04); + } + if (((r & 0xF0) == 0xA0) || (r & 0xF0) == 0x50) { + /* QD6580: dual channel */ + if (!request_region(port + 2 , 2, "pata_qdi")) + { + release_region(port, 2); + continue; + } + res = inb(port + 3); + if (res & 1) { + /* Single channel mode */ + ct += qdi_init_one(port, 6580, ide_port[r & 0x01], ide_irq[r & 0x01], r & 0x04); + } else { + /* Dual channel mode */ + ct += qdi_init_one(port, 6580, 0x1F0, 14, r & 0x04); + ct += qdi_init_one(port + 2, 6580, 0x170, 15, r & 0x04); + } + } + } + } + if (ct != 0) + return 0; + return -ENODEV; +} + +static __exit void qdi_exit(void) +{ + int i; + + for (i = 0; i < nr_qdi_host; i++) { + ata_host_remove(qdi_host[i]); + /* Free the control resource. The 6580 dual channel has the resources + * claimed as a pair of 2 byte resources so we need no special cases... + */ + release_region(qdi_data[i].timing, 2); + platform_device_unregister(qdi_data[i].platform_dev); + } +} + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for qdi ATA"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +module_init(qdi_init); +module_exit(qdi_exit); + +module_param(probe_qdi, int, 0); + diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c new file mode 100644 index 000000000000..6f7d0527265f --- /dev/null +++ b/drivers/ata/pata_radisys.c @@ -0,0 +1,335 @@ +/* + * pata_radisys.c - Intel PATA/SATA controllers + * + * (C) 2006 Red Hat + * + * Some parts based on ata_piix.c by Jeff Garzik and others. + * + * A PIIX relative, this device has a single ATA channel and no + * slave timings, SITRE or PPE. In that sense it is a close relative + * of the original PIIX. It does however support UDMA 33/66 per channel + * although no other modes/timings. Also lacking is 32bit I/O on the ATA + * port. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_radisys" +#define DRV_VERSION "0.4.1" + +/** + * radisys_probe_init - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int radisys_pre_reset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA80; + return ata_std_prereset(ap); +} + + +/** + * radisys_pata_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * @classes: + * + * LOCKING: + * None (inherited from caller). + */ + +static void radisys_pata_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, radisys_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * radisys_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: um + * + * Set PIO mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void radisys_set_piomode (struct ata_port *ap, struct ata_device *adev) +{ + unsigned int pio = adev->pio_mode - XFER_PIO_0; + struct pci_dev *dev = to_pci_dev(ap->host->dev); + u16 idetm_data; + int control = 0; + + /* + * See Intel Document 298600-004 for the timing programing rules + * for PIIX/ICH. Note that the early PIIX does not have the slave + * timing port at 0x44. The Radisys is a relative of the PIIX + * but not the same so be careful. + */ + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, /* Check me */ + { 0, 0 }, + { 1, 1 }, + { 2, 2 }, + { 3, 3 }, }; + + if (pio > 0) + control |= 1; /* TIME1 enable */ + if (ata_pio_need_iordy(adev)) + control |= 2; /* IE IORDY */ + + pci_read_config_word(dev, 0x40, &idetm_data); + + /* Enable IE and TIME as appropriate. Clear the other + drive timing bits */ + idetm_data &= 0xCCCC; + idetm_data |= (control << (4 * adev->devno)); + idetm_data |= (timings[pio][0] << 12) | + (timings[pio][1] << 8); + pci_write_config_word(dev, 0x40, idetm_data); + + /* Track which port is configured */ + ap->private_data = adev; +} + +/** + * radisys_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * @isich: True if the device is an ICH and has IOCFG registers + * + * Set MWDMA mode for device, in host controller PCI config space. + * + * LOCKING: + * None (inherited from caller). + */ + +static void radisys_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *dev = to_pci_dev(ap->host->dev); + u16 idetm_data; + u8 udma_enable; + + static const /* ISP RTC */ + u8 timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 1 }, + { 2, 2 }, + { 3, 3 }, }; + + /* + * MWDMA is driven by the PIO timings. We must also enable + * IORDY unconditionally. + */ + + pci_read_config_word(dev, 0x40, &idetm_data); + pci_read_config_byte(dev, 0x48, &udma_enable); + + if (adev->dma_mode < XFER_UDMA_0) { + unsigned int mwdma = adev->dma_mode - XFER_MW_DMA_0; + const unsigned int needed_pio[3] = { + XFER_PIO_0, XFER_PIO_3, XFER_PIO_4 + }; + int pio = needed_pio[mwdma] - XFER_PIO_0; + int control = 3; /* IORDY|TIME0 */ + + /* If the drive MWDMA is faster than it can do PIO then + we must force PIO0 for PIO cycles. */ + + if (adev->pio_mode < needed_pio[mwdma]) + control = 1; + + /* Mask out the relevant control and timing bits we will load. Also + clear the other drive TIME register as a precaution */ + + idetm_data &= 0xCCCC; + idetm_data |= control << (4 * adev->devno); + idetm_data |= (timings[pio][0] << 12) | (timings[pio][1] << 8); + + udma_enable &= ~(1 << adev->devno); + } else { + u8 udma_mode; + + /* UDMA66 on: UDMA 33 and 66 are switchable via register 0x4A */ + + pci_read_config_byte(dev, 0x4A, &udma_mode); + + if (adev->xfer_mode == XFER_UDMA_2) + udma_mode &= ~ (1 << adev->devno); + else /* UDMA 4 */ + udma_mode |= (1 << adev->devno); + + pci_write_config_byte(dev, 0x4A, udma_mode); + + udma_enable |= (1 << adev->devno); + } + pci_write_config_word(dev, 0x40, idetm_data); + pci_write_config_byte(dev, 0x48, udma_enable); + + /* Track which port is configured */ + ap->private_data = adev; +} + +/** + * radisys_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings if + * neccessary. Our logic also clears TIME0/TIME1 for the other device so + * that, even if we get this wrong, cycles to the other device will + * be made PIO0. + */ + +static unsigned int radisys_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + + if (adev != ap->private_data) { + /* UDMA timing is not shared */ + if (adev->dma_mode < XFER_UDMA_0) { + if (adev->dma_mode) + radisys_set_dmamode(ap, adev); + else if (adev->pio_mode) + radisys_set_piomode(ap, adev); + } + } + return ata_qc_issue_prot(qc); +} + + +static struct scsi_host_template radisys_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations radisys_pata_ops = { + .port_disable = ata_port_disable, + .set_piomode = radisys_set_piomode, + .set_dmamode = radisys_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = radisys_pata_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = radisys_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + + +/** + * radisys_init_one - Register PIIX ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in radisys_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. We probe for combined mode (sigh), + * and then hand over control to libata, for it to do the rest. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int radisys_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + static struct ata_port_info info = { + .sht = &radisys_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma1-2 */ + .udma_mask = 0x14, /* UDMA33/66 only */ + .port_ops = &radisys_pata_ops, + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, + "version " DRV_VERSION "\n"); + + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id radisys_pci_tbl[] = { + { 0x1331, 0x8201, PCI_ANY_ID, PCI_ANY_ID, }, + { } /* terminate list */ +}; + +static struct pci_driver radisys_pci_driver = { + .name = DRV_NAME, + .id_table = radisys_pci_tbl, + .probe = radisys_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init radisys_init(void) +{ + return pci_register_driver(&radisys_pci_driver); +} + +static void __exit radisys_exit(void) +{ + pci_unregister_driver(&radisys_pci_driver); +} + + +module_init(radisys_init); +module_exit(radisys_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for Radisys R82600 controllers"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, radisys_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c new file mode 100644 index 000000000000..3c6d84fd4312 --- /dev/null +++ b/drivers/ata/pata_rz1000.c @@ -0,0 +1,205 @@ +/* + * RZ1000/1001 driver based upon + * + * linux/drivers/ide/pci/rz1000.c Version 0.06 January 12, 2003 + * Copyright (C) 1995-1998 Linus Torvalds & author (see below) + * Principal Author: mlord@pobox.com (Mark Lord) + * + * See linux/MAINTAINERS for address of current maintainer. + * + * This file provides support for disabling the buggy read-ahead + * mode of the RZ1000 IDE chipset, commonly used on Intel motherboards. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_rz1000" +#define DRV_VERSION "0.2.2" + + +/** + * rz1000_prereset - probe begin + * @ap: ATA port + * + * Set up cable type and use generics + */ + +static int rz1000_prereset(struct ata_port *ap) +{ + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + +/** + * rz1000_error_handler - probe reset + * @ap: ATA port + * + * Perform the ATA standard reset sequence + */ + +static void rz1000_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, rz1000_prereset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * rz1000_set_mode - mode setting function + * @ap: ATA interface + * + * Use a non standard set_mode function. We don't want to be tuned. We + * would prefer to be BIOS generic but for the fact our hardware is + * whacked out. + */ + +static void rz1000_set_mode(struct ata_port *ap) +{ + int i; + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + if (ata_dev_enabled(dev)) { + /* We don't really care */ + dev->pio_mode = XFER_PIO_0; + dev->xfer_mode = XFER_PIO_0; + dev->xfer_shift = ATA_SHIFT_PIO; + dev->flags |= ATA_DFLAG_PIO; + } + } +} + + +static struct scsi_host_template rz1000_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations rz1000_port_ops = { + .set_mode = rz1000_set_mode, + + .port_disable = ata_port_disable, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = rz1000_error_handler, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = rz1000_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * rz1000_init_one - Register RZ1000 ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in rz1000_pci_tbl matching with @pdev + * + * Configure an RZ1000 interface. This doesn't require much special + * handling except that we *MUST* kill the chipset readahead or the + * user may experience data corruption. + */ + +static int rz1000_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + struct ata_port_info *port_info[2]; + u16 reg; + static struct ata_port_info info = { + .sht = &rz1000_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .port_ops = &rz1000_port_ops + }; + + if (!printed_version++) + printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n"); + + /* Be exceptionally paranoid as we must be sure to apply the fix */ + if (pci_read_config_word(pdev, 0x40, ®) != 0) + goto fail; + reg &= 0xDFFF; + if (pci_write_config_word(pdev, 0x40, reg) != 0) + goto fail; + printk(KERN_INFO DRV_NAME ": disabled chipset readahead.\n"); + + port_info[0] = &info; + port_info[1] = &info; + return ata_pci_init_one(pdev, port_info, 2); +fail: + printk(KERN_ERR DRV_NAME ": failed to disable read-ahead on chipset..\n"); + /* Not safe to use so skip */ + return -ENODEV; +} + +static struct pci_device_id pata_rz1000[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000), }, + { PCI_DEVICE(PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001), }, + { 0, }, +}; + +static struct pci_driver rz1000_pci_driver = { + .name = DRV_NAME, + .id_table = pata_rz1000, + .probe = rz1000_init_one, + .remove = ata_pci_remove_one +}; + + +static int __init rz1000_init(void) +{ + return pci_register_driver(&rz1000_pci_driver); +} + +static void __exit rz1000_exit(void) +{ + pci_unregister_driver(&rz1000_pci_driver); +} + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for RZ1000 PCI ATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, pata_rz1000); +MODULE_VERSION(DRV_VERSION); + +module_init(rz1000_init); +module_exit(rz1000_exit); + diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c new file mode 100644 index 000000000000..9b42b9a52423 --- /dev/null +++ b/drivers/ata/pata_sc1200.c @@ -0,0 +1,287 @@ +/* + * New ATA layer SC1200 driver Alan Cox + * + * TODO: Mode selection filtering + * TODO: Can't enable second channel until ATA core has serialize + * TODO: Needs custom DMA cleanup code + * + * Based very heavily on + * + * linux/drivers/ide/pci/sc1200.c Version 0.91 28-Jan-2003 + * + * Copyright (C) 2000-2002 Mark Lord + * May be copied or modified under the terms of the GNU General Public License + * + * Development of this chipset driver was funded + * by the nice folks at National Semiconductor. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "sc1200" +#define DRV_VERSION "0.2.3" + +#define SC1200_REV_A 0x00 +#define SC1200_REV_B1 0x01 +#define SC1200_REV_B3 0x02 +#define SC1200_REV_C1 0x03 +#define SC1200_REV_D1 0x04 + +/** + * sc1200_clock - PCI clock + * + * Return the PCI bus clocking for the SC1200 chipset configuration + * in use. We return 0 for 33MHz 1 for 48MHz and 2 for 66Mhz + */ + +static int sc1200_clock(void) +{ + /* Magic registers that give us the chipset data */ + u8 chip_id = inb(0x903C); + u8 silicon_rev = inb(0x903D); + u16 pci_clock; + + if (chip_id == 0x04 && silicon_rev < SC1200_REV_B1) + return 0; /* 33 MHz mode */ + + /* Clock generator configuration 0x901E its 8/9 are the PCI clocking + 0/3 is 33Mhz 1 is 48 2 is 66 */ + + pci_clock = inw(0x901E); + pci_clock >>= 8; + pci_clock &= 0x03; + if (pci_clock == 3) + pci_clock = 0; + return pci_clock; +} + +/** + * sc1200_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + * + * Set our PIO requirements. This is fairly simple on the SC1200 + */ + +static void sc1200_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static const u32 pio_timings[4][5] = { + {0x00009172, 0x00012171, 0x00020080, 0x00032010, 0x00040010}, // format0 33Mhz + {0xd1329172, 0x71212171, 0x30200080, 0x20102010, 0x00100010}, // format1, 33Mhz + {0xfaa3f4f3, 0xc23232b2, 0x513101c1, 0x31213121, 0x10211021}, // format1, 48Mhz + {0xfff4fff4, 0xf35353d3, 0x814102f1, 0x42314231, 0x11311131} // format1, 66Mhz + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 format; + unsigned int reg = 0x40 + 0x10 * ap->port_no; + int mode = adev->pio_mode - XFER_PIO_0; + + pci_read_config_dword(pdev, reg + 4, &format); + format >>= 31; + format += sc1200_clock(); + pci_write_config_dword(pdev, reg + 8 * adev->devno, + pio_timings[format][mode]); +} + +/** + * sc1200_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + * We cannot mix MWDMA and UDMA without reloading timings each switch + * master to slave. + */ + +static void sc1200_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const u32 udma_timing[3][3] = { + { 0x00921250, 0x00911140, 0x00911030 }, + { 0x00932470, 0x00922260, 0x00922140 }, + { 0x009436A1, 0x00933481, 0x00923261 } + }; + + static const u32 mwdma_timing[3][3] = { + { 0x00077771, 0x00012121, 0x00002020 }, + { 0x000BBBB2, 0x00024241, 0x00013131 }, + { 0x000FFFF3, 0x00035352, 0x00015151 } + }; + + int clock = sc1200_clock(); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + unsigned int reg = 0x40 + 0x10 * ap->port_no; + int mode = adev->dma_mode; + u32 format; + + if (mode >= XFER_UDMA_0) + format = udma_timing[clock][mode - XFER_UDMA_0]; + else + format = mwdma_timing[clock][mode - XFER_MW_DMA_0]; + + if (adev->devno == 0) { + u32 timings; + + pci_read_config_dword(pdev, reg + 4, &timings); + timings &= 0x80000000UL; + timings |= format; + pci_write_config_dword(pdev, reg + 4, timings); + } else + pci_write_config_dword(pdev, reg + 12, format); +} + +/** + * sc1200_qc_issue_prot - command issue + * @qc: command pending + * + * Called when the libata layer is about to issue a command. We wrap + * this interface so that we can load the correct ATA timings if + * neccessary. Specifically we have a problem that there is only + * one MWDMA/UDMA bit. + */ + +static unsigned int sc1200_qc_issue_prot(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_device *adev = qc->dev; + struct ata_device *prev = ap->private_data; + + /* See if the DMA settings could be wrong */ + if (adev->dma_mode != 0 && adev != prev && prev != NULL) { + /* Maybe, but do the channels match MWDMA/UDMA ? */ + if ((adev->dma_mode >= XFER_UDMA_0 && prev->dma_mode < XFER_UDMA_0) || + (adev->dma_mode < XFER_UDMA_0 && prev->dma_mode >= XFER_UDMA_0)) + /* Switch the mode bits */ + sc1200_set_dmamode(ap, adev); + } + + return ata_qc_issue_prot(qc); +} + +static struct scsi_host_template sc1200_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations sc1200_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = sc1200_set_piomode, + .set_dmamode = sc1200_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = ata_bmdma_error_handler, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = sc1200_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * sc1200_init_one - Initialise an SC1200 + * @dev: PCI device + * @id: Entry in match table + * + * Just throw the needed data at the libata helper and it does all + * our work. + */ + +static int sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &sc1200_sht, + .flags = ATA_FLAG_SLAVE_POSS|ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x07, + .port_ops = &sc1200_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + + /* Can't enable port 2 yet, see top comments */ + return ata_pci_init_one(dev, port_info, 1); +} + +static struct pci_device_id sc1200[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_IDE), }, + { 0, }, +}; + +static struct pci_driver sc1200_pci_driver = { + .name = DRV_NAME, + .id_table = sc1200, + .probe = sc1200_init_one, + .remove = ata_pci_remove_one +}; + +static int __init sc1200_init(void) +{ + return pci_register_driver(&sc1200_pci_driver); +} + + +static void __exit sc1200_exit(void) +{ + pci_unregister_driver(&sc1200_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox, Mark Lord"); +MODULE_DESCRIPTION("low-level driver for the NS/AMD SC1200"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, sc1200); +MODULE_VERSION(DRV_VERSION); + +module_init(sc1200_init); +module_exit(sc1200_exit); diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c new file mode 100644 index 000000000000..3ec30036c978 --- /dev/null +++ b/drivers/ata/pata_serverworks.c @@ -0,0 +1,587 @@ +/* + * ata-serverworks.c - Serverworks PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * based upon + * + * serverworks.c + * + * Copyright (C) 1998-2000 Michel Aubry + * Copyright (C) 1998-2000 Andrzej Krzysztofowicz + * Copyright (C) 1998-2000 Andre Hedrick + * Portions copyright (c) 2001 Sun Microsystems + * + * + * RCC/ServerWorks IDE driver for Linux + * + * OSB4: `Open South Bridge' IDE Interface (fn 1) + * supports UDMA mode 2 (33 MB/s) + * + * CSB5: `Champion South Bridge' IDE Interface (fn 1) + * all revisions support UDMA mode 4 (66 MB/s) + * revision A2.0 and up support UDMA mode 5 (100 MB/s) + * + * *** The CSB5 does not provide ANY register *** + * *** to detect 80-conductor cable presence. *** + * + * CSB6: `Champion South Bridge' IDE Interface (optional: third channel) + * + * Documentation: + * Available under NDA only. Errata info very hard to get. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_serverworks" +#define DRV_VERSION "0.3.6" + +#define SVWKS_CSB5_REVISION_NEW 0x92 /* min PCI_REVISION_ID for UDMA5 (A2.0) */ +#define SVWKS_CSB6_REVISION 0xa0 /* min PCI_REVISION_ID for UDMA4 (A1.0) */ + +/* Seagate Barracuda ATA IV Family drives in UDMA mode 5 + * can overrun their FIFOs when used with the CSB5 */ + +static const char *csb_bad_ata100[] = { + "ST320011A", + "ST340016A", + "ST360021A", + "ST380021A", + NULL +}; + +/** + * dell_cable - Dell serverworks cable detection + * @ap: ATA port to do cable detect + * + * Dell hide the 40/80 pin select for their interfaces in the top two + * bits of the subsystem ID. + */ + +static int dell_cable(struct ata_port *ap) { + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (pdev->subsystem_device & (1 << (ap->port_no + 14))) + return ATA_CBL_PATA80; + return ATA_CBL_PATA40; +} + +/** + * sun_cable - Sun Cobalt 'Alpine' cable detection + * @ap: ATA port to do cable select + * + * Cobalt CSB5 IDE hides the 40/80pin in the top two bits of the + * subsystem ID the same as dell. We could use one function but we may + * need to extend the Dell one in future + */ + +static int sun_cable(struct ata_port *ap) { + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (pdev->subsystem_device & (1 << (ap->port_no + 14))) + return ATA_CBL_PATA80; + return ATA_CBL_PATA40; +} + +/** + * osb4_cable - OSB4 cable detect + * @ap: ATA port to check + * + * The OSB4 isn't UDMA66 capable so this is easy + */ + +static int osb4_cable(struct ata_port *ap) { + return ATA_CBL_PATA40; +} + +/** + * csb4_cable - CSB5/6 cable detect + * @ap: ATA port to check + * + * Serverworks default arrangement is to use the drive side detection + * only. + */ + +static int csb_cable(struct ata_port *ap) { + return ATA_CBL_PATA80; +} + +struct sv_cable_table { + int device; + int subvendor; + int (*cable_detect)(struct ata_port *ap); +}; + +/* + * Note that we don't copy the old serverworks code because the old + * code contains obvious mistakes + */ + +static struct sv_cable_table cable_detect[] = { + { PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, PCI_VENDOR_ID_DELL, dell_cable }, + { PCI_DEVICE_ID_SERVERWORKS_CSB6IDE, PCI_VENDOR_ID_DELL, dell_cable }, + { PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, PCI_VENDOR_ID_SUN, sun_cable }, + { PCI_DEVICE_ID_SERVERWORKS_OSB4, PCI_ANY_ID, osb4_cable }, + { PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, PCI_ANY_ID, csb_cable }, + { PCI_DEVICE_ID_SERVERWORKS_CSB6IDE, PCI_ANY_ID, csb_cable }, + { PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2, PCI_ANY_ID, csb_cable }, + { PCI_DEVICE_ID_SERVERWORKS_HT1000IDE, PCI_ANY_ID, csb_cable }, + { } +}; + +/** + * serverworks_pre_reset - cable detection + * @ap: ATA port + * + * Perform cable detection according to the device and subvendor + * identifications + */ + +static int serverworks_pre_reset(struct ata_port *ap) { + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct sv_cable_table *cb = cable_detect; + + while(cb->device) { + if (cb->device == pdev->device && + (cb->subvendor == pdev->subsystem_vendor || + cb->subvendor == PCI_ANY_ID)) { + ap->cbl = cb->cable_detect(ap); + return ata_std_prereset(ap); + } + cb++; + } + + BUG(); + return -1; /* kill compiler warning */ +} + +static void serverworks_error_handler(struct ata_port *ap) +{ + return ata_bmdma_drive_eh(ap, serverworks_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * serverworks_is_csb - Check for CSB or OSB + * @pdev: PCI device to check + * + * Returns true if the device being checked is known to be a CSB + * series device. + */ + +static u8 serverworks_is_csb(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE: + case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE: + case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2: + case PCI_DEVICE_ID_SERVERWORKS_HT1000IDE: + return 1; + default: + break; + } + return 0; +} + +/** + * serverworks_osb4_filter - mode selection filter + * @ap: ATA interface + * @adev: ATA device + * + * Filter the offered modes for the device to apply controller + * specific rules. OSB4 requires no UDMA for disks due to a FIFO + * bug we hit. + */ + +static unsigned long serverworks_osb4_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) +{ + if (adev->class == ATA_DEV_ATA) + mask &= ~ATA_MASK_UDMA; + return ata_pci_default_filter(ap, adev, mask); +} + + +/** + * serverworks_csb_filter - mode selection filter + * @ap: ATA interface + * @adev: ATA device + * + * Check the blacklist and disable UDMA5 if matched + */ + +static unsigned long serverworks_csb_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) +{ + const char *p; + char model_num[40]; + int len, i; + + /* Disk, UDMA */ + if (adev->class != ATA_DEV_ATA) + return ata_pci_default_filter(ap, adev, mask); + + /* Actually do need to check */ + ata_id_string(adev->id, model_num, ATA_ID_PROD_OFS, sizeof(model_num)); + /* Precuationary - why not do this in the libata core ?? */ + + len = strlen(model_num); + while ((len > 0) && (model_num[len - 1] == ' ')) { + len--; + model_num[len] = 0; + } + + for(i = 0; (p = csb_bad_ata100[i]) != NULL; i++) { + if (!strncmp(p, model_num, len)) + mask &= ~(0x1F << ATA_SHIFT_UDMA); + } + return ata_pci_default_filter(ap, adev, mask); +} + + +/** + * serverworks_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the OSB4/CSB5 timing registers for PIO. The PIO register + * load is done as a simple lookup. + */ +static void serverworks_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static const u8 pio_mode[] = { 0x5d, 0x47, 0x34, 0x22, 0x20 }; + int offset = 1 + (2 * ap->port_no) - adev->devno; + int devbits = (2 * ap->port_no + adev->devno) * 4; + u16 csb5_pio; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int pio = adev->pio_mode - XFER_PIO_0; + + pci_write_config_byte(pdev, 0x40 + offset, pio_mode[pio]); + + /* The OSB4 just requires the timing but the CSB series want the + mode number as well */ + if (serverworks_is_csb(pdev)) { + pci_read_config_word(pdev, 0x4A, &csb5_pio); + csb5_pio &= ~(0x0F << devbits); + pci_write_config_byte(pdev, 0x4A, csb5_pio | (pio << devbits)); + } +} + +/** + * serverworks_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the MWDMA/UDMA modes for the serverworks OSB4/CSB5 + * chipset. The MWDMA mode values are pulled from a lookup table + * while the chipset uses mode number for UDMA. + */ + +static void serverworks_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const u8 dma_mode[] = { 0x77, 0x21, 0x20 }; + int offset = 1 + 2 * ap->port_no - adev->devno; + int devbits = (2 * ap->port_no + adev->devno); + u8 ultra; + u8 ultra_cfg; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_byte(pdev, 0x54, &ultra_cfg); + + if (adev->dma_mode >= XFER_UDMA_0) { + pci_write_config_byte(pdev, 0x44 + offset, 0x20); + + pci_read_config_byte(pdev, 0x56 + ap->port_no, &ultra); + ultra &= ~(0x0F << (ap->port_no * 4)); + ultra |= (adev->dma_mode - XFER_UDMA_0) + << (ap->port_no * 4); + pci_write_config_byte(pdev, 0x56 + ap->port_no, ultra); + + ultra_cfg |= (1 << devbits); + } else { + pci_write_config_byte(pdev, 0x44 + offset, + dma_mode[adev->dma_mode - XFER_MW_DMA_0]); + ultra_cfg &= ~(1 << devbits); + } + pci_write_config_byte(pdev, 0x54, ultra_cfg); +} + +static struct scsi_host_template serverworks_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations serverworks_osb4_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = serverworks_set_piomode, + .set_dmamode = serverworks_set_dmamode, + .mode_filter = serverworks_osb4_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = serverworks_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations serverworks_csb_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = serverworks_set_piomode, + .set_dmamode = serverworks_set_dmamode, + .mode_filter = serverworks_csb_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = serverworks_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int serverworks_fixup_osb4(struct pci_dev *pdev) +{ + u32 reg; + struct pci_dev *isa_dev = pci_get_device(PCI_VENDOR_ID_SERVERWORKS, + PCI_DEVICE_ID_SERVERWORKS_OSB4, NULL); + if (isa_dev) { + pci_read_config_dword(isa_dev, 0x64, ®); + reg &= ~0x00002000; /* disable 600ns interrupt mask */ + if (!(reg & 0x00004000)) + printk(KERN_DEBUG DRV_NAME ": UDMA not BIOS enabled.\n"); + reg |= 0x00004000; /* enable UDMA/33 support */ + pci_write_config_dword(isa_dev, 0x64, reg); + pci_dev_put(isa_dev); + return 0; + } + printk(KERN_WARNING "ata_serverworks: Unable to find bridge.\n"); + return -ENODEV; +} + +static int serverworks_fixup_csb(struct pci_dev *pdev) +{ + u8 rev; + u8 btr; + + pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); + + /* Third Channel Test */ + if (!(PCI_FUNC(pdev->devfn) & 1)) { + struct pci_dev * findev = NULL; + u32 reg4c = 0; + findev = pci_get_device(PCI_VENDOR_ID_SERVERWORKS, + PCI_DEVICE_ID_SERVERWORKS_CSB5, NULL); + if (findev) { + pci_read_config_dword(findev, 0x4C, ®4c); + reg4c &= ~0x000007FF; + reg4c |= 0x00000040; + reg4c |= 0x00000020; + pci_write_config_dword(findev, 0x4C, reg4c); + pci_dev_put(findev); + } + } else { + struct pci_dev * findev = NULL; + u8 reg41 = 0; + + findev = pci_get_device(PCI_VENDOR_ID_SERVERWORKS, + PCI_DEVICE_ID_SERVERWORKS_CSB6, NULL); + if (findev) { + pci_read_config_byte(findev, 0x41, ®41); + reg41 &= ~0x40; + pci_write_config_byte(findev, 0x41, reg41); + pci_dev_put(findev); + } + } + /* setup the UDMA Control register + * + * 1. clear bit 6 to enable DMA + * 2. enable DMA modes with bits 0-1 + * 00 : legacy + * 01 : udma2 + * 10 : udma2/udma4 + * 11 : udma2/udma4/udma5 + */ + pci_read_config_byte(pdev, 0x5A, &btr); + btr &= ~0x40; + if (!(PCI_FUNC(pdev->devfn) & 1)) + btr |= 0x2; + else + btr |= (rev >= SVWKS_CSB5_REVISION_NEW) ? 0x3 : 0x2; + pci_write_config_byte(pdev, 0x5A, btr); + + return btr; +} + +static void serverworks_fixup_ht1000(struct pci_dev *pdev) +{ + u8 btr; + /* Setup HT1000 SouthBridge Controller - Single Channel Only */ + pci_read_config_byte(pdev, 0x5A, &btr); + btr &= ~0x40; + btr |= 0x3; + pci_write_config_byte(pdev, 0x5A, btr); +} + + +static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int ports = 2; + static struct ata_port_info info[4] = { + { /* OSB4 */ + .sht = &serverworks_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x07, + .port_ops = &serverworks_osb4_port_ops + }, { /* OSB4 no UDMA */ + .sht = &serverworks_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x00, + .port_ops = &serverworks_osb4_port_ops + }, { /* CSB5 */ + .sht = &serverworks_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, + .port_ops = &serverworks_csb_port_ops + }, { /* CSB5 - later revisions*/ + .sht = &serverworks_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &serverworks_csb_port_ops + } + }; + static struct ata_port_info *port_info[2]; + struct ata_port_info *devinfo = &info[id->driver_data]; + + /* Force master latency timer to 64 PCI clocks */ + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40); + + /* OSB4 : South Bridge and IDE */ + if (pdev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) { + /* Select non UDMA capable OSB4 if we can't do fixups */ + if ( serverworks_fixup_osb4(pdev) < 0) + devinfo = &info[1]; + } + /* setup CSB5/CSB6 : South Bridge and IDE option RAID */ + else if ((pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) || + (pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) || + (pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) { + + /* If the returned btr is the newer revision then + select the right info block */ + if (serverworks_fixup_csb(pdev) == 3) + devinfo = &info[3]; + + /* Is this the 3rd channel CSB6 IDE ? */ + if (pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2) + ports = 1; + } + /* setup HT1000E */ + else if (pdev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) + serverworks_fixup_ht1000(pdev); + + if (pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) + ata_pci_clear_simplex(pdev); + + port_info[0] = port_info[1] = devinfo; + return ata_pci_init_one(pdev, port_info, ports); +} + +static struct pci_device_id serverworks[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE), 0}, + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE), 2}, + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE), 2}, + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2), 2}, + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000IDE), 2}, + { 0, }, +}; + +static struct pci_driver serverworks_pci_driver = { + .name = DRV_NAME, + .id_table = serverworks, + .probe = serverworks_init_one, + .remove = ata_pci_remove_one +}; + +static int __init serverworks_init(void) +{ + return pci_register_driver(&serverworks_pci_driver); +} + + +static void __exit serverworks_exit(void) +{ + pci_unregister_driver(&serverworks_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Serverworks OSB4/CSB5/CSB6"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, serverworks); +MODULE_VERSION(DRV_VERSION); + +module_init(serverworks_init); +module_exit(serverworks_exit); diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c new file mode 100644 index 000000000000..8f7db9638d0a --- /dev/null +++ b/drivers/ata/pata_sil680.c @@ -0,0 +1,381 @@ +/* + * pata_sil680.c - SIL680 PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * based upon + * + * linux/drivers/ide/pci/siimage.c Version 1.07 Nov 30, 2003 + * + * Copyright (C) 2001-2002 Andre Hedrick + * Copyright (C) 2003 Red Hat + * + * May be copied or modified under the terms of the GNU General Public License + * + * Documentation publically available. + * + * If you have strange problems with nVidia chipset systems please + * see the SI support documentation and update your system BIOS + * if neccessary + * + * TODO + * If we know all our devices are LBA28 (or LBA28 sized) we could use + * the command fifo mode. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_sil680" +#define DRV_VERSION "0.3.2" + +/** + * sil680_selreg - return register base + * @hwif: interface + * @r: config offset + * + * Turn a config register offset into the right address in either + * PCI space or MMIO space to access the control register in question + * Thankfully this is a configuration operation so isnt performance + * criticial. + */ + +static unsigned long sil680_selreg(struct ata_port *ap, int r) +{ + unsigned long base = 0xA0 + r; + base += (ap->port_no << 4); + return base; +} + +/** + * sil680_seldev - return register base + * @hwif: interface + * @r: config offset + * + * Turn a config register offset into the right address in either + * PCI space or MMIO space to access the control register in question + * including accounting for the unit shift. + */ + +static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r) +{ + unsigned long base = 0xA0 + r; + base += (ap->port_no << 4); + base |= adev->devno ? 2 : 0; + return base; +} + + +/** + * sil680_cable_detect - cable detection + * @ap: ATA port + * + * Perform cable detection. The SIL680 stores this in PCI config + * space for us. + */ + +static int sil680_cable_detect(struct ata_port *ap) { + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + unsigned long addr = sil680_selreg(ap, 0); + u8 ata66; + pci_read_config_byte(pdev, addr, &ata66); + if (ata66 & 1) + return ATA_CBL_PATA80; + else + return ATA_CBL_PATA40; +} + +static int sil680_pre_reset(struct ata_port *ap) +{ + ap->cbl = sil680_cable_detect(ap); + return ata_std_prereset(ap); +} + +/** + * sil680_bus_reset - reset the SIL680 bus + * @ap: ATA port to reset + * + * Perform the SIL680 housekeeping when doing an ATA bus reset + */ + +static int sil680_bus_reset(struct ata_port *ap,unsigned int *classes) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + unsigned long addr = sil680_selreg(ap, 0); + u8 reset; + + pci_read_config_byte(pdev, addr, &reset); + pci_write_config_byte(pdev, addr, reset | 0x03); + udelay(25); + pci_write_config_byte(pdev, addr, reset); + return ata_std_softreset(ap, classes); +} + +static void sil680_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, sil680_pre_reset, sil680_bus_reset, NULL, ata_std_postreset); +} + +/** + * sil680_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the SIL680 registers for PIO mode. Note that the task speed + * registers are shared between the devices so we must pick the lowest + * mode for command work. + */ + +static void sil680_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static u16 speed_p[5] = { 0x328A, 0x2283, 0x1104, 0x10C3, 0x10C1 }; + static u16 speed_t[5] = { 0x328A, 0x1281, 0x1281, 0x10C3, 0x10C1 }; + + unsigned long tfaddr = sil680_selreg(ap, 0x02); + unsigned long addr = sil680_seldev(ap, adev, 0x04); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int pio = adev->pio_mode - XFER_PIO_0; + int lowest_pio = pio; + u16 reg; + + struct ata_device *pair = ata_dev_pair(adev); + + if (pair != NULL && adev->pio_mode > pair->pio_mode) + lowest_pio = pair->pio_mode - XFER_PIO_0; + + pci_write_config_word(pdev, addr, speed_p[pio]); + pci_write_config_word(pdev, tfaddr, speed_t[lowest_pio]); + + pci_read_config_word(pdev, tfaddr-2, ®); + reg &= ~0x0200; /* Clear IORDY */ + if (ata_pio_need_iordy(adev)) + reg |= 0x0200; /* Enable IORDY */ + pci_write_config_word(pdev, tfaddr-2, reg); +} + +/** + * sil680_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Program the MWDMA/UDMA modes for the sil680 k + * chipset. The MWDMA mode values are pulled from a lookup table + * while the chipset uses mode number for UDMA. + */ + +static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static u8 ultra_table[2][7] = { + { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01, 0xFF }, /* 100MHz */ + { 0x0F, 0x0B, 0x07, 0x05, 0x03, 0x02, 0x01 }, /* 133Mhz */ + }; + static u16 dma_table[3] = { 0x2208, 0x10C2, 0x10C1 }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + unsigned long ma = sil680_seldev(ap, adev, 0x08); + unsigned long ua = sil680_seldev(ap, adev, 0x0C); + unsigned long addr_mask = 0x80 + 4 * ap->port_no; + int port_shift = adev->devno * 4; + u8 scsc, mode; + u16 multi, ultra; + + pci_read_config_byte(pdev, 0x8A, &scsc); + pci_read_config_byte(pdev, addr_mask, &mode); + pci_read_config_word(pdev, ma, &multi); + pci_read_config_word(pdev, ua, &ultra); + + /* Mask timing bits */ + ultra &= ~0x3F; + mode &= ~(0x03 << port_shift); + + /* Extract scsc */ + scsc = (scsc & 0x30) ? 1: 0; + + if (adev->dma_mode >= XFER_UDMA_0) { + multi = 0x10C1; + ultra |= ultra_table[scsc][adev->dma_mode - XFER_UDMA_0]; + mode |= (0x03 << port_shift); + } else { + multi = dma_table[adev->dma_mode - XFER_MW_DMA_0]; + mode |= (0x02 << port_shift); + } + pci_write_config_byte(pdev, addr_mask, mode); + pci_write_config_word(pdev, ma, multi); + pci_write_config_word(pdev, ua, ultra); +} + +static struct scsi_host_template sil680_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations sil680_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = sil680_set_piomode, + .set_dmamode = sil680_set_dmamode, + .mode_filter = ata_pci_default_filter, + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = sil680_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int sil680_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &sil680_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, + .port_ops = &sil680_port_ops + }; + static struct ata_port_info info_slow = { + .sht = &sil680_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &sil680_port_ops + }; + static struct ata_port_info *port_info[2] = {&info, &info}; + static int printed_version; + u32 class_rev = 0; + u8 tmpbyte = 0; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); + + pci_read_config_dword(pdev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xff; + /* FIXME: double check */ + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, (class_rev) ? 1 : 255); + + pci_write_config_byte(pdev, 0x80, 0x00); + pci_write_config_byte(pdev, 0x84, 0x00); + + pci_read_config_byte(pdev, 0x8A, &tmpbyte); + + printk(KERN_INFO "sil680: BA5_EN = %d clock = %02X\n", + tmpbyte & 1, tmpbyte & 0x30); + + switch(tmpbyte & 0x30) { + case 0x00: + /* 133 clock attempt to force it on */ + pci_write_config_byte(pdev, 0x8A, tmpbyte|0x10); + break; + case 0x30: + /* if clocking is disabled */ + /* 133 clock attempt to force it on */ + pci_write_config_byte(pdev, 0x8A, tmpbyte & ~0x20); + break; + case 0x10: + /* 133 already */ + break; + case 0x20: + /* BIOS set PCI x2 clocking */ + break; + } + + pci_read_config_byte(pdev, 0x8A, &tmpbyte); + printk(KERN_INFO "sil680: BA5_EN = %d clock = %02X\n", + tmpbyte & 1, tmpbyte & 0x30); + if ((tmpbyte & 0x30) == 0) + port_info[0] = port_info[1] = &info_slow; + + pci_write_config_byte(pdev, 0xA1, 0x72); + pci_write_config_word(pdev, 0xA2, 0x328A); + pci_write_config_dword(pdev, 0xA4, 0x62DD62DD); + pci_write_config_dword(pdev, 0xA8, 0x43924392); + pci_write_config_dword(pdev, 0xAC, 0x40094009); + pci_write_config_byte(pdev, 0xB1, 0x72); + pci_write_config_word(pdev, 0xB2, 0x328A); + pci_write_config_dword(pdev, 0xB4, 0x62DD62DD); + pci_write_config_dword(pdev, 0xB8, 0x43924392); + pci_write_config_dword(pdev, 0xBC, 0x40094009); + + switch(tmpbyte & 0x30) { + case 0x00: printk(KERN_INFO "sil680: 100MHz clock.\n");break; + case 0x10: printk(KERN_INFO "sil680: 133MHz clock.\n");break; + case 0x20: printk(KERN_INFO "sil680: Using PCI clock.\n");break; + /* This last case is _NOT_ ok */ + case 0x30: printk(KERN_ERR "sil680: Clock disabled ?\n"); + return -EIO; + } + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id sil680[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_SII_680), }, + { 0, }, +}; + +static struct pci_driver sil680_pci_driver = { + .name = DRV_NAME, + .id_table = sil680, + .probe = sil680_init_one, + .remove = ata_pci_remove_one +}; + +static int __init sil680_init(void) +{ + return pci_register_driver(&sil680_pci_driver); +} + + +static void __exit sil680_exit(void) +{ + pci_unregister_driver(&sil680_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for SI680 PATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, sil680); +MODULE_VERSION(DRV_VERSION); + +module_init(sil680_init); +module_exit(sil680_exit); diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c new file mode 100644 index 000000000000..4ad7fb44bfa6 --- /dev/null +++ b/drivers/ata/pata_sis.c @@ -0,0 +1,1030 @@ +/* + * pata_sis.c - SiS ATA driver + * + * (C) 2005 Red Hat + * + * Based upon linux/drivers/ide/pci/sis5513.c + * Copyright (C) 1999-2000 Andre Hedrick + * Copyright (C) 2002 Lionel Bouton , Maintainer + * Copyright (C) 2003 Vojtech Pavlik + * SiS Taiwan : for direct support and hardware. + * Daniela Engert : for initial ATA100 advices and numerous others. + * John Fremlin, Manfred Spraul, Dave Morgan, Peter Kjellerstedt : + * for checking code correctness, providing patches. + * Original tests and design on the SiS620 chipset. + * ATA100 tests and design on the SiS735 chipset. + * ATA16/33 support from specs + * ATA133 support for SiS961/962 by L.C. Chang + * + * + * TODO + * Check MWDMA on drives that don't support MWDMA speed pio cycles ? + * More Testing + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_sis" +#define DRV_VERSION "0.4.2" + +struct sis_chipset { + u16 device; /* PCI host ID */ + struct ata_port_info *info; /* Info block */ + /* Probably add family, cable detect type etc here to clean + up code later */ +}; + +/** + * sis_port_base - return PCI configuration base for dev + * @adev: device + * + * Returns the base of the PCI configuration registers for this port + * number. + */ + +static int sis_port_base(struct ata_device *adev) +{ + return 0x40 + (4 * adev->ap->port_no) + (2 * adev->devno); +} + +/** + * sis_133_pre_reset - check for 40/80 pin + * @ap: Port + * + * Perform cable detection for the later UDMA133 capable + * SiS chipset. + */ + +static int sis_133_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits sis_enable_bits[] = { + { 0x4aU, 1U, 0x02UL, 0x02UL }, /* port 0 */ + { 0x4aU, 1U, 0x04UL, 0x04UL }, /* port 1 */ + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u16 tmp; + + if (!pci_test_config_bits(pdev, &sis_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + /* The top bit of this register is the cable detect bit */ + pci_read_config_word(pdev, 0x50 + 2 * ap->port_no, &tmp); + if (tmp & 0x8000) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + + return ata_std_prereset(ap); +} + +/** + * sis_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_133_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, sis_133_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + + +/** + * sis_66_pre_reset - check for 40/80 pin + * @ap: Port + * + * Perform cable detection on the UDMA66, UDMA100 and early UDMA133 + * SiS IDE controllers. + */ + +static int sis_66_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits sis_enable_bits[] = { + { 0x4aU, 1U, 0x02UL, 0x02UL }, /* port 0 */ + { 0x4aU, 1U, 0x04UL, 0x04UL }, /* port 1 */ + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 tmp; + + if (!pci_test_config_bits(pdev, &sis_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + /* Older chips keep cable detect in bits 4/5 of reg 0x48 */ + pci_read_config_byte(pdev, 0x48, &tmp); + tmp >>= ap->port_no; + if (tmp & 0x10) + ap->cbl = ATA_CBL_PATA40; + else + ap->cbl = ATA_CBL_PATA80; + + return ata_std_prereset(ap); +} + +/** + * sis_66_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * @classes: + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_66_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, sis_66_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * sis_old_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int sis_old_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits sis_enable_bits[] = { + { 0x4aU, 1U, 0x02UL, 0x02UL }, /* port 0 */ + { 0x4aU, 1U, 0x04UL, 0x04UL }, /* port 1 */ + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (!pci_test_config_bits(pdev, &sis_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + + +/** + * sis_old_error_handler - Probe specified port on PATA host controller + * @ap: Port to probe + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_old_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, sis_old_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * sis_set_fifo - Set RWP fifo bits for this device + * @ap: Port + * @adev: Device + * + * SIS chipsets implement prefetch/postwrite bits for each device + * on both channels. This functionality is not ATAPI compatible and + * must be configured according to the class of device present + */ + +static void sis_set_fifo(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u8 fifoctrl; + u8 mask = 0x11; + + mask <<= (2 * ap->port_no); + mask <<= adev->devno; + + /* This holds various bits including the FIFO control */ + pci_read_config_byte(pdev, 0x4B, &fifoctrl); + fifoctrl &= ~mask; + + /* Enable for ATA (disk) only */ + if (adev->class == ATA_DEV_ATA) + fifoctrl |= mask; + pci_write_config_byte(pdev, 0x4B, fifoctrl); +} + +/** + * sis_old_set_piomode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring for. + * + * Set PIO mode for device, in host controller PCI config space. This + * function handles PIO set up for all chips that are pre ATA100 and + * also early ATA100 devices. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_old_set_piomode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int port = sis_port_base(adev); + u8 t1, t2; + int speed = adev->pio_mode - XFER_PIO_0; + + const u8 active[] = { 0x00, 0x07, 0x04, 0x03, 0x01 }; + const u8 recovery[] = { 0x00, 0x06, 0x04, 0x03, 0x03 }; + + sis_set_fifo(ap, adev); + + pci_read_config_byte(pdev, port, &t1); + pci_read_config_byte(pdev, port + 1, &t2); + + t1 &= ~0x0F; /* Clear active/recovery timings */ + t2 &= ~0x07; + + t1 |= active[speed]; + t2 |= recovery[speed]; + + pci_write_config_byte(pdev, port, t1); + pci_write_config_byte(pdev, port + 1, t2); +} + +/** + * sis_100_set_pioode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring for. + * + * Set PIO mode for device, in host controller PCI config space. This + * function handles PIO set up for ATA100 devices and early ATA133. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_100_set_piomode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int port = sis_port_base(adev); + int speed = adev->pio_mode - XFER_PIO_0; + + const u8 actrec[] = { 0x00, 0x67, 0x44, 0x33, 0x31 }; + + sis_set_fifo(ap, adev); + + pci_write_config_byte(pdev, port, actrec[speed]); +} + +/** + * sis_133_set_pioode - Initialize host controller PATA PIO timings + * @ap: Port whose timings we are configuring + * @adev: Device we are configuring for. + * + * Set PIO mode for device, in host controller PCI config space. This + * function handles PIO set up for the later ATA133 devices. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_133_set_piomode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int port = 0x40; + u32 t1; + u32 reg54; + int speed = adev->pio_mode - XFER_PIO_0; + + const u32 timing133[] = { + 0x28269000, /* Recovery << 24 | Act << 16 | Ini << 12 */ + 0x0C266000, + 0x04263000, + 0x0C0A3000, + 0x05093000 + }; + const u32 timing100[] = { + 0x1E1C6000, /* Recovery << 24 | Act << 16 | Ini << 12 */ + 0x091C4000, + 0x031C2000, + 0x09072000, + 0x04062000 + }; + + sis_set_fifo(ap, adev); + + /* If bit 14 is set then the registers are mapped at 0x70 not 0x40 */ + pci_read_config_dword(pdev, 0x54, ®54); + if (reg54 & 0x40000000) + port = 0x70; + port += 8 * ap->port_no + 4 * adev->devno; + + pci_read_config_dword(pdev, port, &t1); + t1 &= 0xC0C00FFF; /* Mask out timing */ + + if (t1 & 0x08) /* 100 or 133 ? */ + t1 |= timing133[speed]; + else + t1 |= timing100[speed]; + pci_write_config_byte(pdev, port, t1); +} + +/** + * sis_old_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * + * Set UDMA/MWDMA mode for device, in host controller PCI config space. + * Handles pre UDMA and UDMA33 devices. Supports MWDMA as well unlike + * the old ide/pci driver. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_old_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int speed = adev->dma_mode - XFER_MW_DMA_0; + int drive_pci = sis_port_base(adev); + u16 timing; + + const u16 mwdma_bits[] = { 0x707, 0x202, 0x202 }; + const u16 udma_bits[] = { 0xE000, 0xC000, 0xA000 }; + + pci_read_config_word(pdev, drive_pci, &timing); + + if (adev->dma_mode < XFER_UDMA_0) { + /* bits 3-0 hold recovery timing bits 8-10 active timing and + the higer bits are dependant on the device */ + timing &= ~ 0x870F; + timing |= mwdma_bits[speed]; + pci_write_config_word(pdev, drive_pci, timing); + } else { + /* Bit 15 is UDMA on/off, bit 13-14 are cycle time */ + speed = adev->dma_mode - XFER_UDMA_0; + timing &= ~0x6000; + timing |= udma_bits[speed]; + } +} + +/** + * sis_66_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * + * Set UDMA/MWDMA mode for device, in host controller PCI config space. + * Handles UDMA66 and early UDMA100 devices. Supports MWDMA as well unlike + * the old ide/pci driver. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_66_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int speed = adev->dma_mode - XFER_MW_DMA_0; + int drive_pci = sis_port_base(adev); + u16 timing; + + const u16 mwdma_bits[] = { 0x707, 0x202, 0x202 }; + const u16 udma_bits[] = { 0xF000, 0xD000, 0xB000, 0xA000, 0x9000}; + + pci_read_config_word(pdev, drive_pci, &timing); + + if (adev->dma_mode < XFER_UDMA_0) { + /* bits 3-0 hold recovery timing bits 8-10 active timing and + the higer bits are dependant on the device, bit 15 udma */ + timing &= ~ 0x870F; + timing |= mwdma_bits[speed]; + } else { + /* Bit 15 is UDMA on/off, bit 12-14 are cycle time */ + speed = adev->dma_mode - XFER_UDMA_0; + timing &= ~0x6000; + timing |= udma_bits[speed]; + } + pci_write_config_word(pdev, drive_pci, timing); +} + +/** + * sis_100_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * + * Set UDMA/MWDMA mode for device, in host controller PCI config space. + * Handles UDMA66 and early UDMA100 devices. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_100_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int speed = adev->dma_mode - XFER_MW_DMA_0; + int drive_pci = sis_port_base(adev); + u16 timing; + + const u16 udma_bits[] = { 0x8B00, 0x8700, 0x8500, 0x8300, 0x8200, 0x8100}; + + pci_read_config_word(pdev, drive_pci, &timing); + + if (adev->dma_mode < XFER_UDMA_0) { + /* NOT SUPPORTED YET: NEED DATA SHEET. DITTO IN OLD DRIVER */ + } else { + /* Bit 15 is UDMA on/off, bit 12-14 are cycle time */ + speed = adev->dma_mode - XFER_UDMA_0; + timing &= ~0x0F00; + timing |= udma_bits[speed]; + } + pci_write_config_word(pdev, drive_pci, timing); +} + +/** + * sis_133_early_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * + * Set UDMA/MWDMA mode for device, in host controller PCI config space. + * Handles early SiS 961 bridges. Supports MWDMA as well unlike + * the old ide/pci driver. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_133_early_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int speed = adev->dma_mode - XFER_MW_DMA_0; + int drive_pci = sis_port_base(adev); + u16 timing; + + const u16 udma_bits[] = { 0x8F00, 0x8A00, 0x8700, 0x8500, 0x8300, 0x8200, 0x8100}; + + pci_read_config_word(pdev, drive_pci, &timing); + + if (adev->dma_mode < XFER_UDMA_0) { + /* NOT SUPPORTED YET: NEED DATA SHEET. DITTO IN OLD DRIVER */ + } else { + /* Bit 15 is UDMA on/off, bit 12-14 are cycle time */ + speed = adev->dma_mode - XFER_UDMA_0; + timing &= ~0x0F00; + timing |= udma_bits[speed]; + } + pci_write_config_word(pdev, drive_pci, timing); +} + +/** + * sis_133_set_dmamode - Initialize host controller PATA DMA timings + * @ap: Port whose timings we are configuring + * @adev: Device to program + * + * Set UDMA/MWDMA mode for device, in host controller PCI config space. + * Handles early SiS 961 bridges. Supports MWDMA as well unlike + * the old ide/pci driver. + * + * LOCKING: + * None (inherited from caller). + */ + +static void sis_133_set_dmamode (struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int speed = adev->dma_mode - XFER_MW_DMA_0; + int port = 0x40; + u32 t1; + u32 reg54; + + /* bits 4- cycle time 8 - cvs time */ + const u32 timing_u100[] = { 0x6B0, 0x470, 0x350, 0x140, 0x120, 0x110, 0x000 }; + const u32 timing_u133[] = { 0x9F0, 0x6A0, 0x470, 0x250, 0x230, 0x220, 0x210 }; + + /* If bit 14 is set then the registers are mapped at 0x70 not 0x40 */ + pci_read_config_dword(pdev, 0x54, ®54); + if (reg54 & 0x40000000) + port = 0x70; + port += (8 * ap->port_no) + (4 * adev->devno); + + pci_read_config_dword(pdev, port, &t1); + + if (adev->dma_mode < XFER_UDMA_0) { + t1 &= ~0x00000004; + /* FIXME: need data sheet to add MWDMA here. Also lacking on + ide/pci driver */ + } else { + speed = adev->dma_mode - XFER_UDMA_0; + /* if & 8 no UDMA133 - need info for ... */ + t1 &= ~0x00000FF0; + t1 |= 0x00000004; + if (t1 & 0x08) + t1 |= timing_u133[speed]; + else + t1 |= timing_u100[speed]; + } + pci_write_config_dword(pdev, port, t1); +} + +static struct scsi_host_template sis_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static const struct ata_port_operations sis_133_ops = { + .port_disable = ata_port_disable, + .set_piomode = sis_133_set_piomode, + .set_dmamode = sis_133_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = sis_133_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static const struct ata_port_operations sis_133_early_ops = { + .port_disable = ata_port_disable, + .set_piomode = sis_100_set_piomode, + .set_dmamode = sis_133_early_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = sis_66_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static const struct ata_port_operations sis_100_ops = { + .port_disable = ata_port_disable, + .set_piomode = sis_100_set_piomode, + .set_dmamode = sis_100_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = sis_66_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static const struct ata_port_operations sis_66_ops = { + .port_disable = ata_port_disable, + .set_piomode = sis_old_set_piomode, + .set_dmamode = sis_66_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = sis_66_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static const struct ata_port_operations sis_old_ops = { + .port_disable = ata_port_disable, + .set_piomode = sis_old_set_piomode, + .set_dmamode = sis_old_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = sis_old_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .data_xfer = ata_pio_data_xfer, + + .eng_timeout = ata_eng_timeout, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop, +}; + +static struct ata_port_info sis_info = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, + .udma_mask = 0, + .port_ops = &sis_old_ops, +}; +static struct ata_port_info sis_info33 = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA2, /* UDMA 33 */ + .port_ops = &sis_old_ops, +}; +static struct ata_port_info sis_info66 = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .udma_mask = ATA_UDMA4, /* UDMA 66 */ + .port_ops = &sis_66_ops, +}; +static struct ata_port_info sis_info100 = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .udma_mask = ATA_UDMA5, + .port_ops = &sis_100_ops, +}; +static struct ata_port_info sis_info100_early = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .udma_mask = ATA_UDMA5, + .pio_mask = 0x1f, /* pio0-4 */ + .port_ops = &sis_66_ops, +}; +static struct ata_port_info sis_info133 = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .udma_mask = ATA_UDMA6, + .port_ops = &sis_133_ops, +}; +static struct ata_port_info sis_info133_early = { + .sht = &sis_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, /* pio0-4 */ + .udma_mask = ATA_UDMA6, + .port_ops = &sis_133_early_ops, +}; + + +static void sis_fixup(struct pci_dev *pdev, struct sis_chipset *sis) +{ + u16 regw; + u8 reg; + + if (sis->info == &sis_info133) { + pci_read_config_word(pdev, 0x50, ®w); + if (regw & 0x08) + pci_write_config_word(pdev, 0x50, regw & ~0x08); + pci_read_config_word(pdev, 0x52, ®w); + if (regw & 0x08) + pci_write_config_word(pdev, 0x52, regw & ~0x08); + return; + } + + if (sis->info == &sis_info133_early || sis->info == &sis_info100) { + /* Fix up latency */ + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x80); + /* Set compatibility bit */ + pci_read_config_byte(pdev, 0x49, ®); + if (!(reg & 0x01)) + pci_write_config_byte(pdev, 0x49, reg | 0x01); + return; + } + + if (sis->info == &sis_info66 || sis->info == &sis_info100_early) { + /* Fix up latency */ + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x80); + /* Set compatibility bit */ + pci_read_config_byte(pdev, 0x52, ®); + if (!(reg & 0x04)) + pci_write_config_byte(pdev, 0x52, reg | 0x04); + return; + } + + if (sis->info == &sis_info33) { + pci_read_config_byte(pdev, PCI_CLASS_PROG, ®); + if (( reg & 0x0F ) != 0x00) + pci_write_config_byte(pdev, PCI_CLASS_PROG, reg & 0xF0); + /* Fall through to ATA16 fixup below */ + } + + if (sis->info == &sis_info || sis->info == &sis_info33) { + /* force per drive recovery and active timings + needed on ATA_33 and below chips */ + pci_read_config_byte(pdev, 0x52, ®); + if (!(reg & 0x08)) + pci_write_config_byte(pdev, 0x52, reg|0x08); + return; + } + + BUG(); +} + +/** + * sis_init_one - Register SiS ATA PCI device with kernel services + * @pdev: PCI device to register + * @ent: Entry in sis_pci_tbl matching with @pdev + * + * Called from kernel PCI layer. We probe for combined mode (sigh), + * and then hand over control to libata, for it to do the rest. + * + * LOCKING: + * Inherited from PCI layer (may sleep). + * + * RETURNS: + * Zero on success, or -ERRNO value. + */ + +static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int printed_version; + static struct ata_port_info *port_info[2]; + struct ata_port_info *port; + struct pci_dev *host = NULL; + struct sis_chipset *chipset = NULL; + + static struct sis_chipset sis_chipsets[] = { + { 0x0745, &sis_info100 }, + { 0x0735, &sis_info100 }, + { 0x0733, &sis_info100 }, + { 0x0635, &sis_info100 }, + { 0x0633, &sis_info100 }, + + { 0x0730, &sis_info100_early }, /* 100 with ATA 66 layout */ + { 0x0550, &sis_info100_early }, /* 100 with ATA 66 layout */ + + { 0x0640, &sis_info66 }, + { 0x0630, &sis_info66 }, + { 0x0620, &sis_info66 }, + { 0x0540, &sis_info66 }, + { 0x0530, &sis_info66 }, + + { 0x5600, &sis_info33 }, + { 0x5598, &sis_info33 }, + { 0x5597, &sis_info33 }, + { 0x5591, &sis_info33 }, + { 0x5582, &sis_info33 }, + { 0x5581, &sis_info33 }, + + { 0x5596, &sis_info }, + { 0x5571, &sis_info }, + { 0x5517, &sis_info }, + { 0x5511, &sis_info }, + + {0} + }; + static struct sis_chipset sis133_early = { + 0x0, &sis_info133_early + }; + static struct sis_chipset sis133 = { + 0x0, &sis_info133 + }; + static struct sis_chipset sis100_early = { + 0x0, &sis_info100_early + }; + static struct sis_chipset sis100 = { + 0x0, &sis_info100 + }; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, + "version " DRV_VERSION "\n"); + + /* We have to find the bridge first */ + + for (chipset = &sis_chipsets[0]; chipset->device; chipset++) { + host = pci_get_device(PCI_VENDOR_ID_SI, chipset->device, NULL); + if (host != NULL) { + if (chipset->device == 0x630) { /* SIS630 */ + u8 host_rev; + pci_read_config_byte(host, PCI_REVISION_ID, &host_rev); + if (host_rev >= 0x30) /* 630 ET */ + chipset = &sis100_early; + } + break; + } + } + + /* Look for concealed bridges */ + if (host == NULL) { + /* Second check */ + u32 idemisc; + u16 trueid; + + /* Disable ID masking and register remapping then + see what the real ID is */ + + pci_read_config_dword(pdev, 0x54, &idemisc); + pci_write_config_dword(pdev, 0x54, idemisc & 0x7fffffff); + pci_read_config_word(pdev, PCI_DEVICE_ID, &trueid); + pci_write_config_dword(pdev, 0x54, idemisc); + + switch(trueid) { + case 0x5518: /* SIS 962/963 */ + chipset = &sis133; + if ((idemisc & 0x40000000) == 0) { + pci_write_config_dword(pdev, 0x54, idemisc | 0x40000000); + printk(KERN_INFO "SIS5513: Switching to 5513 register mapping\n"); + } + break; + case 0x0180: /* SIS 965/965L */ + chipset = &sis133; + break; + case 0x1180: /* SIS 966/966L */ + chipset = &sis133; + break; + } + } + + /* Further check */ + if (chipset == NULL) { + struct pci_dev *lpc_bridge; + u16 trueid; + u8 prefctl; + u8 idecfg; + u8 sbrev; + + /* Try the second unmasking technique */ + pci_read_config_byte(pdev, 0x4a, &idecfg); + pci_write_config_byte(pdev, 0x4a, idecfg | 0x10); + pci_read_config_word(pdev, PCI_DEVICE_ID, &trueid); + pci_write_config_byte(pdev, 0x4a, idecfg); + + switch(trueid) { + case 0x5517: + lpc_bridge = pci_get_slot(pdev->bus, 0x10); /* Bus 0 Dev 2 Fn 0 */ + if (lpc_bridge == NULL) + break; + pci_read_config_byte(lpc_bridge, PCI_REVISION_ID, &sbrev); + pci_read_config_byte(pdev, 0x49, &prefctl); + pci_dev_put(lpc_bridge); + + if (sbrev == 0x10 && (prefctl & 0x80)) { + chipset = &sis133_early; + break; + } + chipset = &sis100; + break; + } + } + pci_dev_put(host); + + /* No chipset info, no support */ + if (chipset == NULL) + return -ENODEV; + + port = chipset->info; + port->private_data = chipset; + + sis_fixup(pdev, chipset); + + port_info[0] = port_info[1] = port; + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id sis_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, 0x5513), }, /* SiS 5513 */ + { PCI_DEVICE(PCI_VENDOR_ID_SI, 0x5518), }, /* SiS 5518 */ + { } +}; + +static struct pci_driver sis_pci_driver = { + .name = DRV_NAME, + .id_table = sis_pci_tbl, + .probe = sis_init_one, + .remove = ata_pci_remove_one, +}; + +static int __init sis_init(void) +{ + return pci_register_driver(&sis_pci_driver); +} + +static void __exit sis_exit(void) +{ + pci_unregister_driver(&sis_pci_driver); +} + + +module_init(sis_init); +module_exit(sis_exit); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("SCSI low-level driver for SiS ATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, sis_pci_tbl); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/ata/pata_sl82c105.c b/drivers/ata/pata_sl82c105.c new file mode 100644 index 000000000000..47b290b5604a --- /dev/null +++ b/drivers/ata/pata_sl82c105.c @@ -0,0 +1,388 @@ +/* + * pata_sl82c105.c - SL82C105 PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * Based in part on linux/drivers/ide/pci/sl82c105.c + * SL82C105/Winbond 553 IDE driver + * + * and in part on the documentation and errata sheet + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_sl82c105" +#define DRV_VERSION "0.2.2" + +enum { + /* + * SL82C105 PCI config register 0x40 bits. + */ + CTRL_IDE_IRQB = (1 << 30), + CTRL_IDE_IRQA = (1 << 28), + CTRL_LEGIRQ = (1 << 11), + CTRL_P1F16 = (1 << 5), + CTRL_P1EN = (1 << 4), + CTRL_P0F16 = (1 << 1), + CTRL_P0EN = (1 << 0) +}; + +/** + * sl82c105_pre_reset - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int sl82c105_pre_reset(struct ata_port *ap) +{ + static const struct pci_bits sl82c105_enable_bits[] = { + { 0x40, 1, 0x01, 0x01 }, + { 0x40, 1, 0x10, 0x10 } + }; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (ap->port_no && !pci_test_config_bits(pdev, &sl82c105_enable_bits[ap->port_no])) { + ata_port_disable(ap); + dev_printk(KERN_INFO, &pdev->dev, "port disabled. ignoring.\n"); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + + +static void sl82c105_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, sl82c105_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + + +/** + * sl82c105_configure_piomode - set chip PIO timing + * @ap: ATA interface + * @adev: ATA device + * @pio: PIO mode + * + * Called to do the PIO mode setup. Our timing registers are shared + * so a configure_dmamode call will undo any work we do here and vice + * versa + */ + +static void sl82c105_configure_piomode(struct ata_port *ap, struct ata_device *adev, int pio) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static u16 pio_timing[5] = { + 0x50D, 0x407, 0x304, 0x242, 0x240 + }; + u16 dummy; + int timing = 0x44 + (8 * ap->port_no) + (4 * adev->devno); + + pci_write_config_word(pdev, timing, pio_timing[pio]); + /* Can we lose this oddity of the old driver */ + pci_read_config_word(pdev, timing, &dummy); +} + +/** + * sl82c105_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the PIO mode setup. Our timing registers are shared + * but we want to set the PIO timing by default. + */ + +static void sl82c105_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + sl82c105_configure_piomode(ap, adev, adev->pio_mode - XFER_PIO_0); +} + +/** + * sl82c105_configure_dmamode - set DMA mode in chip + * @ap: ATA interface + * @adev: ATA device + * + * Load DMA cycle times into the chip ready for a DMA transfer + * to occur. + */ + +static void sl82c105_configure_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static u16 dma_timing[3] = { + 0x707, 0x201, 0x200 + }; + u16 dummy; + int timing = 0x44 + (8 * ap->port_no) + (4 * adev->devno); + int dma = adev->dma_mode - XFER_MW_DMA_0; + + pci_write_config_word(pdev, timing, dma_timing[dma]); + /* Can we lose this oddity of the old driver */ + pci_read_config_word(pdev, timing, &dummy); +} + +/** + * sl82c105_set_dmamode - set initial DMA mode data + * @ap: ATA interface + * @adev: ATA device + * + * Called to do the DMA mode setup. This replaces the PIO timings + * for the device in question. Set appropriate PIO timings not DMA + * timings at this point. + */ + +static void sl82c105_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + switch(adev->dma_mode) { + case XFER_MW_DMA_0: + sl82c105_configure_piomode(ap, adev, 1); + break; + case XFER_MW_DMA_1: + sl82c105_configure_piomode(ap, adev, 3); + break; + case XFER_MW_DMA_2: + sl82c105_configure_piomode(ap, adev, 3); + break; + default: + BUG(); + } +} + +/** + * sl82c105_reset_engine - Reset the DMA engine + * @ap: ATA interface + * + * The sl82c105 has some serious problems with the DMA engine + * when transfers don't run as expected or ATAPI is used. The + * recommended fix is to reset the engine each use using a chip + * test register. + */ + +static void sl82c105_reset_engine(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u16 val; + + pci_read_config_word(pdev, 0x7E, &val); + pci_write_config_word(pdev, 0x7E, val | 4); + pci_write_config_word(pdev, 0x7E, val & ~4); +} + +/** + * sl82c105_bmdma_start - DMA engine begin + * @qc: ATA command + * + * Reset the DMA engine each use as recommended by the errata + * document. + * + * FIXME: if we switch clock at BMDMA start/end we might get better + * PIO performance on DMA capable devices. + */ + +static void sl82c105_bmdma_start(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + + sl82c105_reset_engine(ap); + + /* Set the clocks for DMA */ + sl82c105_configure_dmamode(ap, qc->dev); + /* Activate DMA */ + ata_bmdma_start(qc); +} + +/** + * sl82c105_bmdma_end - DMA engine stop + * @qc: ATA command + * + * Reset the DMA engine each use as recommended by the errata + * document. + * + * This function is also called to turn off DMA when a timeout occurs + * during DMA operation. In both cases we need to reset the engine, + * so no actual eng_timeout handler is required. + * + * We assume bmdma_stop is always called if bmdma_start as called. If + * not then we may need to wrap qc_issue. + */ + +static void sl82c105_bmdma_stop(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + + ata_bmdma_stop(qc); + sl82c105_reset_engine(ap); + + /* This will redo the initial setup of the DMA device to matching + PIO timings */ + sl82c105_set_dmamode(ap, qc->dev); +} + +static struct scsi_host_template sl82c105_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations sl82c105_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = sl82c105_set_piomode, + .set_dmamode = sl82c105_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .error_handler = sl82c105_error_handler, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = sl82c105_bmdma_start, + .bmdma_stop = sl82c105_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * sl82c105_bridge_revision - find bridge version + * @pdev: PCI device for the ATA function + * + * Locates the PCI bridge associated with the ATA function and + * providing it is a Winbond 553 reports the revision. If it cannot + * find a revision or the right device it returns -1 + */ + +static int sl82c105_bridge_revision(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + u8 rev; + + /* + * The bridge should be part of the same device, but function 0. + */ + bridge = pci_get_slot(pdev->bus, + PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + if (!bridge) + return -1; + + /* + * Make sure it is a Winbond 553 and is an ISA bridge. + */ + if (bridge->vendor != PCI_VENDOR_ID_WINBOND || + bridge->device != PCI_DEVICE_ID_WINBOND_83C553 || + bridge->class >> 8 != PCI_CLASS_BRIDGE_ISA) { + pci_dev_put(bridge); + return -1; + } + /* + * We need to find function 0's revision, not function 1 + */ + pci_read_config_byte(bridge, PCI_REVISION_ID, &rev); + + pci_dev_put(bridge); + return rev; +} + + +static int sl82c105_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info_dma = { + .sht = &sl82c105_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &sl82c105_port_ops + }; + static struct ata_port_info info_early = { + .sht = &sl82c105_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .port_ops = &sl82c105_port_ops + }; + static struct ata_port_info *port_info[2] = { &info_early, &info_early }; + u32 val; + int rev; + + rev = sl82c105_bridge_revision(dev); + + if (rev == -1) + dev_printk(KERN_WARNING, &dev->dev, "pata_sl82c105: Unable to find bridge, disabling DMA.\n"); + else if (rev <= 5) + dev_printk(KERN_WARNING, &dev->dev, "pata_sl82c105: Early bridge revision, no DMA available.\n"); + else { + port_info[0] = &info_dma; + port_info[1] = &info_dma; + } + + pci_read_config_dword(dev, 0x40, &val); + val |= CTRL_P0EN | CTRL_P0F16 | CTRL_P1F16; + pci_write_config_dword(dev, 0x40, val); + + + return ata_pci_init_one(dev, port_info, 1); /* For now */ +} + +static struct pci_device_id sl82c105[] = { + { PCI_DEVICE(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105), }, + { 0, }, +}; + +static struct pci_driver sl82c105_pci_driver = { + .name = DRV_NAME, + .id_table = sl82c105, + .probe = sl82c105_init_one, + .remove = ata_pci_remove_one +}; + +static int __init sl82c105_init(void) +{ + return pci_register_driver(&sl82c105_pci_driver); +} + + +static void __exit sl82c105_exit(void) +{ + pci_unregister_driver(&sl82c105_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Sl82c105"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, sl82c105); +MODULE_VERSION(DRV_VERSION); + +module_init(sl82c105_init); +module_exit(sl82c105_exit); diff --git a/drivers/ata/pata_triflex.c b/drivers/ata/pata_triflex.c new file mode 100644 index 000000000000..7dd6ea3a21e4 --- /dev/null +++ b/drivers/ata/pata_triflex.c @@ -0,0 +1,285 @@ +/* + * pata_triflex.c - Compaq PATA for new ATA layer + * (C) 2005 Red Hat Inc + * Alan Cox + * + * based upon + * + * triflex.c + * + * IDE Chipset driver for the Compaq TriFlex IDE controller. + * + * Known to work with the Compaq Workstation 5x00 series. + * + * Copyright (C) 2002 Hewlett-Packard Development Group, L.P. + * Author: Torben Mathiasen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Loosely based on the piix & svwks drivers. + * + * Documentation: + * Not publically available. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_triflex" +#define DRV_VERSION "0.2.5" + +/** + * triflex_probe_init - probe begin + * @ap: ATA port + * + * Set up cable type and use generic probe init + */ + +static int triflex_probe_init(struct ata_port *ap) +{ + static const struct pci_bits triflex_enable_bits[] = { + { 0x80, 1, 0x01, 0x01 }, + { 0x80, 1, 0x02, 0x02 } + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (!pci_test_config_bits(pdev, &triflex_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + + + +static void triflex_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, triflex_probe_init, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * triflex_load_timing - timing configuration + * @ap: ATA interface + * @adev: Device on the bus + * @speed: speed to configure + * + * The Triflex has one set of timings per device per channel. This + * means we must do some switching. As the PIO and DMA timings don't + * match we have to do some reloading unlike PIIX devices where tuning + * tricks can avoid it. + */ + +static void triflex_load_timing(struct ata_port *ap, struct ata_device *adev, int speed) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 timing = 0; + u32 triflex_timing, old_triflex_timing; + int channel_offset = ap->port_no ? 0x74: 0x70; + unsigned int is_slave = (adev->devno != 0); + + + pci_read_config_dword(pdev, channel_offset, &old_triflex_timing); + triflex_timing = old_triflex_timing; + + switch(speed) + { + case XFER_MW_DMA_2: + timing = 0x0103;break; + case XFER_MW_DMA_1: + timing = 0x0203;break; + case XFER_MW_DMA_0: + timing = 0x0808;break; + case XFER_SW_DMA_2: + case XFER_SW_DMA_1: + case XFER_SW_DMA_0: + timing = 0x0F0F;break; + case XFER_PIO_4: + timing = 0x0202;break; + case XFER_PIO_3: + timing = 0x0204;break; + case XFER_PIO_2: + timing = 0x0404;break; + case XFER_PIO_1: + timing = 0x0508;break; + case XFER_PIO_0: + timing = 0x0808;break; + default: + BUG(); + } + triflex_timing &= ~ (0xFFFF << (16 * is_slave)); + triflex_timing |= (timing << (16 * is_slave)); + + if (triflex_timing != old_triflex_timing) + pci_write_config_dword(pdev, channel_offset, triflex_timing); +} + +/** + * triflex_set_piomode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * + * Use the timing loader to set up the PIO mode. We have to do this + * because DMA start/stop will only be called once DMA occurs. If there + * has been no DMA then the PIO timings are still needed. + */ +static void triflex_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + triflex_load_timing(ap, adev, adev->pio_mode); +} + +/** + * triflex_dma_start - DMA start callback + * @qc: Command in progress + * + * Usually drivers set the DMA timing at the point the set_dmamode call + * is made. Triflex however requires we load new timings on the + * transition or keep matching PIO/DMA pairs (ie MWDMA2/PIO4 etc). + * We load the DMA timings just before starting DMA and then restore + * the PIO timing when the DMA is finished. + */ + +static void triflex_bmdma_start(struct ata_queued_cmd *qc) +{ + triflex_load_timing(qc->ap, qc->dev, qc->dev->dma_mode); + ata_bmdma_start(qc); +} + +/** + * triflex_dma_stop - DMA stop callback + * @ap: ATA interface + * @adev: ATA device + * + * We loaded new timings in dma_start, as a result we need to restore + * the PIO timings in dma_stop so that the next command issue gets the + * right clock values. + */ + +static void triflex_bmdma_stop(struct ata_queued_cmd *qc) +{ + ata_bmdma_stop(qc); + triflex_load_timing(qc->ap, qc->dev, qc->dev->pio_mode); +} + +static struct scsi_host_template triflex_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations triflex_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = triflex_set_piomode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = triflex_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = triflex_bmdma_start, + .bmdma_stop = triflex_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static int triflex_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static struct ata_port_info info = { + .sht = &triflex_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &triflex_port_ops + }; + static struct ata_port_info *port_info[2] = { &info, &info }; + static int printed_version; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &dev->dev, "version " DRV_VERSION "\n"); + + return ata_pci_init_one(dev, port_info, 2); +} + +static const struct pci_device_id triflex[] = { + { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_TRIFLEX_IDE, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { 0, }, +}; + +static struct pci_driver triflex_pci_driver = { + .name = DRV_NAME, + .id_table = triflex, + .probe = triflex_init_one, + .remove = ata_pci_remove_one +}; + +static int __init triflex_init(void) +{ + return pci_register_driver(&triflex_pci_driver); +} + + +static void __exit triflex_exit(void) +{ + pci_unregister_driver(&triflex_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for Compaq Triflex"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, triflex); +MODULE_VERSION(DRV_VERSION); + +module_init(triflex_init); +module_exit(triflex_exit); diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c new file mode 100644 index 000000000000..2580e1606d9e --- /dev/null +++ b/drivers/ata/pata_via.c @@ -0,0 +1,568 @@ +/* + * pata_via.c - VIA PATA for new ATA layer + * (C) 2005-2006 Red Hat Inc + * Alan Cox + * + * Documentation + * Most chipset documentation available under NDA only + * + * VIA version guide + * VIA VT82C561 - early design, uses ata_generic currently + * VIA VT82C576 - MWDMA, 33Mhz + * VIA VT82C586 - MWDMA, 33Mhz + * VIA VT82C586a - Added UDMA to 33Mhz + * VIA VT82C586b - UDMA33 + * VIA VT82C596a - Nonfunctional UDMA66 + * VIA VT82C596b - Working UDMA66 + * VIA VT82C686 - Nonfunctional UDMA66 + * VIA VT82C686a - Working UDMA66 + * VIA VT82C686b - Updated to UDMA100 + * VIA VT8231 - UDMA100 + * VIA VT8233 - UDMA100 + * VIA VT8233a - UDMA133 + * VIA VT8233c - UDMA100 + * VIA VT8235 - UDMA133 + * VIA VT8237 - UDMA133 + * + * Most registers remain compatible across chips. Others start reserved + * and acquire sensible semantics if set to 1 (eg cable detect). A few + * exceptions exist, notably around the FIFO settings. + * + * One additional quirk of the VIA design is that like ALi they use few + * PCI IDs for a lot of chips. + * + * Based heavily on: + * + * Version 3.38 + * + * VIA IDE driver for Linux. Supported southbridges: + * + * vt82c576, vt82c586, vt82c586a, vt82c586b, vt82c596a, vt82c596b, + * vt82c686, vt82c686a, vt82c686b, vt8231, vt8233, vt8233c, vt8233a, + * vt8235, vt8237 + * + * Copyright (c) 2000-2002 Vojtech Pavlik + * + * Based on the work of: + * Michel Aubry + * Jeff Garzik + * Andre Hedrick + + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_via" +#define DRV_VERSION "0.1.13" + +/* + * The following comes directly from Vojtech Pavlik's ide/pci/via82cxxx + * driver. + */ + +enum { + VIA_UDMA = 0x007, + VIA_UDMA_NONE = 0x000, + VIA_UDMA_33 = 0x001, + VIA_UDMA_66 = 0x002, + VIA_UDMA_100 = 0x003, + VIA_UDMA_133 = 0x004, + VIA_BAD_PREQ = 0x010, /* Crashes if PREQ# till DDACK# set */ + VIA_BAD_CLK66 = 0x020, /* 66 MHz clock doesn't work correctly */ + VIA_SET_FIFO = 0x040, /* Needs to have FIFO split set */ + VIA_NO_UNMASK = 0x080, /* Doesn't work with IRQ unmasking on */ + VIA_BAD_ID = 0x100, /* Has wrong vendor ID (0x1107) */ + VIA_BAD_AST = 0x200, /* Don't touch Address Setup Timing */ + VIA_NO_ENABLES = 0x400, /* Has no enablebits */ +}; + +/* + * VIA SouthBridge chips. + */ + +static const struct via_isa_bridge { + const char *name; + u16 id; + u8 rev_min; + u8 rev_max; + u16 flags; +} via_isa_bridges[] = { + { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES}, + { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "vt8233a", PCI_DEVICE_ID_VIA_8233A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "vt8233c", PCI_DEVICE_ID_VIA_8233C_0, 0x00, 0x2f, VIA_UDMA_100 }, + { "vt8233", PCI_DEVICE_ID_VIA_8233_0, 0x00, 0x2f, VIA_UDMA_100 }, + { "vt8231", PCI_DEVICE_ID_VIA_8231, 0x00, 0x2f, VIA_UDMA_100 }, + { "vt82c686b", PCI_DEVICE_ID_VIA_82C686, 0x40, 0x4f, VIA_UDMA_100 }, + { "vt82c686a", PCI_DEVICE_ID_VIA_82C686, 0x10, 0x2f, VIA_UDMA_66 }, + { "vt82c686", PCI_DEVICE_ID_VIA_82C686, 0x00, 0x0f, VIA_UDMA_33 | VIA_BAD_CLK66 }, + { "vt82c596b", PCI_DEVICE_ID_VIA_82C596, 0x10, 0x2f, VIA_UDMA_66 }, + { "vt82c596a", PCI_DEVICE_ID_VIA_82C596, 0x00, 0x0f, VIA_UDMA_33 | VIA_BAD_CLK66 }, + { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x47, 0x4f, VIA_UDMA_33 | VIA_SET_FIFO }, + { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x40, 0x46, VIA_UDMA_33 | VIA_SET_FIFO | VIA_BAD_PREQ }, + { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x30, 0x3f, VIA_UDMA_33 | VIA_SET_FIFO }, + { "vt82c586a", PCI_DEVICE_ID_VIA_82C586_0, 0x20, 0x2f, VIA_UDMA_33 | VIA_SET_FIFO }, + { "vt82c586", PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f, VIA_UDMA_NONE | VIA_SET_FIFO }, + { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK }, + { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID }, + { NULL } +}; + +/** + * via_cable_detect - cable detection + * @ap: ATA port + * + * Perform cable detection. Actually for the VIA case the BIOS + * already did this for us. We read the values provided by the + * BIOS. If you are using an 8235 in a non-PC configuration you + * may need to update this code. + * + * Hotplug also impacts on this. + */ + +static int via_cable_detect(struct ata_port *ap) { + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 ata66; + + pci_read_config_dword(pdev, 0x50, &ata66); + /* Check both the drive cable reporting bits, we might not have + two drives */ + if (ata66 & (0x10100000 >> (16 * ap->port_no))) + return ATA_CBL_PATA80; + else + return ATA_CBL_PATA40; +} + +static int via_pre_reset(struct ata_port *ap) +{ + const struct via_isa_bridge *config = ap->host->private_data; + + if (!(config->flags & VIA_NO_ENABLES)) { + static const struct pci_bits via_enable_bits[] = { + { 0x40, 1, 0x02, 0x02 }, + { 0x40, 1, 0x01, 0x01 } + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (!pci_test_config_bits(pdev, &via_enable_bits[ap->port_no])) { + ata_port_disable(ap); + printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id); + return 0; + } + } + + if ((config->flags & VIA_UDMA) >= VIA_UDMA_66) + ap->cbl = via_cable_detect(ap); + else + ap->cbl = ATA_CBL_PATA40; + return ata_std_prereset(ap); +} + + +/** + * via_error_handler - reset for VIA chips + * @ap: ATA port + * + * Handle the reset callback for the later chips with cable detect + */ + +static void via_error_handler(struct ata_port *ap) +{ + ata_bmdma_drive_eh(ap, via_pre_reset, ata_std_softreset, NULL, ata_std_postreset); +} + +/** + * via_do_set_mode - set initial PIO mode data + * @ap: ATA interface + * @adev: ATA device + * @mode: ATA mode being programmed + * @tdiv: Clocks per PCI clock + * @set_ast: Set to program address setup + * @udma_type: UDMA mode/format of registers + * + * Program the VIA registers for DMA and PIO modes. Uses the ata timing + * support in order to compute modes. + * + * FIXME: Hotplug will require we serialize multiple mode changes + * on the two channels. + */ + +static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev, int mode, int tdiv, int set_ast, int udma_type) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct ata_device *peer = ata_dev_pair(adev); + struct ata_timing t, p; + static int via_clock = 33333; /* Bus clock in kHZ - ought to be tunable one day */ + unsigned long T = 1000000000 / via_clock; + unsigned long UT = T/tdiv; + int ut; + int offset = 3 - (2*ap->port_no) - adev->devno; + + + /* Calculate the timing values we require */ + ata_timing_compute(adev, mode, &t, T, UT); + + /* We share 8bit timing so we must merge the constraints */ + if (peer) { + if (peer->pio_mode) { + ata_timing_compute(peer, peer->pio_mode, &p, T, UT); + ata_timing_merge(&p, &t, &t, ATA_TIMING_8BIT); + } + } + + /* Address setup is programmable but breaks on UDMA133 setups */ + if (set_ast) { + u8 setup; /* 2 bits per drive */ + int shift = 2 * offset; + + pci_read_config_byte(pdev, 0x4C, &setup); + setup &= ~(3 << shift); + setup |= FIT(t.setup, 1, 4) << shift; /* 1,4 or 1,4 - 1 FIXME */ + pci_write_config_byte(pdev, 0x4C, setup); + } + + /* Load the PIO mode bits */ + pci_write_config_byte(pdev, 0x4F - ap->port_no, + ((FIT(t.act8b, 1, 16) - 1) << 4) | (FIT(t.rec8b, 1, 16) - 1)); + pci_write_config_byte(pdev, 0x48 + offset, + ((FIT(t.active, 1, 16) - 1) << 4) | (FIT(t.recover, 1, 16) - 1)); + + /* Load the UDMA bits according to type */ + switch(udma_type) { + default: + /* BUG() ? */ + /* fall through */ + case 33: + ut = t.udma ? (0xe0 | (FIT(t.udma, 2, 5) - 2)) : 0x03; + break; + case 66: + ut = t.udma ? (0xe8 | (FIT(t.udma, 2, 9) - 2)) : 0x0f; + break; + case 100: + ut = t.udma ? (0xe0 | (FIT(t.udma, 2, 9) - 2)) : 0x07; + break; + case 133: + ut = t.udma ? (0xe0 | (FIT(t.udma, 2, 9) - 2)) : 0x07; + break; + } + /* Set UDMA unless device is not UDMA capable */ + if (udma_type) + pci_write_config_byte(pdev, 0x50 + offset, ut); +} + +static void via_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + const struct via_isa_bridge *config = ap->host->private_data; + int set_ast = (config->flags & VIA_BAD_AST) ? 0 : 1; + int mode = config->flags & VIA_UDMA; + static u8 tclock[5] = { 1, 1, 2, 3, 4 }; + static u8 udma[5] = { 0, 33, 66, 100, 133 }; + + via_do_set_mode(ap, adev, adev->pio_mode, tclock[mode], set_ast, udma[mode]); +} + +static void via_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + const struct via_isa_bridge *config = ap->host->private_data; + int set_ast = (config->flags & VIA_BAD_AST) ? 0 : 1; + int mode = config->flags & VIA_UDMA; + static u8 tclock[5] = { 1, 1, 2, 3, 4 }; + static u8 udma[5] = { 0, 33, 66, 100, 133 }; + + via_do_set_mode(ap, adev, adev->dma_mode, tclock[mode], set_ast, udma[mode]); +} + +static struct scsi_host_template via_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .max_sectors = ATA_MAX_SECTORS, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations via_port_ops = { + .port_disable = ata_port_disable, + .set_piomode = via_set_piomode, + .set_dmamode = via_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = via_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +static struct ata_port_operations via_port_ops_noirq = { + .port_disable = ata_port_disable, + .set_piomode = via_set_piomode, + .set_dmamode = via_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = via_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + .eng_timeout = ata_eng_timeout, + .data_xfer = ata_pio_data_xfer_noirq, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + + .port_start = ata_port_start, + .port_stop = ata_port_stop, + .host_stop = ata_host_stop +}; + +/** + * via_init_one - discovery callback + * @pdev: PCI device ID + * @id: PCI table info + * + * A VIA IDE interface has been discovered. Figure out what revision + * and perform configuration work before handing it to the ATA layer + */ + +static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + /* Early VIA without UDMA support */ + static struct ata_port_info via_mwdma_info = { + .sht = &via_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &via_port_ops + }; + /* Ditto with IRQ masking required */ + static struct ata_port_info via_mwdma_info_borked = { + .sht = &via_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .port_ops = &via_port_ops_noirq, + }; + /* VIA UDMA 33 devices (and borked 66) */ + static struct ata_port_info via_udma33_info = { + .sht = &via_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7, + .port_ops = &via_port_ops + }; + /* VIA UDMA 66 devices */ + static struct ata_port_info via_udma66_info = { + .sht = &via_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x1f, + .port_ops = &via_port_ops + }; + /* VIA UDMA 100 devices */ + static struct ata_port_info via_udma100_info = { + .sht = &via_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x3f, + .port_ops = &via_port_ops + }; + /* UDMA133 with bad AST (All current 133) */ + static struct ata_port_info via_udma133_info = { + .sht = &via_sht, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = 0x7f, /* FIXME: should check north bridge */ + .port_ops = &via_port_ops + }; + struct ata_port_info *port_info[2], *type; + struct pci_dev *isa = NULL; + const struct via_isa_bridge *config; + static int printed_version; + u8 t; + u8 enable; + u32 timing; + + if (!printed_version++) + dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); + + /* To find out how the IDE will behave and what features we + actually have to look at the bridge not the IDE controller */ + for (config = via_isa_bridges; config->id; config++) + if ((isa = pci_get_device(PCI_VENDOR_ID_VIA + + !!(config->flags & VIA_BAD_ID), + config->id, NULL))) { + + pci_read_config_byte(isa, PCI_REVISION_ID, &t); + if (t >= config->rev_min && + t <= config->rev_max) + break; + pci_dev_put(isa); + } + + if (!config->id) { + printk(KERN_WARNING "via: Unknown VIA SouthBridge, disabling.\n"); + return -ENODEV; + } + pci_dev_put(isa); + + /* 0x40 low bits indicate enabled channels */ + pci_read_config_byte(pdev, 0x40 , &enable); + enable &= 3; + if (enable == 0) { + return -ENODEV; + } + + /* Initialise the FIFO for the enabled channels. */ + if (config->flags & VIA_SET_FIFO) { + u8 fifo_setting[4] = {0x00, 0x60, 0x00, 0x20}; + u8 fifo; + + pci_read_config_byte(pdev, 0x43, &fifo); + + /* Clear PREQ# until DDACK# for errata */ + if (config->flags & VIA_BAD_PREQ) + fifo &= 0x7F; + else + fifo &= 0x9f; + /* Turn on FIFO for enabled channels */ + fifo |= fifo_setting[enable]; + pci_write_config_byte(pdev, 0x43, fifo); + } + /* Clock set up */ + switch(config->flags & VIA_UDMA) { + case VIA_UDMA_NONE: + if (config->flags & VIA_NO_UNMASK) + type = &via_mwdma_info_borked; + else + type = &via_mwdma_info; + break; + case VIA_UDMA_33: + type = &via_udma33_info; + break; + case VIA_UDMA_66: + type = &via_udma66_info; + /* The 66 MHz devices require we enable the clock */ + pci_read_config_dword(pdev, 0x50, &timing); + timing |= 0x80008; + pci_write_config_dword(pdev, 0x50, timing); + break; + case VIA_UDMA_100: + type = &via_udma100_info; + break; + case VIA_UDMA_133: + type = &via_udma133_info; + break; + default: + WARN_ON(1); + return -ENODEV; + } + + if (config->flags & VIA_BAD_CLK66) { + /* Disable the 66MHz clock on problem devices */ + pci_read_config_dword(pdev, 0x50, &timing); + timing &= ~0x80008; + pci_write_config_dword(pdev, 0x50, timing); + } + + /* We have established the device type, now fire it up */ + type->private_data = (void *)config; + + port_info[0] = port_info[1] = type; + return ata_pci_init_one(pdev, port_info, 2); +} + +static const struct pci_device_id via[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576_1), }, + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1), }, + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_6410), }, + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), }, + { 0, }, +}; + +static struct pci_driver via_pci_driver = { + .name = DRV_NAME, + .id_table = via, + .probe = via_init_one, + .remove = ata_pci_remove_one +}; + +static int __init via_init(void) +{ + return pci_register_driver(&via_pci_driver); +} + + +static void __exit via_exit(void) +{ + pci_unregister_driver(&via_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("low-level driver for VIA PATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, via); +MODULE_VERSION(DRV_VERSION); + +module_init(via_init); +module_exit(via_exit); diff --git a/include/linux/libata.h b/include/linux/libata.h index 563885cb0995..a6d818148ae0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -46,7 +46,7 @@ #undef ATA_VERBOSE_DEBUG /* yet more debugging output */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ #undef ATA_NDEBUG /* define to disable quick runtime checks */ -#undef ATA_ENABLE_PATA /* define to enable PATA support in some +#define ATA_ENABLE_PATA /* define to enable PATA support in some * low-level drivers */ -- cgit v1.2.3-71-gd317 From 187afbed1814ea0851bf30bacbf807217dd7864b Mon Sep 17 00:00:00 2001 From: Jon Masters Date: Mon, 28 Aug 2006 17:08:21 -0500 Subject: [SCSI] MODULE_FIRMWARE for binary firmware(s) Right now, various kernel modules are being migrated over to use request_firmware in order to pull in binary firmware blobs from userland when the module is loaded. This makes sense. However, there is right now little mechanism in place to automatically determine which binary firmware blobs must be included with a kernel in order to satisfy the prerequisites of these drivers. This affects vendors, but also regular users to a certain extent too. The attached patch introduces MODULE_FIRMWARE as a mechanism for advertising that a particular firmware file is to be loaded - it will then show up via modinfo and could be used e.g. when packaging a kernel. Signed-off-by: Jon Masters Comments added in line with all the other MODULE_ tag Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- include/linux/module.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 0dfb794c52d3..d4486cc2e7fe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -156,6 +156,11 @@ extern struct module __this_module; */ #define MODULE_VERSION(_version) MODULE_INFO(version, _version) +/* Optional firmware file (or files) needed by the module + * format is simply firmware file name. Multiple firmware + * files require multiple MODULE_FIRMWARE() specifiers */ +#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) + /* Given an address, look for it in the exception tables */ const struct exception_table_entry *search_exception_tables(unsigned long add); -- cgit v1.2.3-71-gd317 From 9bec2e38527a9f2497b3d976715c672d08d6160d Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 31 Aug 2006 00:02:15 -0400 Subject: [libata] Trim trailing whitespace. --- drivers/ata/ahci.c | 2 +- drivers/ata/pdc_adma.c | 2 +- include/linux/ata.h | 4 ++-- include/linux/libata.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 3f1106fdaed1..1aabc81d82f1 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1075,7 +1075,7 @@ static void ahci_host_intr(struct ata_port *ap) return; /* ignore interim PIO setup fis interrupts */ - if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS)) + if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS)) return; if (ata_ratelimit()) diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c index 912211ada816..0e23ecb77bc2 100644 --- a/drivers/ata/pdc_adma.c +++ b/drivers/ata/pdc_adma.c @@ -497,7 +497,7 @@ static inline unsigned int adma_intr_mmio(struct ata_host *host) continue; DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n", ap->id, qc->tf.protocol, status); - + /* complete taskfile transaction */ pp->state = adma_state_idle; qc->err_mask |= ac_err_mask(status); diff --git a/include/linux/ata.h b/include/linux/ata.h index 991b858acc30..d89441907024 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -320,8 +320,8 @@ static inline unsigned int ata_id_major_version(const u16 *id) static inline int ata_id_current_chs_valid(const u16 *id) { - /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command - has not been issued to the device then the values of + /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command + has not been issued to the device then the values of id[54] to id[56] are vendor specific. */ return (id[53] & 0x01) && /* Current translation valid */ id[54] && /* cylinders in current translation */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 563885cb0995..0ddf16c17b93 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -198,7 +198,7 @@ enum { /* host set flags */ ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ - + /* various lengths of time */ ATA_TMOUT_BOOT = 30 * HZ, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7 * HZ, /* heuristic */ -- cgit v1.2.3-71-gd317 From 492dfb489658dfe4a755fa29dd0e34e9c8bd8fb8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Aug 2006 15:48:45 -0400 Subject: [SCSI] block: add support for shared tag maps The current block queue implementation already contains most of the machinery for shared tag maps. The only remaining pieces are a way to allocate and destroy a tag map independently of the queues (so that the maps can be managed on the life cycle of the overseeing entity) Acked-by: Jens Axboe Signed-off-by: James Bottomley --- block/ll_rw_blk.c | 109 +++++++++++++++++++++++++++++++++++++++---------- include/linux/blkdev.h | 2 + 2 files changed, 90 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index ddd9253f9d55..556a3d354eab 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -848,21 +848,18 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); /** - * __blk_queue_free_tags - release tag maintenance info - * @q: the request queue for the device + * __blk_free_tags - release a given set of tag maintenance info + * @bqt: the tag map to free * - * Notes: - * blk_cleanup_queue() will take care of calling this function, if tagging - * has been used. So there's no need to call this directly. - **/ -static void __blk_queue_free_tags(request_queue_t *q) + * Tries to free the specified @bqt@. Returns true if it was + * actually freed and false if there are still references using it + */ +static int __blk_free_tags(struct blk_queue_tag *bqt) { - struct blk_queue_tag *bqt = q->queue_tags; - - if (!bqt) - return; + int retval; - if (atomic_dec_and_test(&bqt->refcnt)) { + retval = atomic_dec_and_test(&bqt->refcnt); + if (retval) { BUG_ON(bqt->busy); BUG_ON(!list_empty(&bqt->busy_list)); @@ -873,12 +870,49 @@ static void __blk_queue_free_tags(request_queue_t *q) bqt->tag_map = NULL; kfree(bqt); + } + return retval; +} + +/** + * __blk_queue_free_tags - release tag maintenance info + * @q: the request queue for the device + * + * Notes: + * blk_cleanup_queue() will take care of calling this function, if tagging + * has been used. So there's no need to call this directly. + **/ +static void __blk_queue_free_tags(request_queue_t *q) +{ + struct blk_queue_tag *bqt = q->queue_tags; + + if (!bqt) + return; + + __blk_free_tags(bqt); + q->queue_tags = NULL; q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); } + +/** + * blk_free_tags - release a given set of tag maintenance info + * @bqt: the tag map to free + * + * For externally managed @bqt@ frees the map. Callers of this + * function must guarantee to have released all the queues that + * might have been using this tag map. + */ +void blk_free_tags(struct blk_queue_tag *bqt) +{ + if (unlikely(!__blk_free_tags(bqt))) + BUG(); +} +EXPORT_SYMBOL(blk_free_tags); + /** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device @@ -901,7 +935,7 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) unsigned long *tag_map; int nr_ulongs; - if (depth > q->nr_requests * 2) { + if (q && depth > q->nr_requests * 2) { depth = q->nr_requests * 2; printk(KERN_ERR "%s: adjusted depth to %d\n", __FUNCTION__, depth); @@ -927,6 +961,38 @@ fail: return -ENOMEM; } +static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, + int depth) +{ + struct blk_queue_tag *tags; + + tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); + if (!tags) + goto fail; + + if (init_tag_map(q, tags, depth)) + goto fail; + + INIT_LIST_HEAD(&tags->busy_list); + tags->busy = 0; + atomic_set(&tags->refcnt, 1); + return tags; +fail: + kfree(tags); + return NULL; +} + +/** + * blk_init_tags - initialize the tag info for an external tag map + * @depth: the maximum queue depth supported + * @tags: the tag to use + **/ +struct blk_queue_tag *blk_init_tags(int depth) +{ + return __blk_queue_init_tags(NULL, depth); +} +EXPORT_SYMBOL(blk_init_tags); + /** * blk_queue_init_tags - initialize the queue tag info * @q: the request queue for the device @@ -941,16 +1007,10 @@ int blk_queue_init_tags(request_queue_t *q, int depth, BUG_ON(tags && q->queue_tags && tags != q->queue_tags); if (!tags && !q->queue_tags) { - tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); - if (!tags) - goto fail; + tags = __blk_queue_init_tags(q, depth); - if (init_tag_map(q, tags, depth)) + if (!tags) goto fail; - - INIT_LIST_HEAD(&tags->busy_list); - tags->busy = 0; - atomic_set(&tags->refcnt, 1); } else if (q->queue_tags) { if ((rc = blk_queue_resize_tags(q, depth))) return rc; @@ -1001,6 +1061,13 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth) return 0; } + /* + * Currently cannot replace a shared tag map with a new + * one, so error out if this is the case + */ + if (atomic_read(&bqt->refcnt) != 1) + return -EBUSY; + /* * save the old state info, so we can copy it back */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aafe82788b4e..427b0d61be6c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -746,6 +746,8 @@ extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); extern long blk_congestion_wait(int rw, long timeout); +extern struct blk_queue_tag *blk_init_tags(int); +extern void blk_free_tags(struct blk_queue_tag *); extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); extern int blkdev_issue_flush(struct block_device *, sector_t *); -- cgit v1.2.3-71-gd317 From 84314fd4740ad73550c76dee4a9578979d84af48 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:30:09 -0400 Subject: [SCSI] SCSI and FC Transport: add netlink support for posting of transport events This patch formally adds support for the posting of FC events via netlink. It is a followup to the original RFC at: http://marc.theaimsgroup.com/?l=linux-scsi&m=114530667923464&w=2 and the initial posting at: http://marc.theaimsgroup.com/?l=linux-scsi&m=115507374832500&w=2 The patch has been updated to optimize the send path, per the discussions in the initial posting. Per discussions at the Storage Summit and at OLS, we are to use netlink for async events from transports. Also per discussions, to avoid a netlink protocol per transport, I've create a single NETLINK_SCSITRANSPORT protocol, which can then be used by all transports. This patch: - Creates new files scsi_netlink.c and scsi_netlink.h, which contains the single and shared definitions for the SCSI Transport. It is tied into the base SCSI subsystem intialization. Contains a single interface routine, scsi_send_transport_event(), for a transport to send an event (via multicast to a protocol specific group). - Creates a new scsi_netlink_fc.h file, which contains the FC netlink event messages - Adds 3 new routines to the fc transport: fc_get_event_number() - to get a FC event # fc_host_post_event() - to send a simple FC event (32 bits of data) fc_host_post_vendor_event() - to send a Vendor unique event, with arbitrary amounts of data. Note: the separation of event number allows for a LLD to send a standard event, followed by vendor-specific data for the event. Note: This patch assumes 2 prior fc transport patches have been installed: http://marc.theaimsgroup.com/?l=linux-scsi&m=115555807316329&w=2 http://marc.theaimsgroup.com/?l=linux-scsi&m=115581614930261&w=2 Sorry - next time I'll do something like making these individual patches of the same posting when I know they'll be posted closely together. Signed-off-by: James Smart Tidy up configuration not to make SCSI always select NET Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 6 ++ drivers/scsi/Makefile | 1 + drivers/scsi/scsi.c | 3 + drivers/scsi/scsi_netlink.c | 199 ++++++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_priv.h | 11 +++ drivers/scsi/scsi_transport_fc.c | 200 ++++++++++++++++++++++++++++++++++++++- include/linux/netlink.h | 2 + include/scsi/scsi_netlink.h | 86 +++++++++++++++++ include/scsi/scsi_netlink_fc.h | 71 ++++++++++++++ include/scsi/scsi_transport_fc.h | 34 +++++++ 10 files changed, 612 insertions(+), 1 deletion(-) create mode 100644 drivers/scsi/scsi_netlink.c create mode 100644 include/scsi/scsi_netlink.h create mode 100644 include/scsi/scsi_netlink_fc.h (limited to 'include/linux') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index c8c606589ea6..4d1998d23f0f 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -27,6 +27,11 @@ config SCSI However, do not compile this as a module if your root file system (the one containing the directory /) is located on a SCSI device. +config SCSI_NETLINK + tristate + default n + select NET + config SCSI_PROC_FS bool "legacy /proc/scsi/ support" depends on SCSI && PROC_FS @@ -222,6 +227,7 @@ config SCSI_SPI_ATTRS config SCSI_FC_ATTRS tristate "FiberChannel Transport Attributes" depends on SCSI + select SCSI_NETLINK help If you wish to export transport-specific information about each attached FiberChannel device to sysfs, say Y. diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index fd9aeb1ba07f..8fc2c594b537 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -159,6 +159,7 @@ scsi_mod-y += scsi.o hosts.o scsi_ioctl.o constants.o \ scsicam.o scsi_error.o scsi_lib.o \ scsi_scan.o scsi_sysfs.o \ scsi_devinfo.o +scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 37843927e47f..eedfd059b82b 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1118,6 +1118,8 @@ static int __init init_scsi(void) for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(scsi_done_q, i)); + scsi_netlink_init(); + printk(KERN_NOTICE "SCSI subsystem initialized\n"); return 0; @@ -1138,6 +1140,7 @@ cleanup_queue: static void __exit exit_scsi(void) { + scsi_netlink_exit(); scsi_sysfs_unregister(); scsi_exit_sysctl(); scsi_exit_hosts(); diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c new file mode 100644 index 000000000000..1b59b27e887f --- /dev/null +++ b/drivers/scsi/scsi_netlink.c @@ -0,0 +1,199 @@ +/* + * scsi_netlink.c - SCSI Transport Netlink Interface + * + * Copyright (C) 2006 James Smart, Emulex Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include + +#include +#include "scsi_priv.h" + +struct sock *scsi_nl_sock = NULL; +EXPORT_SYMBOL_GPL(scsi_nl_sock); + + +/** + * scsi_nl_rcv_msg - + * Receive message handler. Extracts message from a receive buffer. + * Validates message header and calls appropriate transport message handler + * + * @skb: socket receive buffer + * + **/ +static void +scsi_nl_rcv_msg(struct sk_buff *skb) +{ + struct nlmsghdr *nlh; + struct scsi_nl_hdr *hdr; + uint32_t rlen; + int err; + + while (skb->len >= NLMSG_SPACE(0)) { + err = 0; + + nlh = (struct nlmsghdr *) skb->data; + if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) || + (skb->len < nlh->nlmsg_len)) { + printk(KERN_WARNING "%s: discarding partial skb\n", + __FUNCTION__); + return; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + + if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) { + err = -EBADMSG; + goto next_msg; + } + + hdr = NLMSG_DATA(nlh); + if ((hdr->version != SCSI_NL_VERSION) || + (hdr->magic != SCSI_NL_MAGIC)) { + err = -EPROTOTYPE; + goto next_msg; + } + + if (security_netlink_recv(skb, CAP_SYS_ADMIN)) { + err = -EPERM; + goto next_msg; + } + + if (nlh->nlmsg_len < (sizeof(*nlh) + hdr->msglen)) { + printk(KERN_WARNING "%s: discarding partial message\n", + __FUNCTION__); + return; + } + + /* + * We currently don't support anyone sending us a message + */ + +next_msg: + if ((err) || (nlh->nlmsg_flags & NLM_F_ACK)) + netlink_ack(skb, nlh, err); + + skb_pull(skb, rlen); + } +} + + +/** + * scsi_nl_rcv_msg - + * Receive handler for a socket. Extracts a received message buffer from + * the socket, and starts message processing. + * + * @sk: socket + * @len: unused + * + **/ +static void +scsi_nl_rcv(struct sock *sk, int len) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) { + scsi_nl_rcv_msg(skb); + kfree_skb(skb); + } +} + + +/** + * scsi_nl_rcv_event - + * Event handler for a netlink socket. + * + * @this: event notifier block + * @event: event type + * @ptr: event payload + * + **/ +static int +scsi_nl_rcv_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (n->protocol != NETLINK_SCSITRANSPORT) + return NOTIFY_DONE; + + /* + * Currently, we are not tracking PID's, etc. There is nothing + * to handle. + */ + + return NOTIFY_DONE; +} + +static struct notifier_block scsi_netlink_notifier = { + .notifier_call = scsi_nl_rcv_event, +}; + + +/** + * scsi_netlink_init - + * Called by SCSI subsystem to intialize the SCSI transport netlink + * interface + * + **/ +void +scsi_netlink_init(void) +{ + int error; + + error = netlink_register_notifier(&scsi_netlink_notifier); + if (error) { + printk(KERN_ERR "%s: register of event handler failed - %d\n", + __FUNCTION__, error); + return; + } + + scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, + SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE); + if (!scsi_nl_sock) { + printk(KERN_ERR "%s: register of recieve handler failed\n", + __FUNCTION__); + netlink_unregister_notifier(&scsi_netlink_notifier); + } + + return; +} + + +/** + * scsi_netlink_exit - + * Called by SCSI subsystem to disable the SCSI transport netlink + * interface + * + **/ +void +scsi_netlink_exit(void) +{ + if (scsi_nl_sock) { + sock_release(scsi_nl_sock->sk_socket); + netlink_unregister_notifier(&scsi_netlink_notifier); + } + + return; +} + + diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index ae24c85aaeea..5d023d44e5e7 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -8,6 +8,7 @@ struct scsi_cmnd; struct scsi_device; struct scsi_host_template; struct Scsi_Host; +struct scsi_nl_hdr; /* @@ -110,6 +111,16 @@ extern void __scsi_remove_device(struct scsi_device *); extern struct bus_type scsi_bus_type; +/* scsi_netlink.c */ +#ifdef CONFIG_SCSI_NETLINK +extern void scsi_netlink_init(void); +extern void scsi_netlink_exit(void); +extern struct sock *scsi_nl_sock; +#else +static inline void scsi_netlink_init(void) {} +static inline void scsi_netlink_exit(void) {} +#endif + /* * internal scsi timeout functions: for use by mid-layer and transport * classes. diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 79d31ca2b741..05989f130554 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include "scsi_priv.h" static int fc_queue_work(struct Scsi_Host *, struct work_struct *); @@ -93,6 +96,29 @@ fc_enum_name_search(port_type, fc_port_type, fc_port_type_names) #define FC_PORTTYPE_MAX_NAMELEN 50 +/* Convert fc_host_event_code values to ascii string name */ +static const struct { + enum fc_host_event_code value; + char *name; +} fc_host_event_code_names[] = { + { FCH_EVT_LIP, "lip" }, + { FCH_EVT_LINKUP, "link_up" }, + { FCH_EVT_LINKDOWN, "link_down" }, + { FCH_EVT_LIPRESET, "lip_reset" }, + { FCH_EVT_RSCN, "rscn" }, + { FCH_EVT_ADAPTER_CHANGE, "adapter_chg" }, + { FCH_EVT_PORT_UNKNOWN, "port_unknown" }, + { FCH_EVT_PORT_ONLINE, "port_online" }, + { FCH_EVT_PORT_OFFLINE, "port_offline" }, + { FCH_EVT_PORT_FABRIC, "port_fabric" }, + { FCH_EVT_LINK_UNKNOWN, "link_unknown" }, + { FCH_EVT_VENDOR_UNIQUE, "vendor_unique" }, +}; +fc_enum_name_search(host_event_code, fc_host_event_code, + fc_host_event_code_names) +#define FC_HOST_EVENT_CODE_MAX_NAMELEN 30 + + /* Convert fc_port_state values to ascii string name */ static struct { enum fc_port_state value; @@ -377,10 +403,182 @@ MODULE_PARM_DESC(dev_loss_tmo, " exceeded, the scsi target is removed. Value should be" " between 1 and SCSI_DEVICE_BLOCK_MAX_TIMEOUT."); +/** + * Netlink Infrastructure + **/ + +static atomic_t fc_event_seq; + +/** + * fc_get_event_number - Obtain the next sequential FC event number + * + * Notes: + * We could have inline'd this, but it would have required fc_event_seq to + * be exposed. For now, live with the subroutine call. + * Atomic used to avoid lock/unlock... + **/ +u32 +fc_get_event_number(void) +{ + return atomic_add_return(1, &fc_event_seq); +} +EXPORT_SYMBOL(fc_get_event_number); + + +/** + * fc_host_post_event - called to post an even on an fc_host. + * + * @shost: host the event occurred on + * @event_number: fc event number obtained from get_fc_event_number() + * @event_code: fc_host event being posted + * @event_data: 32bits of data for the event being posted + * + * Notes: + * This routine assumes no locks are held on entry. + **/ +void +fc_host_post_event(struct Scsi_Host *shost, u32 event_number, + enum fc_host_event_code event_code, u32 event_data) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct fc_nl_event *event; + const char *name; + u32 len, skblen; + int err; + + if (!scsi_nl_sock) { + err = -ENOENT; + goto send_fail; + } + + len = FC_NL_MSGALIGN(sizeof(*event)); + skblen = NLMSG_SPACE(len); + + skb = alloc_skb(skblen, GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto send_fail; + } + + nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, + skblen - sizeof(*nlh), 0); + if (!nlh) { + err = -ENOBUFS; + goto send_fail_skb; + } + event = NLMSG_DATA(nlh); + + INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC, + FC_NL_ASYNC_EVENT, len); + event->seconds = get_seconds(); + event->vendor_id = 0; + event->host_no = shost->host_no; + event->event_datalen = sizeof(u32); /* bytes */ + event->event_num = event_number; + event->event_code = event_code; + event->event_data = event_data; + + err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS); + if (err && (err != -ESRCH)) /* filter no recipient errors */ + /* nlmsg_multicast already kfree_skb'd */ + goto send_fail; + + return; + +send_fail_skb: + kfree_skb(skb); +send_fail: + name = get_fc_host_event_code_name(event_code); + printk(KERN_WARNING + "%s: Dropped Event : host %d %s data 0x%08x - err %d\n", + __FUNCTION__, shost->host_no, + (name) ? name : "", event_data, err); + return; +} +EXPORT_SYMBOL(fc_host_post_event); + + +/** + * fc_host_post_vendor_event - called to post a vendor unique event on + * a fc_host + * + * @shost: host the event occurred on + * @event_number: fc event number obtained from get_fc_event_number() + * @data_len: amount, in bytes, of vendor unique data + * @data_buf: pointer to vendor unique data + * + * Notes: + * This routine assumes no locks are held on entry. + **/ +void +fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, + u32 data_len, char * data_buf, u32 vendor_id) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct fc_nl_event *event; + u32 len, skblen; + int err; + + if (!scsi_nl_sock) { + err = -ENOENT; + goto send_vendor_fail; + } + + len = FC_NL_MSGALIGN(sizeof(*event) + data_len); + skblen = NLMSG_SPACE(len); + + skb = alloc_skb(skblen, GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto send_vendor_fail; + } + + nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, + skblen - sizeof(*nlh), 0); + if (!nlh) { + err = -ENOBUFS; + goto send_vendor_fail_skb; + } + event = NLMSG_DATA(nlh); + + INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC, + FC_NL_ASYNC_EVENT, len); + event->seconds = get_seconds(); + event->vendor_id = vendor_id; + event->host_no = shost->host_no; + event->event_datalen = data_len; /* bytes */ + event->event_num = event_number; + event->event_code = FCH_EVT_VENDOR_UNIQUE; + memcpy(&event->event_data, data_buf, data_len); + + err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS); + if (err && (err != -ESRCH)) /* filter no recipient errors */ + /* nlmsg_multicast already kfree_skb'd */ + goto send_vendor_fail; + + return; + +send_vendor_fail_skb: + kfree_skb(skb); +send_vendor_fail: + printk(KERN_WARNING + "%s: Dropped Event : host %d vendor_unique - err %d\n", + __FUNCTION__, shost->host_no, err); + return; +} +EXPORT_SYMBOL(fc_host_post_vendor_event); + + static __init int fc_transport_init(void) { - int error = transport_class_register(&fc_host_class); + int error; + + atomic_set(&fc_event_seq, 0); + + error = transport_class_register(&fc_host_class); if (error) return error; error = transport_class_register(&fc_rport_class); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 855b44668caa..66411622e06e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -21,6 +21,8 @@ #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ #define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ #define MAX_LINKS 32 diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h new file mode 100644 index 000000000000..7a3a20e640c0 --- /dev/null +++ b/include/scsi/scsi_netlink.h @@ -0,0 +1,86 @@ +/* + * SCSI Transport Netlink Interface + * Used for the posting of outbound SCSI transport events + * + * Copyright (C) 2006 James Smart, Emulex Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef SCSI_NETLINK_H +#define SCSI_NETLINK_H + +/* + * This file intended to be included by both kernel and user space + */ + +/* Single Netlink Message type to send all SCSI Transport messages */ +#define SCSI_TRANSPORT_MSG NLMSG_MIN_TYPE + 1 + +/* SCSI Transport Broadcast Groups */ + /* leaving groups 0 and 1 unassigned */ +#define SCSI_NL_GRP_FC_EVENTS (1<<2) /* Group 2 */ +#define SCSI_NL_GRP_CNT 3 + + +/* SCSI_TRANSPORT_MSG event message header */ +struct scsi_nl_hdr { + uint8_t version; + uint8_t transport; + uint16_t magic; + uint16_t msgtype; + uint16_t msglen; +} __attribute__((aligned(sizeof(uint64_t)))); + +/* scsi_nl_hdr->version value */ +#define SCSI_NL_VERSION 1 + +/* scsi_nl_hdr->magic value */ +#define SCSI_NL_MAGIC 0xA1B2 + +/* scsi_nl_hdr->transport value */ +#define SCSI_NL_TRANSPORT 0 +#define SCSI_NL_TRANSPORT_FC 1 +#define SCSI_NL_MAX_TRANSPORTS 2 + +/* scsi_nl_hdr->msgtype values are defined in each transport */ + + +/* + * Vendor ID: + * If transports post vendor-unique events, they must pass a well-known + * 32-bit vendor identifier. This identifier consists of 8 bits indicating + * the "type" of identifier contained, and 24 bits of id data. + * + * Identifiers for each type: + * PCI : ID data is the 16 bit PCI Registered Vendor ID + */ +#define SCSI_NL_VID_ID_MASK 0x00FFFFFF +#define SCSI_NL_VID_TYPE_MASK 0xFF000000 +#define SCSI_NL_VID_TYPE_PCI 0x01000000 + + +#define INIT_SCSI_NL_HDR(hdr, t, mtype, mlen) \ + { \ + (hdr)->version = SCSI_NL_VERSION; \ + (hdr)->transport = t; \ + (hdr)->magic = SCSI_NL_MAGIC; \ + (hdr)->msgtype = mtype; \ + (hdr)->msglen = mlen; \ + } + + +#endif /* SCSI_NETLINK_H */ + diff --git a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h new file mode 100644 index 000000000000..b213d2909fed --- /dev/null +++ b/include/scsi/scsi_netlink_fc.h @@ -0,0 +1,71 @@ +/* + * FC Transport Netlink Interface + * + * Copyright (C) 2006 James Smart, Emulex Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef SCSI_NETLINK_FC_H +#define SCSI_NETLINK_FC_H + +#include + +/* + * This file intended to be included by both kernel and user space + */ + +/* + * FC Transport Message Types + */ + /* kernel -> user */ +#define FC_NL_ASYNC_EVENT 0x0100 + /* user -> kernel */ +/* none */ + + +/* + * Message Structures : + */ + +/* macro to round up message lengths to 8byte boundary */ +#define FC_NL_MSGALIGN(len) (((len) + 7) & ~7) + + +/* + * FC Transport Broadcast Event Message : + * FC_NL_ASYNC_EVENT + * + * Note: if Vendor Unique message, &event_data will be start of + * vendor unique payload, and the length of the payload is + * per event_datalen + * + * Note: When specifying vendor_id, be sure to read the Vendor Type and ID + * formatting requirements specified in scsi_netlink.h + */ +struct fc_nl_event { + struct scsi_nl_hdr snlh; /* must be 1st element ! */ + uint64_t seconds; + uint32_t vendor_id; + uint16_t host_no; + uint16_t event_datalen; + uint32_t event_num; + uint32_t event_code; + uint32_t event_data; +} __attribute__((aligned(sizeof(uint64_t)))); + + +#endif /* SCSI_NETLINK_FC_H */ + diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index c74be5dabfeb..f91c5358af3a 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -29,6 +29,7 @@ #include #include +#include struct scsi_transport_template; @@ -283,6 +284,30 @@ struct fc_host_statistics { }; +/* + * FC Event Codes - Polled and Async, following FC HBAAPI v2.0 guidelines + */ + +/* + * fc_host_event_code: If you alter this, you also need to alter + * scsi_transport_fc.c (for the ascii descriptions). + */ +enum fc_host_event_code { + FCH_EVT_LIP = 0x1, + FCH_EVT_LINKUP = 0x2, + FCH_EVT_LINKDOWN = 0x3, + FCH_EVT_LIPRESET = 0x4, + FCH_EVT_RSCN = 0x5, + FCH_EVT_ADAPTER_CHANGE = 0x103, + FCH_EVT_PORT_UNKNOWN = 0x200, + FCH_EVT_PORT_OFFLINE = 0x201, + FCH_EVT_PORT_ONLINE = 0x202, + FCH_EVT_PORT_FABRIC = 0x204, + FCH_EVT_LINK_UNKNOWN = 0x500, + FCH_EVT_VENDOR_UNIQUE = 0xffff, +}; + + /* * FC Local Port (Host) Attributes * @@ -526,5 +551,14 @@ struct fc_rport *fc_remote_port_add(struct Scsi_Host *shost, void fc_remote_port_delete(struct fc_rport *rport); void fc_remote_port_rolechg(struct fc_rport *rport, u32 roles); int scsi_is_fc_rport(const struct device *); +u32 fc_get_event_number(void); +void fc_host_post_event(struct Scsi_Host *shost, u32 event_number, + enum fc_host_event_code event_code, u32 event_data); +void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, + u32 data_len, char * data_buf, u32 vendor_id); + /* Note: when specifying vendor_id to fc_host_post_vendor_event() + * be sure to read the Vendor Type and ID formatting requirements + * specified in scsi_netlink.h + */ #endif /* SCSI_TRANSPORT_FC_H */ -- cgit v1.2.3-71-gd317 From 76fd85937097a0c2ec8ab23bf21dc10992d1c398 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 8 Sep 2006 11:16:13 -0700 Subject: [PATCH] ethtool: allow const ethtool_ops The ethtool_ops structure is immutable, it expected to be setup by the driver and is never changed. This patch allows drivers to declare there ethtool_ops structure read-only. Signed-off-by: Stephen Hemminger Signed-off-by: Jeff Garzik --- include/linux/netdevice.h | 2 +- net/core/ethtool.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 50a4719512ed..a2e747353367 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -342,7 +342,7 @@ struct net_device /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; - struct ethtool_ops *ethtool_ops; + const struct ethtool_ops *ethtool_ops; /* * This marks the end of the "visible" part of the structure. All diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 2797e2815418..e0ca04f38cef 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -143,7 +143,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; if (!ops->get_drvinfo) return -EOPNOTSUPP; @@ -169,7 +169,7 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; void *regbuf; int reglen, ret; @@ -282,7 +282,7 @@ static int ethtool_get_link(struct net_device *dev, void __user *useraddr) static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; @@ -327,7 +327,7 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; @@ -640,7 +640,7 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; int ret; @@ -673,7 +673,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; @@ -733,7 +733,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) { struct ethtool_stats stats; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; int ret; -- cgit v1.2.3-71-gd317 From 132919ba80ad207755fe271277bfefff865a54fe Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 27 Aug 2006 13:56:52 +0100 Subject: [MMC] Remove data->blksz_bits member data->blksz_bits is unused now - remove it. Signed-off-by: Russell King --- drivers/mmc/mmc.c | 1 - drivers/mmc/mmc_block.c | 1 - include/linux/mmc/mmc.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 74eaaee66de0..5b9caa7978d3 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -996,7 +996,6 @@ static void mmc_read_scrs(struct mmc_host *host) mmc_set_data_timeout(&data, card, 0); - data.blksz_bits = 3; data.blksz = 1 << 3; data.blocks = 1; data.flags = MMC_DATA_READ; diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index a0e0dad1b419..f3a99dd8df8f 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -172,7 +172,6 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) brq.cmd.arg = req->sector << 9; brq.cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; - brq.data.blksz_bits = md->block_bits; brq.data.blksz = 1 << md->block_bits; brq.data.blocks = req->nr_sectors >> (md->block_bits - 9); brq.stop.opcode = MMC_STOP_TRANSMISSION; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 627e2c08ce41..a3594dfd6963 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -68,7 +68,6 @@ struct mmc_command { struct mmc_data { unsigned int timeout_ns; /* data timeout (in ns, max 80ms) */ unsigned int timeout_clks; /* data timeout (in clocks) */ - unsigned int blksz_bits; /* data block size */ unsigned int blksz; /* data block size */ unsigned int blocks; /* number of blocks */ unsigned int error; /* data error */ -- cgit v1.2.3-71-gd317 From db53f28b3a6d9338cca1b7e917dc063ac99e1871 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 30 Aug 2006 15:14:56 +0100 Subject: [MMC] Add multi block-write capability Add a capability flag for drivers to set when they can perform multi- block transfers to cards _and_ correctly report the number of bytes transferred should an error occur. The last point is very important - if a driver reports more bytes than were actually accepted by the card and an error occurs, there is the possibility for data loss. Pierre Ossman provided the patch for wbsd and sdhci. Signed-off-by: Pierre Ossman Signed-off-by: Russell King --- drivers/mmc/mmc_block.c | 27 ++++++++++++++++++++------- drivers/mmc/mmci.c | 1 + drivers/mmc/sdhci.c | 2 +- drivers/mmc/wbsd.c | 2 +- include/linux/mmc/host.h | 1 + 5 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index f3a99dd8df8f..8d18b87bfd34 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -165,6 +166,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) do { struct mmc_blk_request brq; struct mmc_command cmd; + u32 readcmd, writecmd; memset(&brq, 0, sizeof(struct mmc_blk_request)); brq.mrq.cmd = &brq.cmd; @@ -180,20 +182,31 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) mmc_set_data_timeout(&brq.data, card, rq_data_dir(req) != READ); - if (rq_data_dir(req) == READ) { - brq.cmd.opcode = brq.data.blocks > 1 ? MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK; - brq.data.flags |= MMC_DATA_READ; - } else { - brq.cmd.opcode = MMC_WRITE_BLOCK; - brq.data.flags |= MMC_DATA_WRITE; + /* + * If the host doesn't support multiple block writes, force + * block writes to single block. + */ + if (rq_data_dir(req) != READ && + !(card->host->caps & MMC_CAP_MULTIWRITE)) brq.data.blocks = 1; - } if (brq.data.blocks > 1) { brq.data.flags |= MMC_DATA_MULTI; brq.mrq.stop = &brq.stop; + readcmd = MMC_READ_MULTIPLE_BLOCK; + writecmd = MMC_WRITE_MULTIPLE_BLOCK; } else { brq.mrq.stop = NULL; + readcmd = MMC_READ_SINGLE_BLOCK; + writecmd = MMC_WRITE_BLOCK; + } + + if (rq_data_dir(req) == READ) { + brq.cmd.opcode = readcmd; + brq.data.flags |= MMC_DATA_READ; + } else { + brq.cmd.opcode = writecmd; + brq.data.flags |= MMC_DATA_WRITE; } brq.data.sg = mq->sg; diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c index 8419489e7744..2b5a0cc9ea56 100644 --- a/drivers/mmc/mmci.c +++ b/drivers/mmc/mmci.c @@ -509,6 +509,7 @@ static int mmci_probe(struct amba_device *dev, void *id) mmc->f_min = (host->mclk + 511) / 512; mmc->f_max = min(host->mclk, fmax); mmc->ocr_avail = plat->ocr_mask; + mmc->caps = MMC_CAP_MULTIWRITE; /* * We can do SGIO diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c index 4e21b3b9d330..dea4edd1c434 100644 --- a/drivers/mmc/sdhci.c +++ b/drivers/mmc/sdhci.c @@ -1262,7 +1262,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) mmc->ops = &sdhci_ops; mmc->f_min = host->max_clk / 256; mmc->f_max = host->max_clk; - mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; mmc->ocr_avail = 0; if (caps & SDHCI_CAN_VDD_330) diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c index c351c6d1a18a..4a6617d9a49f 100644 --- a/drivers/mmc/wbsd.c +++ b/drivers/mmc/wbsd.c @@ -1323,7 +1323,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev) mmc->f_min = 375000; mmc->f_max = 24000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; spin_lock_init(&host->lock); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ba095aebedff..b282ec9bba08 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -85,6 +85,7 @@ struct mmc_host { unsigned long caps; /* Host capabilities */ #define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ +#define MMC_CAP_MULTIWRITE (1 << 1) /* Can accurately report bytes sent to card on error */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v1.2.3-71-gd317 From fea63e38013ec628ab3f7fddc4c2148064b7910a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 16 Sep 2006 03:04:15 +0900 Subject: [PATCH] libata: fix non-uniform ports handling Non-uniform ports handling got broken while updating libata to handle those in the same host. Only separate irq for the non-uniform secondary port was implemented while all other fields (host flags, transfer mode...) of the secondary port simply shared those of the first. For ata_piix combined mode, which ATM is the only user of non-uniform ports, this causes the secondary port assume the wrong type. This can cause PATA port to use SATA ops, which results in bogus check on PCS and detection failure. This patch adds ata_probe_ent->pinfo2 which points to optional port_info for the secondary port. For the time being, this seems to be the simplest solution. This workaround will be removed together with ata_probe_ent itself after init model is updated to allow more flexibility. Signed-off-by: Tejun Heo Cc: Alan Cox Cc: Nelson A. de Oliveira Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 18 +++++++++++++----- drivers/ata/libata-sff.c | 2 ++ include/linux/libata.h | 8 ++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1c9315401f7a..bb66a12c84e5 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5269,11 +5269,19 @@ void ata_port_init(struct ata_port *ap, struct ata_host *host, ap->host = host; ap->dev = ent->dev; ap->port_no = port_no; - ap->pio_mask = ent->pio_mask; - ap->mwdma_mask = ent->mwdma_mask; - ap->udma_mask = ent->udma_mask; - ap->flags |= ent->port_flags; - ap->ops = ent->port_ops; + if (port_no == 1 && ent->pinfo2) { + ap->pio_mask = ent->pinfo2->pio_mask; + ap->mwdma_mask = ent->pinfo2->mwdma_mask; + ap->udma_mask = ent->pinfo2->udma_mask; + ap->flags |= ent->pinfo2->flags; + ap->ops = ent->pinfo2->port_ops; + } else { + ap->pio_mask = ent->pio_mask; + ap->mwdma_mask = ent->mwdma_mask; + ap->udma_mask = ent->udma_mask; + ap->flags |= ent->port_flags; + ap->ops = ent->port_ops; + } ap->hw_sata_spd_limit = UINT_MAX; ap->active_tag = ATA_TAG_POISON; ap->last_ctl = 0xFF; diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index d51dc41fa195..688bb55e197a 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -858,6 +858,7 @@ ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int probe_ent->port[p].bmdma_addr = bmdma; } ata_std_ports(&probe_ent->port[p]); + probe_ent->pinfo2 = port[1]; p++; } @@ -907,6 +908,7 @@ static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, probe_ent->_host_flags |= ATA_HOST_SIMPLEX; } ata_std_ports(&probe_ent->port[1]); + probe_ent->pinfo2 = port[1]; } else probe_ent->dummy_port_mask |= ATA_PORT_SECONDARY; diff --git a/include/linux/libata.h b/include/linux/libata.h index 8715305f611f..ff67e7524fe9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -361,6 +361,14 @@ struct ata_probe_ent { unsigned long _host_flags; void __iomem *mmio_base; void *private_data; + + /* port_info for the secondary port. Together with irq2, it's + * used to implement non-uniform secondary port. Currently, + * the only user is ata_piix combined mode. This workaround + * will be removed together with ata_probe_ent when init model + * is updated. + */ + const struct ata_port_info *pinfo2; }; struct ata_host { -- cgit v1.2.3-71-gd317 From 93590859884784520a1850767f86296abc2cdc6d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 12 Sep 2006 16:55:12 +0100 Subject: [PATCH] libata: improve handling of diagostic fail (and hardware that misreports it) Our ATA probe code checks that a device is not reporting a diagnostic failure during start up. Unfortunately at least one device seems to like doing this - the Gigabyte iRAM. This is only done for the master right now (which is fine for the iRAM as it is SATA), as with PATA some combinations of ATAPI device seem to fool the check into seeing a drive that isn't there if it is applied to the slave. Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 19 +++++++++++++++++-- include/linux/libata.h | 6 ++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e85c2f8cf193..753b0152afd1 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -616,8 +616,11 @@ ata_dev_try_classify(struct ata_port *ap, unsigned int device, u8 *r_err) if (r_err) *r_err = err; - /* see if device passed diags */ - if (err == 1) + /* see if device passed diags: if master then continue and warn later */ + if (err == 0 && device == 0) + /* diagnostic fail : do nothing _YET_ */ + ap->device[device].horkage |= ATA_HORKAGE_DIAGNOSTIC; + else if (err == 1) /* do nothing */ ; else if ((device == 0) && (err == 0x81)) /* do nothing */ ; @@ -1523,6 +1526,18 @@ int ata_dev_configure(struct ata_device *dev, int print_info) cdb_intr_string); } + if (dev->horkage & ATA_HORKAGE_DIAGNOSTIC) { + /* Let the user know. We don't want to disallow opens for + rescue purposes, or in case the vendor is just a blithering + idiot */ + if (print_info) { + ata_dev_printk(dev, KERN_WARNING, +"Drive reports diagnostics failure. This may indicate a drive\n"); + ata_dev_printk(dev, KERN_WARNING, +"fault or invalid emulation. Contact drive vendor for information.\n"); + } + } + ata_set_port_max_cmd_len(ap); /* limit bridge transfers to udma5, 200 sectors */ diff --git a/include/linux/libata.h b/include/linux/libata.h index ff67e7524fe9..1ef3d3901b47 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -289,6 +289,11 @@ enum { * most devices. */ ATA_SPINUP_WAIT = 8000, + + /* Horkage types. May be set by libata or controller on drives + (some horkage may be drive/controller pair dependant */ + + ATA_HORKAGE_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ }; enum hsm_task_states { @@ -476,6 +481,7 @@ struct ata_device { /* error history */ struct ata_ering ering; + unsigned int horkage; /* List of broken features */ }; /* Offset into struct ata_device. Fields above it are maintained -- cgit v1.2.3-71-gd317 From fadcfa33b6319a5faf8af2287f08bf93a7f926b6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Sep 2006 12:43:58 +0100 Subject: [HEADERS] One line per header in Kbuild files to reduce conflicts Signed-off-by: David Woodhouse --- include/Kbuild | 11 +- include/asm-alpha/Kbuild | 10 +- include/asm-generic/Kbuild | 15 +- include/asm-generic/Kbuild.asm | 38 +++- include/asm-i386/Kbuild | 9 +- include/asm-ia64/Kbuild | 18 +- include/asm-powerpc/Kbuild | 45 +++- include/asm-s390/Kbuild | 11 +- include/asm-sparc/Kbuild | 24 +- include/asm-sparc64/Kbuild | 27 ++- include/asm-x86_64/Kbuild | 18 +- include/linux/Kbuild | 400 +++++++++++++++++++++++++++++----- include/linux/byteorder/Kbuild | 9 +- include/linux/dvb/Kbuild | 11 +- include/linux/netfilter/Kbuild | 47 +++- include/linux/netfilter_arp/Kbuild | 5 +- include/linux/netfilter_bridge/Kbuild | 21 +- include/linux/netfilter_ipv4/Kbuild | 82 +++++-- include/linux/netfilter_ipv6/Kbuild | 27 ++- include/linux/nfsd/Kbuild | 9 +- include/linux/raid/Kbuild | 3 +- include/linux/sunrpc/Kbuild | 2 +- include/linux/tc_act/Kbuild | 5 +- include/linux/tc_ematch/Kbuild | 5 +- include/mtd/Kbuild | 8 +- include/rdma/Kbuild | 2 +- include/scsi/Kbuild | 4 +- include/sound/Kbuild | 12 +- include/video/Kbuild | 2 +- 29 files changed, 721 insertions(+), 159 deletions(-) (limited to 'include/linux') diff --git a/include/Kbuild b/include/Kbuild index cb2534800b19..2d03f995865f 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,2 +1,9 @@ -header-y += asm-generic/ linux/ scsi/ sound/ mtd/ rdma/ video/ -header-y += asm-$(ARCH)/ +header-y += asm-generic/ +header-y += linux/ +header-y += scsi/ +header-y += sound/ +header-y += mtd/ +header-y += rdma/ +header-y += video/ + +header-y += asm-$(ARCH)/ diff --git a/include/asm-alpha/Kbuild b/include/asm-alpha/Kbuild index 2b06b3bad5ff..b7c8f188b313 100644 --- a/include/asm-alpha/Kbuild +++ b/include/asm-alpha/Kbuild @@ -1,5 +1,11 @@ include include/asm-generic/Kbuild.asm -unifdef-y += console.h fpu.h sysinfo.h compiler.h +header-y += gentrap.h +header-y += regdef.h +header-y += pal.h +header-y += reg.h -header-y += gentrap.h regdef.h pal.h reg.h +unifdef-y += console.h +unifdef-y += fpu.h +unifdef-y += sysinfo.h +unifdef-y += compiler.h diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 70594b275a6e..3c06be381701 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,3 +1,12 @@ -header-y += atomic.h errno-base.h errno.h fcntl.h ioctl.h ipc.h mman.h \ - signal.h statfs.h -unifdef-y := resource.h siginfo.h +header-y += atomic.h +header-y += errno-base.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ipc.h +header-y += mman.h +header-y += signal.h +header-y += statfs.h + +unifdef-y += resource.h +unifdef-y += siginfo.h diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index c00de6028fa8..a84c3d88a189 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -1,8 +1,34 @@ -unifdef-y += a.out.h auxvec.h byteorder.h errno.h fcntl.h ioctl.h \ - ioctls.h ipcbuf.h mman.h msgbuf.h param.h poll.h \ - posix_types.h ptrace.h resource.h sembuf.h shmbuf.h shmparam.h \ - sigcontext.h siginfo.h signal.h socket.h sockios.h stat.h \ - statfs.h termbits.h termios.h types.h unistd.h user.h +unifdef-y += a.out.h +unifdef-y += auxvec.h +unifdef-y += byteorder.h +unifdef-y += errno.h +unifdef-y += fcntl.h +unifdef-y += ioctl.h +unifdef-y += ioctls.h +unifdef-y += ipcbuf.h +unifdef-y += mman.h +unifdef-y += msgbuf.h +unifdef-y += param.h +unifdef-y += poll.h +unifdef-y += posix_types.h +unifdef-y += ptrace.h +unifdef-y += resource.h +unifdef-y += sembuf.h +unifdef-y += shmbuf.h +unifdef-y += sigcontext.h +unifdef-y += siginfo.h +unifdef-y += signal.h +unifdef-y += socket.h +unifdef-y += sockios.h +unifdef-y += stat.h +unifdef-y += statfs.h +unifdef-y += termbits.h +unifdef-y += termios.h +unifdef-y += types.h +unifdef-y += unistd.h +unifdef-y += user.h # These probably shouldn't be exported -unifdef-y += elf.h page.h +unifdef-y += shmparam.h +unifdef-y += elf.h +unifdef-y += page.h diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index 2308190321da..b75a348d0c1c 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -1,5 +1,10 @@ include include/asm-generic/Kbuild.asm -header-y += boot.h debugreg.h ldt.h ucontext.h +header-y += boot.h +header-y += debugreg.h +header-y += ldt.h +header-y += ucontext.h -unifdef-y += mtrr.h setup.h vm86.h +unifdef-y += mtrr.h +unifdef-y += setup.h +unifdef-y += vm86.h diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild index f1cb00f39c22..15818a18bc52 100644 --- a/include/asm-ia64/Kbuild +++ b/include/asm-ia64/Kbuild @@ -1,7 +1,17 @@ include include/asm-generic/Kbuild.asm -header-y += break.h fpu.h fpswa.h gcc_intrin.h ia64regs.h \ - intel_intrin.h intrinsics.h perfmon_default_smpl.h \ - ptrace_offsets.h rse.h setup.h ucontext.h +header-y += break.h +header-y += fpu.h +header-y += fpswa.h +header-y += gcc_intrin.h +header-y += ia64regs.h +header-y += intel_intrin.h +header-y += intrinsics.h +header-y += perfmon_default_smpl.h +header-y += ptrace_offsets.h +header-y += rse.h +header-y += setup.h +header-y += ucontext.h -unifdef-y += perfmon.h ustack.h +unifdef-y += perfmon.h +unifdef-y += ustack.h diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild index ac61d7eb6021..9827849953a3 100644 --- a/include/asm-powerpc/Kbuild +++ b/include/asm-powerpc/Kbuild @@ -1,10 +1,41 @@ include include/asm-generic/Kbuild.asm -unifdef-y += a.out.h asm-compat.h bootx.h byteorder.h cputable.h elf.h \ - nvram.h param.h posix_types.h ptrace.h seccomp.h signal.h \ - termios.h types.h unistd.h +header-y += auxvec.h +header-y += ioctls.h +header-y += mman.h +header-y += sembuf.h +header-y += siginfo.h +header-y += stat.h +header-y += errno.h +header-y += ipcbuf.h +header-y += msgbuf.h +header-y += shmbuf.h +header-y += socket.h +header-y += termbits.h +header-y += fcntl.h +header-y += ipc.h +header-y += poll.h +header-y += shmparam.h +header-y += sockios.h +header-y += ucontext.h +header-y += ioctl.h +header-y += linkage.h +header-y += resource.h +header-y += sigcontext.h +header-y += statfs.h -header-y += auxvec.h ioctls.h mman.h sembuf.h siginfo.h stat.h errno.h \ - ipcbuf.h msgbuf.h shmbuf.h socket.h termbits.h fcntl.h ipc.h \ - poll.h shmparam.h sockios.h ucontext.h ioctl.h linkage.h \ - resource.h sigcontext.h statfs.h +unifdef-y += a.out.h +unifdef-y += asm-compat.h +unifdef-y += bootx.h +unifdef-y += byteorder.h +unifdef-y += cputable.h +unifdef-y += elf.h +unifdef-y += nvram.h +unifdef-y += param.h +unifdef-y += posix_types.h +unifdef-y += ptrace.h +unifdef-y += seccomp.h +unifdef-y += signal.h +unifdef-y += termios.h +unifdef-y += types.h +unifdef-y += unistd.h diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index ed8955f49e47..14158a4a9c87 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -1,4 +1,11 @@ include include/asm-generic/Kbuild.asm -unifdef-y += cmb.h debug.h -header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h +header-y += dasd.h +header-y += qeth.h +header-y += tape390.h +header-y += ucontext.h +header-y += vtoc.h +header-y += z90crypt.h + +unifdef-y += cmb.h +unifdef-y += debug.h diff --git a/include/asm-sparc/Kbuild b/include/asm-sparc/Kbuild index e2a57fd7abfa..b22b67a64ecc 100644 --- a/include/asm-sparc/Kbuild +++ b/include/asm-sparc/Kbuild @@ -1,6 +1,22 @@ include include/asm-generic/Kbuild.asm -unifdef-y += fbio.h perfctr.h psr.h -header-y += apc.h asi.h auxio.h bpp.h head.h ipc.h jsflash.h \ - openpromio.h pbm.h pconf.h pgtsun4.h reg.h traps.h \ - turbosparc.h vfc_ioctls.h winmacro.h +header-y += apc.h +header-y += asi.h +header-y += auxio.h +header-y += bpp.h +header-y += head.h +header-y += ipc.h +header-y += jsflash.h +header-y += openpromio.h +header-y += pbm.h +header-y += pconf.h +header-y += pgtsun4.h +header-y += reg.h +header-y += traps.h +header-y += turbosparc.h +header-y += vfc_ioctls.h +header-y += winmacro.h + +unifdef-y += fbio.h +unifdef-y += perfctr.h +unifdef-y += psr.h diff --git a/include/asm-sparc64/Kbuild b/include/asm-sparc64/Kbuild index 9284c3cb27ec..4b59ce46cc2d 100644 --- a/include/asm-sparc64/Kbuild +++ b/include/asm-sparc64/Kbuild @@ -4,7 +4,26 @@ ALTARCH := sparc ARCHDEF := defined __sparc__ && defined __arch64__ ALTARCHDEF := defined __sparc__ && !defined __arch64__ -unifdef-y += fbio.h perfctr.h -header-y += apb.h asi.h bbc.h bpp.h display7seg.h envctrl.h floppy.h \ - ipc.h kdebug.h mostek.h openprom.h openpromio.h parport.h \ - pconf.h psrcompat.h pstate.h reg.h uctx.h utrap.h watchdog.h +header-y += apb.h +header-y += asi.h +header-y += bbc.h +header-y += bpp.h +header-y += display7seg.h +header-y += envctrl.h +header-y += floppy.h +header-y += ipc.h +header-y += kdebug.h +header-y += mostek.h +header-y += openprom.h +header-y += openpromio.h +header-y += parport.h +header-y += pconf.h +header-y += psrcompat.h +header-y += pstate.h +header-y += reg.h +header-y += uctx.h +header-y += utrap.h +header-y += watchdog.h + +unifdef-y += fbio.h +unifdef-y += perfctr.h diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index dc4d101e8a16..40f2f13fe174 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -4,8 +4,18 @@ ALTARCH := i386 ARCHDEF := defined __x86_64__ ALTARCHDEF := defined __i386__ -header-y += boot.h bootsetup.h cpufeature.h debugreg.h ldt.h \ - msr.h prctl.h setup.h sigcontext32.h ucontext.h \ - vsyscall32.h +header-y += boot.h +header-y += bootsetup.h +header-y += cpufeature.h +header-y += debugreg.h +header-y += ldt.h +header-y += msr.h +header-y += prctl.h +header-y += setup.h +header-y += sigcontext32.h +header-y += ucontext.h +header-y += vsyscall32.h -unifdef-y += mce.h mtrr.h vsyscall.h +unifdef-y += mce.h +unifdef-y += mtrr.h +unifdef-y += vsyscall.h diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 2b8a7d68fae3..7d076d97b2f7 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -1,63 +1,343 @@ -header-y := byteorder/ dvb/ hdlc/ isdn/ nfsd/ raid/ sunrpc/ tc_act/ \ - netfilter/ netfilter_arp/ netfilter_bridge/ netfilter_ipv4/ \ - netfilter_ipv6/ +header-y += byteorder/ +header-y += dvb/ +header-y += hdlc/ +header-y += isdn/ +header-y += nfsd/ +header-y += raid/ +header-y += sunrpc/ +header-y += tc_act/ +header-y += netfilter/ +header-y += netfilter_arp/ +header-y += netfilter_bridge/ +header-y += netfilter_ipv4/ +header-y += netfilter_ipv6/ -header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h \ - atmapi.h atmbr2684.h atmclip.h atm_eni.h atm_he.h \ - atm_idt77105.h atmioc.h atmlec.h atmmpc.h atm_nicstar.h \ - atmppp.h atmsap.h atmsvc.h atm_zatm.h auto_fs4.h auxvec.h \ - awe_voice.h ax25.h b1lli.h baycom.h bfs_fs.h blkpg.h \ - bpqether.h cdk.h chio.h coda_psdev.h coff.h comstats.h \ - consolemap.h cycx_cfm.h dm-ioctl.h dn.h dqblk_v1.h \ - dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h \ - fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h \ - fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h \ - hpfs_fs.h hysdn_if.h i2c-dev.h i8k.h icmp.h \ - if_arcnet.h if_arp.h if_bonding.h if_cablemodem.h if_fc.h \ - if_fddi.h if.h if_hippi.h if_infiniband.h if_packet.h \ - if_plip.h if_ppp.h if_slip.h if_strip.h if_tunnel.h in6.h \ - in_route.h ioctl.h ip.h ipmi_msgdefs.h ip_mp_alg.h ipsec.h \ - ipx.h irda.h isdn_divertif.h iso_fs.h ite_gpio.h ixjuser.h \ - jffs2.h keyctl.h limits.h major.h matroxfb.h meye.h minix_fs.h \ - mmtimer.h mqueue.h mtio.h ncp_no.h netfilter_arp.h netrom.h \ - nfs2.h nfs4_mount.h nfs_mount.h openprom_fs.h param.h \ - pci_ids.h pci_regs.h personality.h pfkeyv2.h pg.h pkt_cls.h \ - pkt_sched.h posix_types.h ppdev.h prctl.h ps2esdi.h qic117.h \ - qnxtypes.h quotaio_v1.h quotaio_v2.h radeonfb.h raw.h \ - resource.h rose.h sctp.h smbno.h snmp.h sockios.h som.h \ - sound.h stddef.h synclink.h telephony.h termios.h ticable.h \ - times.h tiocl.h tipc.h toshiba.h ultrasound.h un.h utime.h \ - utsname.h video_decoder.h video_encoder.h videotext.h vt.h \ - wavefront.h wireless.h xattr.h x25.h zorro_ids.h +header-y += affs_fs.h +header-y += affs_hardblocks.h +header-y += aio_abi.h +header-y += a.out.h +header-y += arcfb.h +header-y += atmapi.h +header-y += atmbr2684.h +header-y += atmclip.h +header-y += atm_eni.h +header-y += atm_he.h +header-y += atm_idt77105.h +header-y += atmioc.h +header-y += atmlec.h +header-y += atmmpc.h +header-y += atm_nicstar.h +header-y += atmppp.h +header-y += atmsap.h +header-y += atmsvc.h +header-y += atm_zatm.h +header-y += auto_fs4.h +header-y += auxvec.h +header-y += awe_voice.h +header-y += ax25.h +header-y += b1lli.h +header-y += baycom.h +header-y += bfs_fs.h +header-y += blkpg.h +header-y += bpqether.h +header-y += cdk.h +header-y += chio.h +header-y += coda_psdev.h +header-y += coff.h +header-y += comstats.h +header-y += consolemap.h +header-y += cycx_cfm.h +header-y += dm-ioctl.h +header-y += dn.h +header-y += dqblk_v1.h +header-y += dqblk_v2.h +header-y += dqblk_xfs.h +header-y += efs_fs_sb.h +header-y += elf-fdpic.h +header-y += elf.h +header-y += elf-em.h +header-y += fadvise.h +header-y += fd.h +header-y += fdreg.h +header-y += ftape-header-segment.h +header-y += ftape-vendors.h +header-y += fuse.h +header-y += futex.h +header-y += genetlink.h +header-y += gen_stats.h +header-y += gigaset_dev.h +header-y += hdsmart.h +header-y += hpfs_fs.h +header-y += hysdn_if.h +header-y += i2c-dev.h +header-y += i8k.h +header-y += icmp.h +header-y += if_arcnet.h +header-y += if_arp.h +header-y += if_bonding.h +header-y += if_cablemodem.h +header-y += if_fc.h +header-y += if_fddi.h +header-y += if.h +header-y += if_hippi.h +header-y += if_infiniband.h +header-y += if_packet.h +header-y += if_plip.h +header-y += if_ppp.h +header-y += if_slip.h +header-y += if_strip.h +header-y += if_tunnel.h +header-y += in6.h +header-y += in_route.h +header-y += ioctl.h +header-y += ip.h +header-y += ipmi_msgdefs.h +header-y += ip_mp_alg.h +header-y += ipsec.h +header-y += ipx.h +header-y += irda.h +header-y += isdn_divertif.h +header-y += iso_fs.h +header-y += ite_gpio.h +header-y += ixjuser.h +header-y += jffs2.h +header-y += keyctl.h +header-y += limits.h +header-y += major.h +header-y += matroxfb.h +header-y += meye.h +header-y += minix_fs.h +header-y += mmtimer.h +header-y += mqueue.h +header-y += mtio.h +header-y += ncp_no.h +header-y += netfilter_arp.h +header-y += netrom.h +header-y += nfs2.h +header-y += nfs4_mount.h +header-y += nfs_mount.h +header-y += openprom_fs.h +header-y += param.h +header-y += pci_ids.h +header-y += pci_regs.h +header-y += personality.h +header-y += pfkeyv2.h +header-y += pg.h +header-y += pkt_cls.h +header-y += pkt_sched.h +header-y += posix_types.h +header-y += ppdev.h +header-y += prctl.h +header-y += ps2esdi.h +header-y += qic117.h +header-y += qnxtypes.h +header-y += quotaio_v1.h +header-y += quotaio_v2.h +header-y += radeonfb.h +header-y += raw.h +header-y += resource.h +header-y += rose.h +header-y += sctp.h +header-y += smbno.h +header-y += snmp.h +header-y += sockios.h +header-y += som.h +header-y += sound.h +header-y += stddef.h +header-y += synclink.h +header-y += telephony.h +header-y += termios.h +header-y += ticable.h +header-y += times.h +header-y += tiocl.h +header-y += tipc.h +header-y += toshiba.h +header-y += ultrasound.h +header-y += un.h +header-y += utime.h +header-y += utsname.h +header-y += video_decoder.h +header-y += video_encoder.h +header-y += videotext.h +header-y += vt.h +header-y += wavefront.h +header-y += wireless.h +header-y += xattr.h +header-y += x25.h +header-y += zorro_ids.h -unifdef-y += acct.h adb.h adfs_fs.h agpgart.h apm_bios.h atalk.h \ - atmarp.h atmdev.h atm.h atm_tcp.h audit.h auto_fs.h binfmts.h \ - capability.h capi.h cciss_ioctl.h cdrom.h cm4000_cs.h \ - cn_proc.h coda.h connector.h cramfs_fs.h cuda.h cyclades.h \ - dccp.h dirent.h divert.h elfcore.h errno.h errqueue.h \ - ethtool.h eventpoll.h ext2_fs.h ext3_fs.h fb.h fcntl.h \ - filter.h flat.h fs.h ftape.h gameport.h generic_serial.h \ - genhd.h hayesesp.h hdlcdrv.h hdlc.h hdreg.h hiddev.h hpet.h \ - i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h \ - if_eql.h if_ether.h if_frad.h if_ltalk.h if_pppox.h \ - if_shaper.h if_tr.h if_tun.h if_vlan.h if_wanpipe.h igmp.h \ - inet_diag.h in.h inotify.h input.h ipc.h ipmi.h ipv6.h \ - ipv6_route.h isdn.h isdnif.h isdn_ppp.h isicom.h jbd.h \ - joystick.h kdev_t.h kd.h kernelcapi.h kernel.h keyboard.h \ - llc.h loop.h lp.h mempolicy.h mii.h mman.h mroute.h msdos_fs.h \ - msg.h nbd.h ncp_fs.h ncp.h ncp_mount.h netdevice.h \ - netfilter_bridge.h netfilter_decnet.h netfilter.h \ - netfilter_ipv4.h netfilter_ipv6.h netfilter_logging.h net.h \ - netlink.h nfs3.h nfs4.h nfsacl.h nfs_fs.h nfs.h nfs_idmap.h \ - n_r3964.h nubus.h nvram.h parport.h patchkey.h pci.h pktcdvd.h \ - pmu.h poll.h ppp_defs.h ppp-comp.h ptrace.h qnx4_fs.h quota.h \ - random.h reboot.h reiserfs_fs.h reiserfs_xattr.h romfs_fs.h \ - route.h rtc.h rtnetlink.h scc.h sched.h sdla.h \ - selinux_netlink.h sem.h serial_core.h serial.h serio.h shm.h \ - signal.h smb_fs.h smb.h smb_mount.h socket.h sonet.h sonypi.h \ - soundcard.h stat.h sysctl.h tcp.h time.h timex.h tty.h types.h \ - udf_fs_i.h udp.h uinput.h uio.h unistd.h usb_ch9.h \ - usbdevice_fs.h user.h videodev2.h videodev.h wait.h \ - wanrouter.h watchdog.h xfrm.h zftape.h +unifdef-y += acct.h +unifdef-y += adb.h +unifdef-y += adfs_fs.h +unifdef-y += agpgart.h +unifdef-y += apm_bios.h +unifdef-y += atalk.h +unifdef-y += atmarp.h +unifdef-y += atmdev.h +unifdef-y += atm.h +unifdef-y += atm_tcp.h +unifdef-y += audit.h +unifdef-y += auto_fs.h +unifdef-y += binfmts.h +unifdef-y += capability.h +unifdef-y += capi.h +unifdef-y += cciss_ioctl.h +unifdef-y += cdrom.h +unifdef-y += cm4000_cs.h +unifdef-y += cn_proc.h +unifdef-y += coda.h +unifdef-y += connector.h +unifdef-y += cramfs_fs.h +unifdef-y += cuda.h +unifdef-y += cyclades.h +unifdef-y += dccp.h +unifdef-y += dirent.h +unifdef-y += divert.h +unifdef-y += elfcore.h +unifdef-y += errno.h +unifdef-y += errqueue.h +unifdef-y += ethtool.h +unifdef-y += eventpoll.h +unifdef-y += ext2_fs.h +unifdef-y += ext3_fs.h +unifdef-y += fb.h +unifdef-y += fcntl.h +unifdef-y += filter.h +unifdef-y += flat.h +unifdef-y += fs.h +unifdef-y += ftape.h +unifdef-y += gameport.h +unifdef-y += generic_serial.h +unifdef-y += genhd.h +unifdef-y += hayesesp.h +unifdef-y += hdlcdrv.h +unifdef-y += hdlc.h +unifdef-y += hdreg.h +unifdef-y += hiddev.h +unifdef-y += hpet.h +unifdef-y += i2c.h +unifdef-y += i2o-dev.h +unifdef-y += icmpv6.h +unifdef-y += if_bridge.h +unifdef-y += if_ec.h +unifdef-y += if_eql.h +unifdef-y += if_ether.h +unifdef-y += if_frad.h +unifdef-y += if_ltalk.h +unifdef-y += if_pppox.h +unifdef-y += if_shaper.h +unifdef-y += if_tr.h +unifdef-y += if_tun.h +unifdef-y += if_vlan.h +unifdef-y += if_wanpipe.h +unifdef-y += igmp.h +unifdef-y += inet_diag.h +unifdef-y += in.h +unifdef-y += inotify.h +unifdef-y += input.h +unifdef-y += ipc.h +unifdef-y += ipmi.h +unifdef-y += ipv6.h +unifdef-y += ipv6_route.h +unifdef-y += isdn.h +unifdef-y += isdnif.h +unifdef-y += isdn_ppp.h +unifdef-y += isicom.h +unifdef-y += jbd.h +unifdef-y += joystick.h +unifdef-y += kdev_t.h +unifdef-y += kd.h +unifdef-y += kernelcapi.h +unifdef-y += kernel.h +unifdef-y += keyboard.h +unifdef-y += llc.h +unifdef-y += loop.h +unifdef-y += lp.h +unifdef-y += mempolicy.h +unifdef-y += mii.h +unifdef-y += mman.h +unifdef-y += mroute.h +unifdef-y += msdos_fs.h +unifdef-y += msg.h +unifdef-y += nbd.h +unifdef-y += ncp_fs.h +unifdef-y += ncp.h +unifdef-y += ncp_mount.h +unifdef-y += netdevice.h +unifdef-y += netfilter_bridge.h +unifdef-y += netfilter_decnet.h +unifdef-y += netfilter.h +unifdef-y += netfilter_ipv4.h +unifdef-y += netfilter_ipv6.h +unifdef-y += netfilter_logging.h +unifdef-y += net.h +unifdef-y += netlink.h +unifdef-y += nfs3.h +unifdef-y += nfs4.h +unifdef-y += nfsacl.h +unifdef-y += nfs_fs.h +unifdef-y += nfs.h +unifdef-y += nfs_idmap.h +unifdef-y += n_r3964.h +unifdef-y += nubus.h +unifdef-y += nvram.h +unifdef-y += parport.h +unifdef-y += patchkey.h +unifdef-y += pci.h +unifdef-y += pktcdvd.h +unifdef-y += pmu.h +unifdef-y += poll.h +unifdef-y += ppp_defs.h +unifdef-y += ppp-comp.h +unifdef-y += ptrace.h +unifdef-y += qnx4_fs.h +unifdef-y += quota.h +unifdef-y += random.h +unifdef-y += reboot.h +unifdef-y += reiserfs_fs.h +unifdef-y += reiserfs_xattr.h +unifdef-y += romfs_fs.h +unifdef-y += route.h +unifdef-y += rtc.h +unifdef-y += rtnetlink.h +unifdef-y += scc.h +unifdef-y += sched.h +unifdef-y += sdla.h +unifdef-y += selinux_netlink.h +unifdef-y += sem.h +unifdef-y += serial_core.h +unifdef-y += serial.h +unifdef-y += serio.h +unifdef-y += shm.h +unifdef-y += signal.h +unifdef-y += smb_fs.h +unifdef-y += smb.h +unifdef-y += smb_mount.h +unifdef-y += socket.h +unifdef-y += sonet.h +unifdef-y += sonypi.h +unifdef-y += soundcard.h +unifdef-y += stat.h +unifdef-y += sysctl.h +unifdef-y += tcp.h +unifdef-y += time.h +unifdef-y += timex.h +unifdef-y += tty.h +unifdef-y += types.h +unifdef-y += udf_fs_i.h +unifdef-y += udp.h +unifdef-y += uinput.h +unifdef-y += uio.h +unifdef-y += unistd.h +unifdef-y += usb_ch9.h +unifdef-y += usbdevice_fs.h +unifdef-y += user.h +unifdef-y += videodev2.h +unifdef-y += videodev.h +unifdef-y += wait.h +unifdef-y += wanrouter.h +unifdef-y += watchdog.h +unifdef-y += xfrm.h +unifdef-y += zftape.h -objhdr-y := version.h +objhdr-y += version.h diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild index 84a57d4fb212..56499ab9e32e 100644 --- a/include/linux/byteorder/Kbuild +++ b/include/linux/byteorder/Kbuild @@ -1,2 +1,7 @@ -unifdef-y += generic.h swabb.h swab.h -header-y += big_endian.h little_endian.h pdp_endian.h +header-y += big_endian.h +header-y += little_endian.h +header-y += pdp_endian.h + +unifdef-y += generic.h +unifdef-y += swabb.h +unifdef-y += swab.h diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild index 63973af72fd5..d97b3a51e227 100644 --- a/include/linux/dvb/Kbuild +++ b/include/linux/dvb/Kbuild @@ -1,2 +1,9 @@ -header-y += ca.h frontend.h net.h osd.h version.h -unifdef-y := audio.h dmx.h video.h +header-y += ca.h +header-y += frontend.h +header-y += net.h +header-y += osd.h +header-y += version.h + +unifdef-y += audio.h +unifdef-y += dmx.h +unifdef-y += video.h diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 1d3a14e2da6e..9a285cecf249 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -1,11 +1,38 @@ -header-y := nf_conntrack_sctp.h nf_conntrack_tuple_common.h \ - nfnetlink_conntrack.h nfnetlink_log.h nfnetlink_queue.h \ - xt_CLASSIFY.h xt_comment.h xt_connbytes.h xt_connmark.h \ - xt_CONNMARK.h xt_conntrack.h xt_dccp.h xt_esp.h \ - xt_helper.h xt_length.h xt_limit.h xt_mac.h xt_mark.h \ - xt_MARK.h xt_multiport.h xt_NFQUEUE.h xt_pkttype.h \ - xt_policy.h xt_realm.h xt_sctp.h xt_state.h xt_string.h \ - xt_tcpmss.h xt_tcpudp.h xt_SECMARK.h xt_CONNSECMARK.h +header-y += nf_conntrack_sctp.h +header-y += nf_conntrack_tuple_common.h +header-y += nfnetlink_conntrack.h +header-y += nfnetlink_log.h +header-y += nfnetlink_queue.h +header-y += xt_CLASSIFY.h +header-y += xt_comment.h +header-y += xt_connbytes.h +header-y += xt_connmark.h +header-y += xt_CONNMARK.h +header-y += xt_conntrack.h +header-y += xt_dccp.h +header-y += xt_esp.h +header-y += xt_helper.h +header-y += xt_length.h +header-y += xt_limit.h +header-y += xt_mac.h +header-y += xt_mark.h +header-y += xt_MARK.h +header-y += xt_multiport.h +header-y += xt_NFQUEUE.h +header-y += xt_pkttype.h +header-y += xt_policy.h +header-y += xt_realm.h +header-y += xt_sctp.h +header-y += xt_state.h +header-y += xt_string.h +header-y += xt_tcpmss.h +header-y += xt_tcpudp.h +header-y += xt_SECMARK.h +header-y += xt_CONNSECMARK.h -unifdef-y := nf_conntrack_common.h nf_conntrack_ftp.h \ - nf_conntrack_tcp.h nfnetlink.h x_tables.h xt_physdev.h +unifdef-y += nf_conntrack_common.h +unifdef-y += nf_conntrack_ftp.h +unifdef-y += nf_conntrack_tcp.h +unifdef-y += nfnetlink.h +unifdef-y += x_tables.h +unifdef-y += xt_physdev.h diff --git a/include/linux/netfilter_arp/Kbuild b/include/linux/netfilter_arp/Kbuild index 198ec5e7b17d..4f13dfcb92ea 100644 --- a/include/linux/netfilter_arp/Kbuild +++ b/include/linux/netfilter_arp/Kbuild @@ -1,2 +1,3 @@ -header-y := arpt_mangle.h -unifdef-y := arp_tables.h +header-y += arpt_mangle.h + +unifdef-y += arp_tables.h diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild index 5b1aba6abbad..76ff4c47d8c4 100644 --- a/include/linux/netfilter_bridge/Kbuild +++ b/include/linux/netfilter_bridge/Kbuild @@ -1,4 +1,17 @@ -header-y += ebt_among.h ebt_arp.h ebt_arpreply.h ebt_ip.h ebt_limit.h \ - ebt_log.h ebt_mark_m.h ebt_mark_t.h ebt_nat.h ebt_pkttype.h \ - ebt_redirect.h ebt_stp.h ebt_ulog.h ebt_vlan.h -unifdef-y := ebtables.h ebt_802_3.h +header-y += ebt_among.h +header-y += ebt_arp.h +header-y += ebt_arpreply.h +header-y += ebt_ip.h +header-y += ebt_limit.h +header-y += ebt_log.h +header-y += ebt_mark_m.h +header-y += ebt_mark_t.h +header-y += ebt_nat.h +header-y += ebt_pkttype.h +header-y += ebt_redirect.h +header-y += ebt_stp.h +header-y += ebt_ulog.h +header-y += ebt_vlan.h + +unifdef-y += ebtables.h +unifdef-y += ebt_802_3.h diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 04e4d2721689..591c1a809c00 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,21 +1,63 @@ +header-y += ip_conntrack_helper.h +header-y += ip_conntrack_helper_h323_asn1.h +header-y += ip_conntrack_helper_h323_types.h +header-y += ip_conntrack_protocol.h +header-y += ip_conntrack_sctp.h +header-y += ip_conntrack_tcp.h +header-y += ip_conntrack_tftp.h +header-y += ip_nat_pptp.h +header-y += ipt_addrtype.h +header-y += ipt_ah.h +header-y += ipt_CLASSIFY.h +header-y += ipt_CLUSTERIP.h +header-y += ipt_comment.h +header-y += ipt_connbytes.h +header-y += ipt_connmark.h +header-y += ipt_CONNMARK.h +header-y += ipt_conntrack.h +header-y += ipt_dccp.h +header-y += ipt_dscp.h +header-y += ipt_DSCP.h +header-y += ipt_ecn.h +header-y += ipt_ECN.h +header-y += ipt_esp.h +header-y += ipt_hashlimit.h +header-y += ipt_helper.h +header-y += ipt_iprange.h +header-y += ipt_length.h +header-y += ipt_limit.h +header-y += ipt_LOG.h +header-y += ipt_mac.h +header-y += ipt_mark.h +header-y += ipt_MARK.h +header-y += ipt_multiport.h +header-y += ipt_NFQUEUE.h +header-y += ipt_owner.h +header-y += ipt_physdev.h +header-y += ipt_pkttype.h +header-y += ipt_policy.h +header-y += ipt_realm.h +header-y += ipt_recent.h +header-y += ipt_REJECT.h +header-y += ipt_SAME.h +header-y += ipt_sctp.h +header-y += ipt_state.h +header-y += ipt_string.h +header-y += ipt_tcpmss.h +header-y += ipt_TCPMSS.h +header-y += ipt_tos.h +header-y += ipt_TOS.h +header-y += ipt_ttl.h +header-y += ipt_TTL.h +header-y += ipt_ULOG.h -header-y := ip_conntrack_helper.h ip_conntrack_helper_h323_asn1.h \ - ip_conntrack_helper_h323_types.h ip_conntrack_protocol.h \ - ip_conntrack_sctp.h ip_conntrack_tcp.h ip_conntrack_tftp.h \ - ip_nat_pptp.h ipt_addrtype.h ipt_ah.h \ - ipt_CLASSIFY.h ipt_CLUSTERIP.h ipt_comment.h \ - ipt_connbytes.h ipt_connmark.h ipt_CONNMARK.h \ - ipt_conntrack.h ipt_dccp.h ipt_dscp.h ipt_DSCP.h ipt_ecn.h \ - ipt_ECN.h ipt_esp.h ipt_hashlimit.h ipt_helper.h \ - ipt_iprange.h ipt_length.h ipt_limit.h ipt_LOG.h ipt_mac.h \ - ipt_mark.h ipt_MARK.h ipt_multiport.h ipt_NFQUEUE.h \ - ipt_owner.h ipt_physdev.h ipt_pkttype.h ipt_policy.h \ - ipt_realm.h ipt_recent.h ipt_REJECT.h ipt_SAME.h \ - ipt_sctp.h ipt_state.h ipt_string.h ipt_tcpmss.h \ - ipt_TCPMSS.h ipt_tos.h ipt_TOS.h ipt_ttl.h ipt_TTL.h \ - ipt_ULOG.h - -unifdef-y := ip_conntrack.h ip_conntrack_h323.h ip_conntrack_irc.h \ - ip_conntrack_pptp.h ip_conntrack_proto_gre.h \ - ip_conntrack_tuple.h ip_nat.h ip_nat_rule.h ip_queue.h \ - ip_tables.h +unifdef-y += ip_conntrack.h +unifdef-y += ip_conntrack_h323.h +unifdef-y += ip_conntrack_irc.h +unifdef-y += ip_conntrack_pptp.h +unifdef-y += ip_conntrack_proto_gre.h +unifdef-y += ip_conntrack_tuple.h +unifdef-y += ip_nat.h +unifdef-y += ip_nat_rule.h +unifdef-y += ip_queue.h +unifdef-y += ip_tables.h diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index 913ddbf55b4b..9dd978d149ff 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -1,6 +1,21 @@ -header-y += ip6t_HL.h ip6t_LOG.h ip6t_MARK.h ip6t_REJECT.h ip6t_ah.h \ - ip6t_esp.h ip6t_frag.h ip6t_hl.h ip6t_ipv6header.h \ - ip6t_length.h ip6t_limit.h ip6t_mac.h ip6t_mark.h \ - ip6t_multiport.h ip6t_opts.h ip6t_owner.h ip6t_policy.h \ - ip6t_physdev.h ip6t_rt.h -unifdef-y := ip6_tables.h +header-y += ip6t_HL.h +header-y += ip6t_LOG.h +header-y += ip6t_MARK.h +header-y += ip6t_REJECT.h +header-y += ip6t_ah.h +header-y += ip6t_esp.h +header-y += ip6t_frag.h +header-y += ip6t_hl.h +header-y += ip6t_ipv6header.h +header-y += ip6t_length.h +header-y += ip6t_limit.h +header-y += ip6t_mac.h +header-y += ip6t_mark.h +header-y += ip6t_multiport.h +header-y += ip6t_opts.h +header-y += ip6t_owner.h +header-y += ip6t_policy.h +header-y += ip6t_physdev.h +header-y += ip6t_rt.h + +unifdef-y += ip6_tables.h diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild index c8c545665885..d9c5455808e5 100644 --- a/include/linux/nfsd/Kbuild +++ b/include/linux/nfsd/Kbuild @@ -1,2 +1,7 @@ -unifdef-y := const.h export.h stats.h syscall.h nfsfh.h debug.h auth.h - +unifdef-y += const.h +unifdef-y += export.h +unifdef-y += stats.h +unifdef-y += syscall.h +unifdef-y += nfsfh.h +unifdef-y += debug.h +unifdef-y += auth.h diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild index 73fa27a8d552..2415a64c5e51 100644 --- a/include/linux/raid/Kbuild +++ b/include/linux/raid/Kbuild @@ -1 +1,2 @@ -header-y += md_p.h md_u.h +header-y += md_p.h +header-y += md_u.h diff --git a/include/linux/sunrpc/Kbuild b/include/linux/sunrpc/Kbuild index 0d1d768a27bf..fb438f158eee 100644 --- a/include/linux/sunrpc/Kbuild +++ b/include/linux/sunrpc/Kbuild @@ -1 +1 @@ -unifdef-y := debug.h +unifdef-y += debug.h diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 5251a505b2f1..78dfbac36375 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -1 +1,4 @@ -header-y += tc_gact.h tc_ipt.h tc_mirred.h tc_pedit.h +header-y += tc_gact.h +header-y += tc_ipt.h +header-y += tc_mirred.h +header-y += tc_pedit.h diff --git a/include/linux/tc_ematch/Kbuild b/include/linux/tc_ematch/Kbuild index 381e93018df6..4a58a1c32a00 100644 --- a/include/linux/tc_ematch/Kbuild +++ b/include/linux/tc_ematch/Kbuild @@ -1 +1,4 @@ -headers-y := tc_em_cmp.h tc_em_meta.h tc_em_nbyte.h tc_em_text.h +header-y += tc_em_cmp.h +header-y += tc_em_meta.h +header-y += tc_em_nbyte.h +header-y += tc_em_text.h diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild index e1da2a5b2a57..13e7a3c6d794 100644 --- a/include/mtd/Kbuild +++ b/include/mtd/Kbuild @@ -1,2 +1,6 @@ -unifdef-y := mtd-abi.h -header-y := inftl-user.h jffs2-user.h mtd-user.h nftl-user.h +header-y += inftl-user.h +header-y += jffs2-user.h +header-y += mtd-user.h +header-y += nftl-user.h + +unifdef-y += mtd-abi.h diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild index eb710ba9b1a0..e7c043216558 100644 --- a/include/rdma/Kbuild +++ b/include/rdma/Kbuild @@ -1 +1 @@ -header-y := ib_user_mad.h +header-y += ib_user_mad.h diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild index 14a033d73314..744f85011f1e 100644 --- a/include/scsi/Kbuild +++ b/include/scsi/Kbuild @@ -1,2 +1,4 @@ header-y += scsi.h -unifdef-y := scsi_ioctl.h sg.h + +unifdef-y += scsi_ioctl.h +unifdef-y += sg.h diff --git a/include/sound/Kbuild b/include/sound/Kbuild index 3a5a3df61496..fd054a344324 100644 --- a/include/sound/Kbuild +++ b/include/sound/Kbuild @@ -1,2 +1,10 @@ -header-y := asound_fm.h hdsp.h hdspm.h sfnt_info.h sscape_ioctl.h -unifdef-y := asequencer.h asound.h emu10k1.h sb16_csp.h +header-y += asound_fm.h +header-y += hdsp.h +header-y += hdspm.h +header-y += sfnt_info.h +header-y += sscape_ioctl.h + +unifdef-y += asequencer.h +unifdef-y += asound.h +unifdef-y += emu10k1.h +unifdef-y += sb16_csp.h diff --git a/include/video/Kbuild b/include/video/Kbuild index 76a60737cc15..a14f9c045b8c 100644 --- a/include/video/Kbuild +++ b/include/video/Kbuild @@ -1 +1 @@ -unifdef-y := sisfb.h +unifdef-y += sisfb.h -- cgit v1.2.3-71-gd317 From 1534c3820c26aca4e2567f97b8add8bea40e7e2b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:25 +0200 Subject: [S390] zcrypt adjunct processor bus. Add a bus for the adjunct processor interface. Up to 64 devices can be connect to the ap bus interface, each device with 16 domains. That makes 1024 message queues. The interface is asynchronous, the answer to a message sent to a queue needs to be received at some later point in time. Unfortunately the interface does not provide interrupts when a message reply is pending. So the ap bus needs to implement some fancy polling, each active queue is polled once per 1/HZ second or continuously if an idle cpus exsists and the poll thread is activ (see poll_thread parameter). The ap bus uses the sysfs path /sys/bus/ap and has two bus attributes, ap_domain and config_time. The ap_domain selects one of the 16 domains to be used for this system. This limits the maximum number of ap devices to 64. The config_time attribute contains the number of seconds between two ap bus scans to find new devices. The ap bus uses the modalias entries of the form "ap:tN" to autoload the ap driver for hardware type N. Currently known types are: 3 - PCICC, 4 - PCICA, 5 - PCIXCC, 6 - CEX2A and 7 - CEX2C. Signed-off-by: Cornelia Huck Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 1221 +++++++++++++++++++++++++++++++++++++++ drivers/s390/crypto/ap_bus.h | 158 +++++ include/linux/mod_devicetable.h | 11 + scripts/mod/file2alias.c | 12 + 4 files changed, 1402 insertions(+) create mode 100644 drivers/s390/crypto/ap_bus.c create mode 100644 drivers/s390/crypto/ap_bus.h (limited to 'include/linux') diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c new file mode 100644 index 000000000000..6ed0985c0c91 --- /dev/null +++ b/drivers/s390/crypto/ap_bus.c @@ -0,0 +1,1221 @@ +/* + * linux/drivers/s390/crypto/ap_bus.c + * + * Copyright (C) 2006 IBM Corporation + * Author(s): Cornelia Huck + * Martin Schwidefsky + * Ralph Wuerthner + * + * Adjunct processor bus. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ap_bus.h" + +/* Some prototypes. */ +static void ap_scan_bus(void *); +static void ap_poll_all(unsigned long); +static void ap_poll_timeout(unsigned long); +static int ap_poll_thread_start(void); +static void ap_poll_thread_stop(void); + +/** + * Module description. + */ +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("Adjunct Processor Bus driver, " + "Copyright 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); + +/** + * Module parameter + */ +int ap_domain_index = -1; /* Adjunct Processor Domain Index */ +module_param_named(domain, ap_domain_index, int, 0000); +MODULE_PARM_DESC(domain, "domain index for ap devices"); +EXPORT_SYMBOL(ap_domain_index); + +static int ap_thread_flag = 1; +module_param_named(poll_thread, ap_thread_flag, int, 0000); +MODULE_PARM_DESC(poll_thread, "Turn on/off poll thread, default is 1 (on)."); + +static struct device *ap_root_device = NULL; + +/** + * Workqueue & timer for bus rescan. + */ +static struct workqueue_struct *ap_work_queue; +static struct timer_list ap_config_timer; +static int ap_config_time = AP_CONFIG_TIME; +static DECLARE_WORK(ap_config_work, ap_scan_bus, NULL); + +/** + * Tasklet & timer for AP request polling. + */ +static struct timer_list ap_poll_timer = TIMER_INITIALIZER(ap_poll_timeout,0,0); +static DECLARE_TASKLET(ap_tasklet, ap_poll_all, 0); +static atomic_t ap_poll_requests = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(ap_poll_wait); +static struct task_struct *ap_poll_kthread = NULL; +static DEFINE_MUTEX(ap_poll_thread_mutex); + +/** + * Test if ap instructions are available. + * + * Returns 0 if the ap instructions are installed. + */ +static inline int ap_instructions_available(void) +{ + register unsigned long reg0 asm ("0") = AP_MKQID(0,0); + register unsigned long reg1 asm ("1") = -ENODEV; + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile( + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %1,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg0), "+d" (reg1), "+d" (reg2) : : "cc" ); + return reg1; +} + +/** + * Test adjunct processor queue. + * @qid: the ap queue number + * @queue_depth: pointer to queue depth value + * @device_type: pointer to device type value + * + * Returns ap queue status structure. + */ +static inline struct ap_queue_status +ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + *device_type = (int) (reg2 >> 24); + *queue_depth = (int) (reg2 & 0xff); + return reg1; +} + +/** + * Reset adjunct processor queue. + * @qid: the ap queue number + * + * Returns ap queue status structure. + */ +static inline struct ap_queue_status ap_reset_queue(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | 0x01000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile( + ".long 0xb2af0000" /* PQAP(RAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + return reg1; +} + +/** + * Send message to adjunct processor queue. + * @qid: the ap queue number + * @psmid: the program supplied message identifier + * @msg: the message text + * @length: the message length + * + * Returns ap queue status structure. + * + * Condition code 1 on NQAP can't happen because the L bit is 1. + * + * Condition code 2 on NQAP also means the send is incomplete, + * because a segment boundary was reached. The NQAP is repeated. + */ +static inline struct ap_queue_status +__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +{ + typedef struct { char _[length]; } msgblock; + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = (unsigned int) psmid; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* DQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg) + : "cc" ); + return reg1; +} + +int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +{ + struct ap_queue_status status; + + status = __ap_send(qid, psmid, msg, length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + return 0; + case AP_RESPONSE_Q_FULL: + return -EBUSY; + default: /* Device is gone. */ + return -ENODEV; + } +} +EXPORT_SYMBOL(ap_send); + +/* + * Receive message from adjunct processor queue. + * @qid: the ap queue number + * @psmid: pointer to program supplied message identifier + * @msg: the message text + * @length: the message length + * + * Returns ap queue status structure. + * + * Condition code 1 on DQAP means the receive has taken place + * but only partially. The response is incomplete, hence the + * DQAP is repeated. + * + * Condition code 2 on DQAP also means the receive is incomplete, + * this time because a segment boundary was reached. Again, the + * DQAP is repeated. + * + * Note that gpr2 is used by the DQAP instruction to keep track of + * any 'residual' length, in case the instruction gets interrupted. + * Hence it gets zeroed before the instruction. + */ +static inline struct ap_queue_status +__ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) +{ + typedef struct { char _[length]; } msgblock; + register unsigned long reg0 asm("0") = qid | 0x80000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm("2") = 0UL; + register unsigned long reg4 asm("4") = (unsigned long) msg; + register unsigned long reg5 asm("5") = (unsigned long) length; + register unsigned long reg6 asm("6") = 0UL; + register unsigned long reg7 asm("7") = 0UL; + + + asm volatile( + "0: .long 0xb2ae0064\n" + " brc 6,0b\n" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), + "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7), + "=m" (*(msgblock *) msg) : : "cc" ); + *psmid = (((unsigned long long) reg6) << 32) + reg7; + return reg1; +} + +int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) +{ + struct ap_queue_status status; + + status = __ap_recv(qid, psmid, msg, length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + return 0; + case AP_RESPONSE_NO_PENDING_REPLY: + if (status.queue_empty) + return -ENOENT; + return -EBUSY; + default: + return -ENODEV; + } +} +EXPORT_SYMBOL(ap_recv); + +/** + * Check if an AP queue is available. The test is repeated for + * AP_MAX_RESET times. + * @qid: the ap queue number + * @queue_depth: pointer to queue depth value + * @device_type: pointer to device type value + */ +static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type) +{ + struct ap_queue_status status; + int t_depth, t_device_type, rc, i; + + rc = -EBUSY; + for (i = 0; i < AP_MAX_RESET; i++) { + status = ap_test_queue(qid, &t_depth, &t_device_type); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + *queue_depth = t_depth + 1; + *device_type = t_device_type; + rc = 0; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + rc = -ENODEV; + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + break; + case AP_RESPONSE_DECONFIGURED: + rc = -ENODEV; + break; + case AP_RESPONSE_CHECKSTOPPED: + rc = -ENODEV; + break; + case AP_RESPONSE_BUSY: + break; + default: + BUG(); + } + if (rc != -EBUSY) + break; + if (i < AP_MAX_RESET - 1) + udelay(5); + } + return rc; +} + +/** + * Reset an AP queue and wait for it to become available again. + * @qid: the ap queue number + */ +static int ap_init_queue(ap_qid_t qid) +{ + struct ap_queue_status status; + int rc, dummy, i; + + rc = -ENODEV; + status = ap_reset_queue(qid); + for (i = 0; i < AP_MAX_RESET; i++) { + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + if (status.queue_empty) + rc = 0; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + i = AP_MAX_RESET; /* return with -ENODEV */ + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + default: + break; + } + if (rc != -ENODEV) + break; + if (i < AP_MAX_RESET - 1) { + udelay(5); + status = ap_test_queue(qid, &dummy, &dummy); + } + } + return rc; +} + +/** + * AP device related attributes. + */ +static ssize_t ap_hwtype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->device_type); +} +static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL); + +static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->queue_depth); +} +static DEVICE_ATTR(depth, 0444, ap_depth_show, NULL); + +static ssize_t ap_request_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int rc; + + spin_lock_bh(&ap_dev->lock); + rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->total_request_count); + spin_unlock_bh(&ap_dev->lock); + return rc; +} + +static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL); + +static ssize_t ap_modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type); +} + +static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL); + +static struct attribute *ap_dev_attrs[] = { + &dev_attr_hwtype.attr, + &dev_attr_depth.attr, + &dev_attr_request_count.attr, + &dev_attr_modalias.attr, + NULL +}; +static struct attribute_group ap_dev_attr_group = { + .attrs = ap_dev_attrs +}; + +/** + * AP bus driver registration/unregistration. + */ +static int ap_bus_match(struct device *dev, struct device_driver *drv) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + struct ap_driver *ap_drv = to_ap_drv(drv); + struct ap_device_id *id; + + /** + * Compare device type of the device with the list of + * supported types of the device_driver. + */ + for (id = ap_drv->ids; id->match_flags; id++) { + if ((id->match_flags & AP_DEVICE_ID_MATCH_DEVICE_TYPE) && + (id->dev_type != ap_dev->device_type)) + continue; + return 1; + } + return 0; +} + +/** + * uevent function for AP devices. It sets up a single environment + * variable DEV_TYPE which contains the hardware device type. + */ +static int ap_uevent (struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int length; + + if (!ap_dev) + return -ENODEV; + + /* Set up DEV_TYPE environment variable. */ + envp[0] = buffer; + length = scnprintf(buffer, buffer_size, "DEV_TYPE=%04X", + ap_dev->device_type); + if (buffer_size - length <= 0) + return -ENOMEM; + envp[1] = 0; + return 0; +} + +static struct bus_type ap_bus_type = { + .name = "ap", + .match = &ap_bus_match, + .uevent = &ap_uevent, +}; + +static int ap_device_probe(struct device *dev) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + struct ap_driver *ap_drv = to_ap_drv(dev->driver); + int rc; + + ap_dev->drv = ap_drv; + rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV; + if (rc) + ap_dev->unregistered = 1; + return rc; +} + +/** + * Flush all requests from the request/pending queue of an AP device. + * @ap_dev: pointer to the AP device. + */ +static inline void __ap_flush_queue(struct ap_device *ap_dev) +{ + struct ap_message *ap_msg, *next; + + list_for_each_entry_safe(ap_msg, next, &ap_dev->pendingq, list) { + list_del_init(&ap_msg->list); + ap_dev->pendingq_count--; + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + } + list_for_each_entry_safe(ap_msg, next, &ap_dev->requestq, list) { + list_del_init(&ap_msg->list); + ap_dev->requestq_count--; + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + } +} + +void ap_flush_queue(struct ap_device *ap_dev) +{ + spin_lock_bh(&ap_dev->lock); + __ap_flush_queue(ap_dev); + spin_unlock_bh(&ap_dev->lock); +} +EXPORT_SYMBOL(ap_flush_queue); + +static int ap_device_remove(struct device *dev) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + struct ap_driver *ap_drv = ap_dev->drv; + + spin_lock_bh(&ap_dev->lock); + __ap_flush_queue(ap_dev); + /** + * set ->unregistered to 1 while holding the lock. This prevents + * new messages to be put on the queue from now on. + */ + ap_dev->unregistered = 1; + spin_unlock_bh(&ap_dev->lock); + if (ap_drv->remove) + ap_drv->remove(ap_dev); + return 0; +} + +int ap_driver_register(struct ap_driver *ap_drv, struct module *owner, + char *name) +{ + struct device_driver *drv = &ap_drv->driver; + + drv->bus = &ap_bus_type; + drv->probe = ap_device_probe; + drv->remove = ap_device_remove; + drv->owner = owner; + drv->name = name; + return driver_register(drv); +} +EXPORT_SYMBOL(ap_driver_register); + +void ap_driver_unregister(struct ap_driver *ap_drv) +{ + driver_unregister(&ap_drv->driver); +} +EXPORT_SYMBOL(ap_driver_unregister); + +/** + * AP bus attributes. + */ +static ssize_t ap_domain_show(struct bus_type *bus, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ap_domain_index); +} + +static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL); + +static ssize_t ap_config_time_show(struct bus_type *bus, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time); +} + +static ssize_t ap_config_time_store(struct bus_type *bus, + const char *buf, size_t count) +{ + int time; + + if (sscanf(buf, "%d\n", &time) != 1 || time < 5 || time > 120) + return -EINVAL; + ap_config_time = time; + if (!timer_pending(&ap_config_timer) || + !mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ)) { + ap_config_timer.expires = jiffies + ap_config_time * HZ; + add_timer(&ap_config_timer); + } + return count; +} + +static BUS_ATTR(config_time, 0644, ap_config_time_show, ap_config_time_store); + +static ssize_t ap_poll_thread_show(struct bus_type *bus, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ap_poll_kthread ? 1 : 0); +} + +static ssize_t ap_poll_thread_store(struct bus_type *bus, + const char *buf, size_t count) +{ + int flag, rc; + + if (sscanf(buf, "%d\n", &flag) != 1) + return -EINVAL; + if (flag) { + rc = ap_poll_thread_start(); + if (rc) + return rc; + } + else + ap_poll_thread_stop(); + return count; +} + +static BUS_ATTR(poll_thread, 0644, ap_poll_thread_show, ap_poll_thread_store); + +static struct bus_attribute *const ap_bus_attrs[] = { + &bus_attr_ap_domain, + &bus_attr_config_time, + &bus_attr_poll_thread, + NULL +}; + +/** + * Pick one of the 16 ap domains. + */ +static inline int ap_select_domain(void) +{ + int queue_depth, device_type, count, max_count, best_domain; + int rc, i, j; + + /** + * We want to use a single domain. Either the one specified with + * the "domain=" parameter or the domain with the maximum number + * of devices. + */ + if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS) + /* Domain has already been selected. */ + return 0; + best_domain = -1; + max_count = 0; + for (i = 0; i < AP_DOMAINS; i++) { + count = 0; + for (j = 0; j < AP_DEVICES; j++) { + ap_qid_t qid = AP_MKQID(j, i); + rc = ap_query_queue(qid, &queue_depth, &device_type); + if (rc) + continue; + count++; + } + if (count > max_count) { + max_count = count; + best_domain = i; + } + } + if (best_domain >= 0){ + ap_domain_index = best_domain; + return 0; + } + return -ENODEV; +} + +/** + * Find the device type if query queue returned a device type of 0. + * @ap_dev: pointer to the AP device. + */ +static int ap_probe_device_type(struct ap_device *ap_dev) +{ + static unsigned char msg[] = { + 0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x01,0x00,0x43,0x43,0x41,0x2d,0x41,0x50, + 0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01, + 0x00,0x00,0x00,0x00,0x50,0x4b,0x00,0x00, + 0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x54,0x32,0x01,0x00,0xa0,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xb8,0x05,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00, + 0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20, + 0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53, + 0x2d,0x31,0x2e,0x32,0x37,0x00,0x11,0x22, + 0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00, + 0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88, + 0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66, + 0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44, + 0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22, + 0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00, + 0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77, + 0x88,0x1e,0x00,0x00,0x57,0x00,0x00,0x00, + 0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00, + 0x03,0x02,0x00,0x00,0x40,0x01,0x00,0x01, + 0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c, + 0xf6,0xd2,0x7b,0x58,0x4b,0xf9,0x28,0x68, + 0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66, + 0x63,0x42,0xef,0xf8,0xfd,0xa4,0xf8,0xb0, + 0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8, + 0x53,0x8c,0x6f,0x4e,0x72,0x8f,0x6c,0x04, + 0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57, + 0xf7,0xdd,0xfd,0x4f,0x11,0x36,0x95,0x5d, + }; + struct ap_queue_status status; + unsigned long long psmid; + char *reply; + int rc, i; + + reply = (void *) get_zeroed_page(GFP_KERNEL); + if (!reply) { + rc = -ENOMEM; + goto out; + } + + status = __ap_send(ap_dev->qid, 0x0102030405060708ULL, + msg, sizeof(msg)); + if (status.response_code != AP_RESPONSE_NORMAL) { + rc = -ENODEV; + goto out_free; + } + + /* Wait for the test message to complete. */ + for (i = 0; i < 6; i++) { + mdelay(300); + status = __ap_recv(ap_dev->qid, &psmid, reply, 4096); + if (status.response_code == AP_RESPONSE_NORMAL && + psmid == 0x0102030405060708ULL) + break; + } + if (i < 6) { + /* Got an answer. */ + if (reply[0] == 0x00 && reply[1] == 0x86) + ap_dev->device_type = AP_DEVICE_TYPE_PCICC; + else + ap_dev->device_type = AP_DEVICE_TYPE_PCICA; + rc = 0; + } else + rc = -ENODEV; + +out_free: + free_page((unsigned long) reply); +out: + return rc; +} + +/** + * Scan the ap bus for new devices. + */ +static int __ap_scan_bus(struct device *dev, void *data) +{ + return to_ap_dev(dev)->qid == (ap_qid_t)(unsigned long) data; +} + +static void ap_device_release(struct device *dev) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + + kfree(ap_dev); +} + +static void ap_scan_bus(void *data) +{ + struct ap_device *ap_dev; + struct device *dev; + ap_qid_t qid; + int queue_depth, device_type; + int rc, i; + + if (ap_select_domain() != 0) + return; + for (i = 0; i < AP_DEVICES; i++) { + qid = AP_MKQID(i, ap_domain_index); + dev = bus_find_device(&ap_bus_type, NULL, + (void *)(unsigned long)qid, + __ap_scan_bus); + if (dev) { + put_device(dev); + continue; + } + rc = ap_query_queue(qid, &queue_depth, &device_type); + if (rc) + continue; + rc = ap_init_queue(qid); + if (rc) + continue; + ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL); + if (!ap_dev) + break; + ap_dev->qid = qid; + ap_dev->queue_depth = queue_depth; + spin_lock_init(&ap_dev->lock); + INIT_LIST_HEAD(&ap_dev->pendingq); + INIT_LIST_HEAD(&ap_dev->requestq); + if (device_type == 0) + ap_probe_device_type(ap_dev); + else + ap_dev->device_type = device_type; + + ap_dev->device.bus = &ap_bus_type; + ap_dev->device.parent = ap_root_device; + snprintf(ap_dev->device.bus_id, BUS_ID_SIZE, "card%02x", + AP_QID_DEVICE(ap_dev->qid)); + ap_dev->device.release = ap_device_release; + rc = device_register(&ap_dev->device); + if (rc) { + kfree(ap_dev); + continue; + } + /* Add device attributes. */ + rc = sysfs_create_group(&ap_dev->device.kobj, + &ap_dev_attr_group); + if (rc) + device_unregister(&ap_dev->device); + } +} + +static void +ap_config_timeout(unsigned long ptr) +{ + queue_work(ap_work_queue, &ap_config_work); + ap_config_timer.expires = jiffies + ap_config_time * HZ; + add_timer(&ap_config_timer); +} + +/** + * Set up the timer to run the poll tasklet + */ +static inline void ap_schedule_poll_timer(void) +{ + if (timer_pending(&ap_poll_timer)) + return; + mod_timer(&ap_poll_timer, jiffies + AP_POLL_TIME); +} + +/** + * Receive pending reply messages from an AP device. + * @ap_dev: pointer to the AP device + * @flags: pointer to control flags, bit 2^0 is set if another poll is + * required, bit 2^1 is set if the poll timer needs to get armed + * Returns 0 if the device is still present, -ENODEV if not. + */ +static inline int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags) +{ + struct ap_queue_status status; + struct ap_message *ap_msg; + + if (ap_dev->queue_count <= 0) + return 0; + status = __ap_recv(ap_dev->qid, &ap_dev->reply->psmid, + ap_dev->reply->message, ap_dev->reply->length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + atomic_dec(&ap_poll_requests); + ap_dev->queue_count--; + list_for_each_entry(ap_msg, &ap_dev->pendingq, list) { + if (ap_msg->psmid != ap_dev->reply->psmid) + continue; + list_del_init(&ap_msg->list); + ap_dev->pendingq_count--; + ap_dev->drv->receive(ap_dev, ap_msg, ap_dev->reply); + break; + } + if (ap_dev->queue_count > 0) + *flags |= 1; + break; + case AP_RESPONSE_NO_PENDING_REPLY: + if (status.queue_empty) { + /* The card shouldn't forget requests but who knows. */ + ap_dev->queue_count = 0; + list_splice_init(&ap_dev->pendingq, &ap_dev->requestq); + ap_dev->requestq_count += ap_dev->pendingq_count; + ap_dev->pendingq_count = 0; + } else + *flags |= 2; + break; + default: + return -ENODEV; + } + return 0; +} + +/** + * Send messages from the request queue to an AP device. + * @ap_dev: pointer to the AP device + * @flags: pointer to control flags, bit 2^0 is set if another poll is + * required, bit 2^1 is set if the poll timer needs to get armed + * Returns 0 if the device is still present, -ENODEV if not. + */ +static inline int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags) +{ + struct ap_queue_status status; + struct ap_message *ap_msg; + + if (ap_dev->requestq_count <= 0 || + ap_dev->queue_count >= ap_dev->queue_depth) + return 0; + /* Start the next request on the queue. */ + ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list); + status = __ap_send(ap_dev->qid, ap_msg->psmid, + ap_msg->message, ap_msg->length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + atomic_inc(&ap_poll_requests); + ap_dev->queue_count++; + list_move_tail(&ap_msg->list, &ap_dev->pendingq); + ap_dev->requestq_count--; + ap_dev->pendingq_count++; + if (ap_dev->queue_count < ap_dev->queue_depth && + ap_dev->requestq_count > 0) + *flags |= 1; + *flags |= 2; + break; + case AP_RESPONSE_Q_FULL: + *flags |= 2; + break; + case AP_RESPONSE_MESSAGE_TOO_BIG: + return -EINVAL; + default: + return -ENODEV; + } + return 0; +} + +/** + * Poll AP device for pending replies and send new messages. If either + * ap_poll_read or ap_poll_write returns -ENODEV unregister the device. + * @ap_dev: pointer to the bus device + * @flags: pointer to control flags, bit 2^0 is set if another poll is + * required, bit 2^1 is set if the poll timer needs to get armed + * Returns 0. + */ +static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags) +{ + int rc; + + rc = ap_poll_read(ap_dev, flags); + if (rc) + return rc; + return ap_poll_write(ap_dev, flags); +} + +/** + * Queue a message to a device. + * @ap_dev: pointer to the AP device + * @ap_msg: the message to be queued + */ +static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg) +{ + struct ap_queue_status status; + + if (list_empty(&ap_dev->requestq) && + ap_dev->queue_count < ap_dev->queue_depth) { + status = __ap_send(ap_dev->qid, ap_msg->psmid, + ap_msg->message, ap_msg->length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + list_add_tail(&ap_msg->list, &ap_dev->pendingq); + atomic_inc(&ap_poll_requests); + ap_dev->pendingq_count++; + ap_dev->queue_count++; + ap_dev->total_request_count++; + break; + case AP_RESPONSE_Q_FULL: + list_add_tail(&ap_msg->list, &ap_dev->requestq); + ap_dev->requestq_count++; + ap_dev->total_request_count++; + return -EBUSY; + case AP_RESPONSE_MESSAGE_TOO_BIG: + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-EINVAL)); + return -EINVAL; + default: /* Device is gone. */ + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + return -ENODEV; + } + } else { + list_add_tail(&ap_msg->list, &ap_dev->requestq); + ap_dev->requestq_count++; + ap_dev->total_request_count++; + return -EBUSY; + } + ap_schedule_poll_timer(); + return 0; +} + +void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg) +{ + unsigned long flags; + int rc; + + spin_lock_bh(&ap_dev->lock); + if (!ap_dev->unregistered) { + /* Make room on the queue by polling for finished requests. */ + rc = ap_poll_queue(ap_dev, &flags); + if (!rc) + rc = __ap_queue_message(ap_dev, ap_msg); + if (!rc) + wake_up(&ap_poll_wait); + } else { + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + rc = 0; + } + spin_unlock_bh(&ap_dev->lock); + if (rc == -ENODEV) + device_unregister(&ap_dev->device); +} +EXPORT_SYMBOL(ap_queue_message); + +/** + * Cancel a crypto request. This is done by removing the request + * from the devive pendingq or requestq queue. Note that the + * request stays on the AP queue. When it finishes the message + * reply will be discarded because the psmid can't be found. + * @ap_dev: AP device that has the message queued + * @ap_msg: the message that is to be removed + */ +void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg) +{ + struct ap_message *tmp; + + spin_lock_bh(&ap_dev->lock); + if (!list_empty(&ap_msg->list)) { + list_for_each_entry(tmp, &ap_dev->pendingq, list) + if (tmp->psmid == ap_msg->psmid) { + ap_dev->pendingq_count--; + goto found; + } + ap_dev->requestq_count--; + found: + list_del_init(&ap_msg->list); + } + spin_unlock_bh(&ap_dev->lock); +} +EXPORT_SYMBOL(ap_cancel_message); + +/** + * AP receive polling for finished AP requests + */ +static void ap_poll_timeout(unsigned long unused) +{ + tasklet_schedule(&ap_tasklet); +} + +/** + * Poll all AP devices on the bus in a round robin fashion. Continue + * polling until bit 2^0 of the control flags is not set. If bit 2^1 + * of the control flags has been set arm the poll timer. + */ +static int __ap_poll_all(struct device *dev, void *data) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int rc; + + spin_lock(&ap_dev->lock); + if (!ap_dev->unregistered) { + rc = ap_poll_queue(to_ap_dev(dev), (unsigned long *) data); + } else + rc = 0; + spin_unlock(&ap_dev->lock); + if (rc) + device_unregister(&ap_dev->device); + return 0; +} + +static void ap_poll_all(unsigned long dummy) +{ + unsigned long flags; + + do { + flags = 0; + bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all); + } while (flags & 1); + if (flags & 2) + ap_schedule_poll_timer(); +} + +/** + * AP bus poll thread. The purpose of this thread is to poll for + * finished requests in a loop if there is a "free" cpu - that is + * a cpu that doesn't have anything better to do. The polling stops + * as soon as there is another task or if all messages have been + * delivered. + */ +static int ap_poll_thread(void *data) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long flags; + int requests; + + set_user_nice(current, -20); + while (1) { + if (need_resched()) { + schedule(); + continue; + } + add_wait_queue(&ap_poll_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + requests = atomic_read(&ap_poll_requests); + if (requests <= 0) + schedule(); + set_current_state(TASK_RUNNING); + remove_wait_queue(&ap_poll_wait, &wait); + + local_bh_disable(); + flags = 0; + bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all); + local_bh_enable(); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&ap_poll_wait, &wait); + return 0; +} + +static int ap_poll_thread_start(void) +{ + int rc; + + mutex_lock(&ap_poll_thread_mutex); + if (!ap_poll_kthread) { + ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll"); + rc = IS_ERR(ap_poll_kthread) ? PTR_ERR(ap_poll_kthread) : 0; + if (rc) + ap_poll_kthread = NULL; + } + else + rc = 0; + mutex_unlock(&ap_poll_thread_mutex); + return rc; +} + +static void ap_poll_thread_stop(void) +{ + mutex_lock(&ap_poll_thread_mutex); + if (ap_poll_kthread) { + kthread_stop(ap_poll_kthread); + ap_poll_kthread = NULL; + } + mutex_unlock(&ap_poll_thread_mutex); +} + +/** + * The module initialization code. + */ +int __init ap_module_init(void) +{ + int rc, i; + + if (ap_domain_index < -1 || ap_domain_index >= AP_DOMAINS) { + printk(KERN_WARNING "Invalid param: domain = %d. " + " Not loading.\n", ap_domain_index); + return -EINVAL; + } + if (ap_instructions_available() != 0) { + printk(KERN_WARNING "AP instructions not installed.\n"); + return -ENODEV; + } + + /* Create /sys/bus/ap. */ + rc = bus_register(&ap_bus_type); + if (rc) + goto out; + for (i = 0; ap_bus_attrs[i]; i++) { + rc = bus_create_file(&ap_bus_type, ap_bus_attrs[i]); + if (rc) + goto out_bus; + } + + /* Create /sys/devices/ap. */ + ap_root_device = s390_root_dev_register("ap"); + rc = IS_ERR(ap_root_device) ? PTR_ERR(ap_root_device) : 0; + if (rc) + goto out_bus; + + ap_work_queue = create_singlethread_workqueue("kapwork"); + if (!ap_work_queue) { + rc = -ENOMEM; + goto out_root; + } + + if (ap_select_domain() == 0) + ap_scan_bus(NULL); + + /* Setup the ap bus rescan timer. */ + init_timer(&ap_config_timer); + ap_config_timer.function = ap_config_timeout; + ap_config_timer.data = 0; + ap_config_timer.expires = jiffies + ap_config_time * HZ; + add_timer(&ap_config_timer); + + /* Start the low priority AP bus poll thread. */ + if (ap_thread_flag) { + rc = ap_poll_thread_start(); + if (rc) + goto out_work; + } + + return 0; + +out_work: + del_timer_sync(&ap_config_timer); + del_timer_sync(&ap_poll_timer); + destroy_workqueue(ap_work_queue); +out_root: + s390_root_dev_unregister(ap_root_device); +out_bus: + while (i--) + bus_remove_file(&ap_bus_type, ap_bus_attrs[i]); + bus_unregister(&ap_bus_type); +out: + return rc; +} + +static int __ap_match_all(struct device *dev, void *data) +{ + return 1; +} + +/** + * The module termination code + */ +void ap_module_exit(void) +{ + int i; + struct device *dev; + + ap_poll_thread_stop(); + del_timer_sync(&ap_config_timer); + del_timer_sync(&ap_poll_timer); + destroy_workqueue(ap_work_queue); + s390_root_dev_unregister(ap_root_device); + while ((dev = bus_find_device(&ap_bus_type, NULL, NULL, + __ap_match_all))) + { + device_unregister(dev); + put_device(dev); + } + for (i = 0; ap_bus_attrs[i]; i++) + bus_remove_file(&ap_bus_type, ap_bus_attrs[i]); + bus_unregister(&ap_bus_type); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(ap_module_init); +module_exit(ap_module_exit); +#endif diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h new file mode 100644 index 000000000000..83b69c01cd6e --- /dev/null +++ b/drivers/s390/crypto/ap_bus.h @@ -0,0 +1,158 @@ +/* + * linux/drivers/s390/crypto/ap_bus.h + * + * Copyright (C) 2006 IBM Corporation + * Author(s): Cornelia Huck + * Martin Schwidefsky + * Ralph Wuerthner + * + * Adjunct processor bus header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _AP_BUS_H_ +#define _AP_BUS_H_ + +#include +#include +#include + +#define AP_DEVICES 64 /* Number of AP devices. */ +#define AP_DOMAINS 16 /* Number of AP domains. */ +#define AP_MAX_RESET 90 /* Maximum number of resets. */ +#define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ +#define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ + +extern int ap_domain_index; + +/** + * The ap_qid_t identifier of an ap queue. It contains a + * 6 bit device index and a 4 bit queue index (domain). + */ +typedef unsigned int ap_qid_t; + +#define AP_MKQID(_device,_queue) (((_device) & 63) << 8 | ((_queue) & 15)) +#define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63) +#define AP_QID_QUEUE(_qid) ((_qid) & 15) + +/** + * The ap queue status word is returned by all three AP functions + * (PQAP, NQAP and DQAP). There's a set of flags in the first + * byte, followed by a 1 byte response code. + */ +struct ap_queue_status { + unsigned int queue_empty : 1; + unsigned int replies_waiting : 1; + unsigned int queue_full : 1; + unsigned int pad1 : 5; + unsigned int response_code : 8; + unsigned int pad2 : 16; +}; + +#define AP_RESPONSE_NORMAL 0x00 +#define AP_RESPONSE_Q_NOT_AVAIL 0x01 +#define AP_RESPONSE_RESET_IN_PROGRESS 0x02 +#define AP_RESPONSE_DECONFIGURED 0x03 +#define AP_RESPONSE_CHECKSTOPPED 0x04 +#define AP_RESPONSE_BUSY 0x05 +#define AP_RESPONSE_Q_FULL 0x10 +#define AP_RESPONSE_NO_PENDING_REPLY 0x10 +#define AP_RESPONSE_INDEX_TOO_BIG 0x11 +#define AP_RESPONSE_NO_FIRST_PART 0x13 +#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15 + +/** + * Known device types + */ +#define AP_DEVICE_TYPE_PCICC 3 +#define AP_DEVICE_TYPE_PCICA 4 +#define AP_DEVICE_TYPE_PCIXCC 5 +#define AP_DEVICE_TYPE_CEX2A 6 +#define AP_DEVICE_TYPE_CEX2C 7 + +struct ap_device; +struct ap_message; + +struct ap_driver { + struct device_driver driver; + struct ap_device_id *ids; + + int (*probe)(struct ap_device *); + void (*remove)(struct ap_device *); + /* receive is called from tasklet context */ + void (*receive)(struct ap_device *, struct ap_message *, + struct ap_message *); +}; + +#define to_ap_drv(x) container_of((x), struct ap_driver, driver) + +int ap_driver_register(struct ap_driver *, struct module *, char *); +void ap_driver_unregister(struct ap_driver *); + +struct ap_device { + struct device device; + struct ap_driver *drv; /* Pointer to AP device driver. */ + spinlock_t lock; /* Per device lock. */ + + ap_qid_t qid; /* AP queue id. */ + int queue_depth; /* AP queue depth.*/ + int device_type; /* AP device type. */ + int unregistered; /* marks AP device as unregistered */ + + int queue_count; /* # messages currently on AP queue. */ + + struct list_head pendingq; /* List of message sent to AP queue. */ + int pendingq_count; /* # requests on pendingq list. */ + struct list_head requestq; /* List of message yet to be sent. */ + int requestq_count; /* # requests on requestq list. */ + int total_request_count; /* # requests ever for this AP device. */ + + struct ap_message *reply; /* Per device reply message. */ + + void *private; /* ap driver private pointer. */ +}; + +#define to_ap_dev(x) container_of((x), struct ap_device, device) + +struct ap_message { + struct list_head list; /* Request queueing. */ + unsigned long long psmid; /* Message id. */ + void *message; /* Pointer to message buffer. */ + size_t length; /* Message length. */ + + void *private; /* ap driver private pointer. */ +}; + +#define AP_DEVICE(dt) \ + .dev_type=(dt), \ + .match_flags=AP_DEVICE_ID_MATCH_DEVICE_TYPE, + +/** + * Note: don't use ap_send/ap_recv after using ap_queue_message + * for the first time. Otherwise the ap message queue will get + * confused. + */ +int ap_send(ap_qid_t, unsigned long long, void *, size_t); +int ap_recv(ap_qid_t, unsigned long long *, void *, size_t); + +void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg); +void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg); +void ap_flush_queue(struct ap_device *ap_dev); + +int ap_module_init(void); +void ap_module_exit(void); + +#endif /* _AP_BUS_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f6977708585c..f7ca0b09075d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -148,6 +148,17 @@ struct ccw_device_id { #define CCW_DEVICE_ID_MATCH_DEVICE_TYPE 0x04 #define CCW_DEVICE_ID_MATCH_DEVICE_MODEL 0x08 +/* s390 AP bus devices */ +struct ap_device_id { + __u16 match_flags; /* which fields to match against */ + __u8 dev_type; /* device type */ + __u8 pad1; + __u32 pad2; + kernel_ulong_t driver_info; +}; + +#define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 + #define PNP_ID_LEN 8 #define PNP_MAX_DEVICES 8 diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index e2de650d3dbf..de76da80443f 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -265,6 +265,14 @@ static int do_ccw_entry(const char *filename, return 1; } +/* looks like: "ap:tN" */ +static int do_ap_entry(const char *filename, + struct ap_device_id *id, char *alias) +{ + sprintf(alias, "ap:t%02X", id->dev_type); + return 1; +} + /* Looks like: "serio:tyNprNidNexN" */ static int do_serio_entry(const char *filename, struct serio_device_id *id, char *alias) @@ -503,6 +511,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, do_table(symval, sym->st_size, sizeof(struct ccw_device_id), "ccw", do_ccw_entry, mod); + else if (sym_is(symname, "__mod_ap_device_table")) + do_table(symval, sym->st_size, + sizeof(struct ap_device_id), "ap", + do_ap_entry, mod); else if (sym_is(symname, "__mod_serio_device_table")) do_table(symval, sym->st_size, sizeof(struct serio_device_id), "serio", -- cgit v1.2.3-71-gd317 From 799111020c66c41aef621a3b53ad112543754124 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:03:52 +1000 Subject: [CRYPTO] api: Fixed crypto_tfm context alignment Previously the __aligned__ attribute was added to the crypto_tfm context member to ensure it is alinged correctly on architectures such as arm. Unfortunately kmalloc does not use the same minimum alignment rules as gcc so this is useless. This patch changes it to use kmalloc's minimum alignment. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7f946241b879..cb1e6631b132 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -21,8 +21,9 @@ #include #include #include +#include #include -#include +#include /* * Algorithm masks and types. @@ -61,6 +62,26 @@ #define CRYPTO_DIR_ENCRYPT 1 #define CRYPTO_DIR_DECRYPT 0 +/* + * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual + * declaration) is used to ensure that the crypto_tfm context structure is + * aligned correctly for the given architecture so that there are no alignment + * faults for C data types. In particular, this is required on platforms such + * as arm where pointers are 32-bit aligned but there are data types such as + * u64 which require 64-bit alignment. + */ +#if defined(ARCH_KMALLOC_MINALIGN) +#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN +#elif defined(ARCH_SLAB_MINALIGN) +#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN +#endif + +#ifdef CRYPTO_MINALIGN +#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) +#else +#define CRYPTO_MINALIGN_ATTR +#endif + struct scatterlist; struct crypto_tfm; @@ -231,7 +252,7 @@ struct crypto_tfm { struct crypto_alg *__crt_alg; - char __crt_ctx[] __attribute__ ((__aligned__)); + void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; /* -- cgit v1.2.3-71-gd317 From 6521f30273fbec65146a0f16de74b7b402b0f7b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 20:28:44 +1000 Subject: [CRYPTO] api: Add crypto_alg reference counting Up until now we've relied on module reference counting to ensure that the crypto_alg structures don't disappear from under us. This was good enough as long as each crypto_alg came from exactly one module. However, with parameterised crypto algorithms a crypto_alg object may need two or more modules to operate. This means that we need to count the references to the crypto_alg object directly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 32 ++++++++++++++++++++++++++------ crypto/proc.c | 3 +++ include/linux/crypto.h | 3 +++ 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 8c2743a05f90..5994a58ef954 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -29,13 +29,26 @@ LIST_HEAD(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); -static inline int crypto_mod_get(struct crypto_alg *alg) +static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { - return try_module_get(alg->cra_module); + atomic_inc(&alg->cra_refcnt); + return alg; +} + +static inline void crypto_alg_put(struct crypto_alg *alg) +{ + if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) + alg->cra_destroy(alg); +} + +static struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) +{ + return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; } -static inline void crypto_mod_put(struct crypto_alg *alg) +static void crypto_mod_put(struct crypto_alg *alg) { + crypto_alg_put(alg); module_put(alg->cra_module); } @@ -274,6 +287,7 @@ int crypto_register_alg(struct crypto_alg *alg) } list_add(&alg->cra_list, &crypto_alg_list); + atomic_set(&alg->cra_refcnt, 1); out: up_write(&crypto_alg_sem); return ret; @@ -284,8 +298,6 @@ int crypto_unregister_alg(struct crypto_alg *alg) int ret = -ENOENT; struct crypto_alg *q; - BUG_ON(!alg->cra_module); - down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { if (alg == q) { @@ -296,7 +308,15 @@ int crypto_unregister_alg(struct crypto_alg *alg) } out: up_write(&crypto_alg_sem); - return ret; + + if (ret) + return ret; + + BUG_ON(atomic_read(&alg->cra_refcnt) != 1); + if (alg->cra_destroy) + alg->cra_destroy(alg); + + return 0; } int crypto_alg_available(const char *name, u32 flags) diff --git a/crypto/proc.c b/crypto/proc.c index c0a5dd7ce2cc..8543b7a157d6 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -12,6 +12,8 @@ * any later version. * */ + +#include #include #include #include @@ -54,6 +56,7 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "driver : %s\n", alg->cra_driver_name); seq_printf(m, "module : %s\n", module_name(alg->cra_module)); seq_printf(m, "priority : %d\n", alg->cra_priority); + seq_printf(m, "refcnt : %d\n", atomic_read(&alg->cra_refcnt)); switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_CIPHER: diff --git a/include/linux/crypto.h b/include/linux/crypto.h index cb1e6631b132..7f57ff8ec975 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -17,6 +17,7 @@ #ifndef _LINUX_CRYPTO_H #define _LINUX_CRYPTO_H +#include #include #include #include @@ -148,6 +149,7 @@ struct crypto_alg { unsigned int cra_alignmask; int cra_priority; + atomic_t cra_refcnt; char cra_name[CRYPTO_MAX_ALG_NAME]; char cra_driver_name[CRYPTO_MAX_ALG_NAME]; @@ -160,6 +162,7 @@ struct crypto_alg { int (*cra_init)(struct crypto_tfm *tfm); void (*cra_exit)(struct crypto_tfm *tfm); + void (*cra_destroy)(struct crypto_alg *alg); struct module *cra_module; }; -- cgit v1.2.3-71-gd317 From 2825982d9d66ebba4b532a07391dfbb357f71c5f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 21:23:26 +1000 Subject: [CRYPTO] api: Added event notification This patch adds a notifier chain for algorithm/template registration events. This will be used to register compound algorithms such as cbc(aes). In future this will also be passed onto user-space through netlink. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algapi.c | 50 +++++++++++++++++++- crypto/api.c | 122 ++++++++++++++++++++++++++++++++++++++++++++----- crypto/internal.h | 37 +++++++++++++++ include/linux/crypto.h | 4 +- 4 files changed, 199 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/crypto/algapi.c b/crypto/algapi.c index 232b37d81613..f0df85fc1f50 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -21,6 +21,24 @@ static LIST_HEAD(crypto_template_list); +void crypto_larval_error(const char *name) +{ + struct crypto_alg *alg; + + down_read(&crypto_alg_sem); + alg = __crypto_alg_lookup(name); + up_read(&crypto_alg_sem); + + if (alg) { + if (crypto_is_larval(alg)) { + struct crypto_larval *larval = (void *)alg; + complete(&larval->completion); + } + crypto_mod_put(alg); + } +} +EXPORT_SYMBOL_GPL(crypto_larval_error); + static inline int crypto_set_driver_name(struct crypto_alg *alg) { static const char suffix[] = "-generic"; @@ -60,14 +78,27 @@ static int __crypto_register_alg(struct crypto_alg *alg) struct crypto_alg *q; int ret = -EEXIST; + atomic_set(&alg->cra_refcnt, 1); list_for_each_entry(q, &crypto_alg_list, cra_list) { if (q == alg) goto out; + if (crypto_is_larval(q) && + (!strcmp(alg->cra_name, q->cra_name) || + !strcmp(alg->cra_driver_name, q->cra_name))) { + struct crypto_larval *larval = (void *)q; + + if (!crypto_mod_get(alg)) + continue; + larval->adult = alg; + complete(&larval->completion); + } } list_add(&alg->cra_list, &crypto_alg_list); - atomic_set(&alg->cra_refcnt, 1); + + crypto_notify(CRYPTO_MSG_ALG_REGISTER, alg); ret = 0; + out: return ret; } @@ -97,6 +128,7 @@ int crypto_unregister_alg(struct crypto_alg *alg) list_del_init(&alg->cra_list); ret = 0; } + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg); up_write(&crypto_alg_sem); if (ret) @@ -123,6 +155,7 @@ int crypto_register_template(struct crypto_template *tmpl) } list_add(&tmpl->list, &crypto_template_list); + crypto_notify(CRYPTO_MSG_TMPL_REGISTER, tmpl); err = 0; out: up_write(&crypto_alg_sem); @@ -145,8 +178,11 @@ void crypto_unregister_template(struct crypto_template *tmpl) hlist_for_each_entry(inst, p, list, list) { BUG_ON(list_empty(&inst->alg.cra_list)); list_del_init(&inst->alg.cra_list); + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg); } + crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl); + up_write(&crypto_alg_sem); hlist_for_each_entry_safe(inst, p, n, list, list) { @@ -212,6 +248,18 @@ err: } EXPORT_SYMBOL_GPL(crypto_register_instance); +int crypto_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&crypto_chain, nb); +} +EXPORT_SYMBOL_GPL(crypto_register_notifier); + +int crypto_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&crypto_chain, nb); +} +EXPORT_SYMBOL_GPL(crypto_unregister_notifier); + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/crypto/api.c b/crypto/api.c index c922090b4842..5a0d6a17cfd7 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -27,6 +28,9 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); EXPORT_SYMBOL_GPL(crypto_alg_sem); +BLOCKING_NOTIFIER_HEAD(crypto_chain); +EXPORT_SYMBOL_GPL(crypto_chain); + static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { atomic_inc(&alg->cra_refcnt); @@ -39,27 +43,24 @@ static inline void crypto_alg_put(struct crypto_alg *alg) alg->cra_destroy(alg); } -static struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) +struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; } +EXPORT_SYMBOL_GPL(crypto_mod_get); -static void crypto_mod_put(struct crypto_alg *alg) +void crypto_mod_put(struct crypto_alg *alg) { crypto_alg_put(alg); module_put(alg->cra_module); } +EXPORT_SYMBOL_GPL(crypto_mod_put); -static struct crypto_alg *crypto_alg_lookup(const char *name) +struct crypto_alg *__crypto_alg_lookup(const char *name) { struct crypto_alg *q, *alg = NULL; - int best = -1; + int best = -2; - if (!name) - return NULL; - - down_read(&crypto_alg_sem); - list_for_each_entry(q, &crypto_alg_list, cra_list) { int exact, fuzzy; @@ -79,16 +80,113 @@ static struct crypto_alg *crypto_alg_lookup(const char *name) if (exact) break; } - + + return alg; +} +EXPORT_SYMBOL_GPL(__crypto_alg_lookup); + +static void crypto_larval_destroy(struct crypto_alg *alg) +{ + struct crypto_larval *larval = (void *)alg; + + BUG_ON(!crypto_is_larval(alg)); + if (larval->adult) + crypto_mod_put(larval->adult); + kfree(larval); +} + +static struct crypto_alg *crypto_larval_alloc(const char *name) +{ + struct crypto_alg *alg; + struct crypto_larval *larval; + + larval = kzalloc(sizeof(*larval), GFP_KERNEL); + if (!larval) + return NULL; + + larval->alg.cra_flags = CRYPTO_ALG_LARVAL; + larval->alg.cra_priority = -1; + larval->alg.cra_destroy = crypto_larval_destroy; + + atomic_set(&larval->alg.cra_refcnt, 2); + strlcpy(larval->alg.cra_name, name, CRYPTO_MAX_ALG_NAME); + init_completion(&larval->completion); + + down_write(&crypto_alg_sem); + alg = __crypto_alg_lookup(name); + if (!alg) { + alg = &larval->alg; + list_add(&alg->cra_list, &crypto_alg_list); + } + up_write(&crypto_alg_sem); + + if (alg != &larval->alg) + kfree(larval); + + return alg; +} + +static void crypto_larval_kill(struct crypto_alg *alg) +{ + struct crypto_larval *larval = (void *)alg; + + down_write(&crypto_alg_sem); + list_del(&alg->cra_list); + up_write(&crypto_alg_sem); + complete(&larval->completion); + crypto_alg_put(alg); +} + +static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) +{ + struct crypto_larval *larval = (void *)alg; + + wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ); + alg = larval->adult; + if (alg && !crypto_mod_get(alg)) + alg = NULL; + crypto_mod_put(&larval->alg); + + return alg; +} + +static struct crypto_alg *crypto_alg_lookup(const char *name) +{ + struct crypto_alg *alg; + + if (!name) + return NULL; + + down_read(&crypto_alg_sem); + alg = __crypto_alg_lookup(name); up_read(&crypto_alg_sem); + return alg; } /* A far more intelligent version of this is planned. For now, just * try an exact match on the name of the algorithm. */ -static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name) +static struct crypto_alg *crypto_alg_mod_lookup(const char *name) { - return try_then_request_module(crypto_alg_lookup(name), name); + struct crypto_alg *alg; + struct crypto_alg *larval; + + alg = try_then_request_module(crypto_alg_lookup(name), name); + if (alg) + return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg; + + larval = crypto_larval_alloc(name); + if (!larval || !crypto_is_larval(larval)) + return larval; + + if (crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval) == NOTIFY_STOP) + alg = crypto_larval_wait(larval); + else { + crypto_mod_put(larval); + alg = NULL; + } + crypto_larval_kill(larval); + return alg; } static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) diff --git a/crypto/internal.h b/crypto/internal.h index c3ab4a950f30..3a08d25fba45 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -14,6 +14,7 @@ #define _CRYPTO_INTERNAL_H #include +#include #include #include #include @@ -21,15 +22,32 @@ #include #include #include +#include #include #include #include +/* Crypto notification events. */ +enum { + CRYPTO_MSG_ALG_REQUEST, + CRYPTO_MSG_ALG_REGISTER, + CRYPTO_MSG_ALG_UNREGISTER, + CRYPTO_MSG_TMPL_REGISTER, + CRYPTO_MSG_TMPL_UNREGISTER, +}; + struct crypto_instance; struct crypto_template; +struct crypto_larval { + struct crypto_alg alg; + struct crypto_alg *adult; + struct completion completion; +}; + extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; +extern struct blocking_notifier_head crypto_chain; extern enum km_type crypto_km_types[]; @@ -104,6 +122,10 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, return alg->cra_ctxsize; } +struct crypto_alg *crypto_mod_get(struct crypto_alg *alg); +void crypto_mod_put(struct crypto_alg *alg); +struct crypto_alg *__crypto_alg_lookup(const char *name); + int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags); @@ -116,9 +138,14 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm); void crypto_exit_cipher_ops(struct crypto_tfm *tfm); void crypto_exit_compress_ops(struct crypto_tfm *tfm); +void crypto_larval_error(const char *name); + int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst); +int crypto_register_notifier(struct notifier_block *nb); +int crypto_unregister_notifier(struct notifier_block *nb); + static inline int crypto_tmpl_get(struct crypto_template *tmpl) { return try_module_get(tmpl->module); @@ -129,5 +156,15 @@ static inline void crypto_tmpl_put(struct crypto_template *tmpl) module_put(tmpl->module); } +static inline int crypto_is_larval(struct crypto_alg *alg) +{ + return alg->cra_flags & CRYPTO_ALG_LARVAL; +} + +static inline int crypto_notify(unsigned long val, void *v) +{ + return blocking_notifier_call_chain(&crypto_chain, val, v); +} + #endif /* _CRYPTO_INTERNAL_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7f57ff8ec975..3e3e95aff133 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -29,11 +29,13 @@ /* * Algorithm masks and types. */ -#define CRYPTO_ALG_TYPE_MASK 0x000000ff +#define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_DIGEST 0x00000002 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 +#define CRYPTO_ALG_LARVAL 0x00000010 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3-71-gd317 From 2b8c19dbdc692e81243a328725a02efb77b144a5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:31:44 +1000 Subject: [CRYPTO] api: Add cryptomgr The cryptomgr module is a simple manager of crypto algorithm instances. It ensures that parameterised algorithms of the type tmpl(alg) (e.g., cbc(aes)) are always created. This is meant to satisfy the needs for most users. For more complex cases such as deeper combinations or multiple parameters, a netlink module will be created which allows arbitrary expressions to be parsed in user-space. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/Kconfig | 8 +++ crypto/Makefile | 1 + crypto/api.c | 10 +++- crypto/cryptomgr.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/crypto.h | 9 +++ 5 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 crypto/cryptomgr.c (limited to 'include/linux') diff --git a/crypto/Kconfig b/crypto/Kconfig index aabc63195222..4ce509dba329 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -16,6 +16,14 @@ config CRYPTO_ALGAPI help This option provides the API for cryptographic algorithms. +config CRYPTO_MANAGER + tristate "Cryptographic algorithm manager" + select CRYPTO_ALGAPI + default m + help + Create default cryptographic template instantiations such as + cbc(aes). + config CRYPTO_HMAC bool "HMAC support" help diff --git a/crypto/Makefile b/crypto/Makefile index 6d51f80753a1..b8745f3d3595 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -8,6 +8,7 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o crypto_algapi-objs := algapi.o $(crypto_algapi-y) obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o +obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o diff --git a/crypto/api.c b/crypto/api.c index 5a0d6a17cfd7..67cd6f87b74a 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name) { struct crypto_alg *alg; struct crypto_alg *larval; + int ok; alg = try_then_request_module(crypto_alg_lookup(name), name); if (alg) @@ -179,7 +181,13 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name) if (!larval || !crypto_is_larval(larval)) return larval; - if (crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval) == NOTIFY_STOP) + ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval); + if (ok == NOTIFY_DONE) { + request_module("cryptomgr"); + ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval); + } + + if (ok == NOTIFY_STOP) alg = crypto_larval_wait(larval); else { crypto_mod_put(larval); diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c new file mode 100644 index 000000000000..e0ebe1b44b99 --- /dev/null +++ b/crypto/cryptomgr.c @@ -0,0 +1,146 @@ +/* + * Create default crypto algorithm instances. + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +struct cryptomgr_param { + struct work_struct work; + + struct { + struct rtattr attr; + struct crypto_attr_alg data; + } alg; + + struct { + char name[CRYPTO_MAX_ALG_NAME]; + } larval; + + char template[CRYPTO_MAX_ALG_NAME]; +}; + +static void cryptomgr_probe(void *data) +{ + struct cryptomgr_param *param = data; + struct crypto_template *tmpl; + struct crypto_instance *inst; + + tmpl = crypto_lookup_template(param->template); + if (!tmpl) + goto err; + + inst = tmpl->alloc(¶m->alg, sizeof(param->alg)); + if (IS_ERR(inst)) + goto err; + else if ((err = crypto_register_instance(tmpl, inst))) { + tmpl->free(inst); + goto err; + } + + crypto_tmpl_put(tmpl); + +out: + kfree(param); + return; + +err: + crypto_larval_error(param->larval.name); + goto out; +} + +static int cryptomgr_schedule_probe(struct crypto_larval *larval) +{ + struct cryptomgr_param *param; + const char *name = larval->alg.cra_name; + const char *p; + unsigned int len; + + param = kmalloc(sizeof(*param), GFP_KERNEL); + if (!param) + goto err; + + for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++) + ; + + len = p - name; + if (!len || *p != '(') + goto err_free_param; + + memcpy(param->template, name, len); + param->template[len] = 0; + + name = p + 1; + for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++) + ; + + len = p - name; + if (!len || *p != ')' || p[1]) + goto err_free_param; + + param->alg.attr.rta_len = sizeof(param->alg); + param->alg.attr.rta_type = CRYPTOA_ALG; + memcpy(param->alg.data.name, name, len); + param->alg.data.name[len] = 0; + + memcpy(param->larval.name, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME); + + INIT_WORK(¶m->work, cryptomgr_probe, param); + schedule_work(¶m->work); + + return NOTIFY_STOP; + +err_free_param: + kfree(param); +err: + return NOTIFY_OK; +} + +static int cryptomgr_notify(struct notifier_block *this, unsigned long msg, + void *data) +{ + switch (msg) { + case CRYPTO_MSG_ALG_REQUEST: + return cryptomgr_schedule_probe(data); + } + + return NOTIFY_DONE; +} + +static struct notifier_block cryptomgr_notifier = { + .notifier_call = cryptomgr_notify, +}; + +static int __init cryptomgr_init(void) +{ + return crypto_register_notifier(&cryptomgr_notifier); +} + +static void __exit cryptomgr_exit(void) +{ + int err = crypto_unregister_notifier(&cryptomgr_notifier); + BUG_ON(err); +} + +module_init(cryptomgr_init); +module_exit(cryptomgr_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Crypto Algorithm Manager"); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3e3e95aff133..85f73c381913 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -260,6 +260,15 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; +enum { + CRYPTOA_UNSPEC, + CRYPTOA_ALG, +}; + +struct crypto_attr_alg { + char name[CRYPTO_MAX_ALG_NAME]; +}; + /* * Transform user interface. */ -- cgit v1.2.3-71-gd317 From 6bfd48096ff8ecabf955958b51ddfa7988eb0a14 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:39:29 +1000 Subject: [CRYPTO] api: Added spawns Spawns lock a specific crypto algorithm in place. They can then be used with crypto_spawn_tfm to allocate a tfm for that algorithm. When the base algorithm of a spawn is deregistered, all its spawns will be automatically removed. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algapi.c | 185 +++++++++++++++++++++++++++++++++++++++++++----- crypto/api.c | 95 +++++++++++++++++-------- crypto/cryptomgr.c | 19 +++-- crypto/internal.h | 19 +++++ include/crypto/algapi.h | 11 +++ include/linux/crypto.h | 4 ++ 6 files changed, 280 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/crypto/algapi.c b/crypto/algapi.c index acea250677c0..36c4f1bdb521 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -73,27 +74,96 @@ static int crypto_check_alg(struct crypto_alg *alg) return crypto_set_driver_name(alg); } -static int __crypto_register_alg(struct crypto_alg *alg) +static void crypto_destroy_instance(struct crypto_alg *alg) +{ + struct crypto_instance *inst = (void *)alg; + struct crypto_template *tmpl = inst->tmpl; + + tmpl->free(inst); + crypto_tmpl_put(tmpl); +} + +static void crypto_remove_spawns(struct list_head *spawns, + struct list_head *list) +{ + struct crypto_spawn *spawn, *n; + + list_for_each_entry_safe(spawn, n, spawns, list) { + struct crypto_instance *inst = spawn->inst; + struct crypto_template *tmpl = inst->tmpl; + + list_del_init(&spawn->list); + spawn->alg = NULL; + + if (crypto_is_dead(&inst->alg)) + continue; + + inst->alg.cra_flags |= CRYPTO_ALG_DEAD; + if (!tmpl || !crypto_tmpl_get(tmpl)) + continue; + + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg); + list_move(&inst->alg.cra_list, list); + hlist_del(&inst->list); + inst->alg.cra_destroy = crypto_destroy_instance; + + if (!list_empty(&inst->alg.cra_users)) { + if (&n->list == spawns) + n = list_entry(inst->alg.cra_users.next, + typeof(*n), list); + __list_splice(&inst->alg.cra_users, spawns->prev); + } + } +} + +static int __crypto_register_alg(struct crypto_alg *alg, + struct list_head *list) { struct crypto_alg *q; - int ret = -EEXIST; + int ret = -EAGAIN; + + if (crypto_is_dead(alg)) + goto out; + + INIT_LIST_HEAD(&alg->cra_users); + + ret = -EEXIST; atomic_set(&alg->cra_refcnt, 1); list_for_each_entry(q, &crypto_alg_list, cra_list) { if (q == alg) goto out; - if (crypto_is_larval(q) && - (!strcmp(alg->cra_name, q->cra_name) || - !strcmp(alg->cra_driver_name, q->cra_name))) { + + if (crypto_is_moribund(q)) + continue; + + if (crypto_is_larval(q)) { struct crypto_larval *larval = (void *)q; + if (strcmp(alg->cra_name, q->cra_name) && + strcmp(alg->cra_driver_name, q->cra_name)) + continue; + + if (larval->adult) + continue; if ((q->cra_flags ^ alg->cra_flags) & larval->mask) continue; if (!crypto_mod_get(alg)) continue; + larval->adult = alg; complete(&larval->completion); + continue; } + + if (strcmp(alg->cra_name, q->cra_name)) + continue; + + if (strcmp(alg->cra_driver_name, q->cra_driver_name) && + q->cra_priority > alg->cra_priority) + continue; + + crypto_remove_spawns(&q->cra_users, list); } list_add(&alg->cra_list, &crypto_alg_list); @@ -105,8 +175,20 @@ out: return ret; } +static void crypto_remove_final(struct list_head *list) +{ + struct crypto_alg *alg; + struct crypto_alg *n; + + list_for_each_entry_safe(alg, n, list, cra_list) { + list_del_init(&alg->cra_list); + crypto_alg_put(alg); + } +} + int crypto_register_alg(struct crypto_alg *alg) { + LIST_HEAD(list); int err; err = crypto_check_alg(alg); @@ -114,23 +196,35 @@ int crypto_register_alg(struct crypto_alg *alg) return err; down_write(&crypto_alg_sem); - err = __crypto_register_alg(alg); + err = __crypto_register_alg(alg, &list); up_write(&crypto_alg_sem); + crypto_remove_final(&list); return err; } EXPORT_SYMBOL_GPL(crypto_register_alg); +static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list) +{ + if (unlikely(list_empty(&alg->cra_list))) + return -ENOENT; + + alg->cra_flags |= CRYPTO_ALG_DEAD; + + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg); + list_del_init(&alg->cra_list); + crypto_remove_spawns(&alg->cra_users, list); + + return 0; +} + int crypto_unregister_alg(struct crypto_alg *alg) { - int ret = -ENOENT; + int ret; + LIST_HEAD(list); down_write(&crypto_alg_sem); - if (likely(!list_empty(&alg->cra_list))) { - list_del_init(&alg->cra_list); - ret = 0; - } - crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg); + ret = crypto_remove_alg(alg, &list); up_write(&crypto_alg_sem); if (ret) @@ -140,6 +234,7 @@ int crypto_unregister_alg(struct crypto_alg *alg) if (alg->cra_destroy) alg->cra_destroy(alg); + crypto_remove_final(&list); return 0; } EXPORT_SYMBOL_GPL(crypto_unregister_alg); @@ -170,6 +265,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) struct crypto_instance *inst; struct hlist_node *p, *n; struct hlist_head *list; + LIST_HEAD(users); down_write(&crypto_alg_sem); @@ -178,9 +274,8 @@ void crypto_unregister_template(struct crypto_template *tmpl) list = &tmpl->instances; hlist_for_each_entry(inst, p, list, list) { - BUG_ON(list_empty(&inst->alg.cra_list)); - list_del_init(&inst->alg.cra_list); - crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg); + int err = crypto_remove_alg(&inst->alg, &users); + BUG_ON(err); } crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl); @@ -191,6 +286,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1); tmpl->free(inst); } + crypto_remove_final(&users); } EXPORT_SYMBOL_GPL(crypto_unregister_template); @@ -222,6 +318,7 @@ EXPORT_SYMBOL_GPL(crypto_lookup_template); int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst) { + LIST_HEAD(list); int err = -EINVAL; if (inst->alg.cra_destroy) @@ -235,7 +332,7 @@ int crypto_register_instance(struct crypto_template *tmpl, down_write(&crypto_alg_sem); - err = __crypto_register_alg(&inst->alg); + err = __crypto_register_alg(&inst->alg, &list); if (err) goto unlock; @@ -245,11 +342,67 @@ int crypto_register_instance(struct crypto_template *tmpl, unlock: up_write(&crypto_alg_sem); + crypto_remove_final(&list); + err: return err; } EXPORT_SYMBOL_GPL(crypto_register_instance); +int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, + struct crypto_instance *inst) +{ + int err = -EAGAIN; + + spawn->inst = inst; + + down_write(&crypto_alg_sem); + if (!crypto_is_moribund(alg)) { + list_add(&spawn->list, &alg->cra_users); + spawn->alg = alg; + err = 0; + } + up_write(&crypto_alg_sem); + + return err; +} +EXPORT_SYMBOL_GPL(crypto_init_spawn); + +void crypto_drop_spawn(struct crypto_spawn *spawn) +{ + down_write(&crypto_alg_sem); + list_del(&spawn->list); + up_write(&crypto_alg_sem); +} +EXPORT_SYMBOL_GPL(crypto_drop_spawn); + +struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn) +{ + struct crypto_alg *alg; + struct crypto_alg *alg2; + struct crypto_tfm *tfm; + + down_read(&crypto_alg_sem); + alg = spawn->alg; + alg2 = alg; + if (alg2) + alg2 = crypto_mod_get(alg2); + up_read(&crypto_alg_sem); + + if (!alg2) { + if (alg) + crypto_shoot_alg(alg); + return ERR_PTR(-EAGAIN); + } + + tfm = __crypto_alloc_tfm(alg, 0); + if (IS_ERR(tfm)) + crypto_mod_put(alg); + + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_spawn_tfm); + int crypto_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&crypto_chain, nb); diff --git a/crypto/api.c b/crypto/api.c index ddf6a767acdd..7e5522cf856e 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -15,11 +15,13 @@ * */ +#include #include #include #include #include #include +#include #include #include #include "internal.h" @@ -38,12 +40,6 @@ static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) return alg; } -static inline void crypto_alg_put(struct crypto_alg *alg) -{ - if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) - alg->cra_destroy(alg); -} - struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; @@ -65,6 +61,9 @@ struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask) list_for_each_entry(q, &crypto_alg_list, cra_list) { int exact, fuzzy; + if (crypto_is_moribund(q)) + continue; + if ((q->cra_flags ^ type) & mask) continue; @@ -111,7 +110,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type, larval = kzalloc(sizeof(*larval), GFP_KERNEL); if (!larval) - return NULL; + return ERR_PTR(-ENOMEM); larval->mask = mask; larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type; @@ -153,8 +152,11 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ); alg = larval->adult; - if (alg && !crypto_mod_get(alg)) - alg = NULL; + if (alg) { + if (!crypto_mod_get(alg)) + alg = ERR_PTR(-EAGAIN); + } else + alg = ERR_PTR(-ENOENT); crypto_mod_put(&larval->alg); return alg; @@ -165,9 +167,6 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, { struct crypto_alg *alg; - if (!name) - return NULL; - down_read(&crypto_alg_sem); alg = __crypto_alg_lookup(name, type, mask); up_read(&crypto_alg_sem); @@ -181,7 +180,10 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) struct crypto_alg *larval; int ok; - mask &= ~CRYPTO_ALG_LARVAL; + if (!name) + return ERR_PTR(-ENOENT); + + mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); type &= mask; alg = try_then_request_module(crypto_alg_lookup(name, type, mask), @@ -190,7 +192,7 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg; larval = crypto_larval_alloc(name, type, mask); - if (!larval || !crypto_is_larval(larval)) + if (IS_ERR(larval) || !crypto_is_larval(larval)) return larval; ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval); @@ -203,7 +205,7 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) alg = crypto_larval_wait(larval); else { crypto_mod_put(larval); - alg = NULL; + alg = ERR_PTR(-ENOENT); } crypto_larval_kill(larval); return alg; @@ -298,31 +300,40 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1)); } -struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) +void crypto_shoot_alg(struct crypto_alg *alg) +{ + down_write(&crypto_alg_sem); + alg->cra_flags |= CRYPTO_ALG_DYING; + up_write(&crypto_alg_sem); +} +EXPORT_SYMBOL_GPL(crypto_shoot_alg); + +struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags) { struct crypto_tfm *tfm = NULL; - struct crypto_alg *alg; unsigned int tfm_size; - - alg = crypto_alg_mod_lookup(name, 0, 0); - if (alg == NULL) - goto out; + int err = -ENOMEM; tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags); tfm = kzalloc(tfm_size, GFP_KERNEL); if (tfm == NULL) - goto out_put; + goto out; tfm->__crt_alg = alg; - - if (crypto_init_flags(tfm, flags)) + + err = crypto_init_flags(tfm, flags); + if (err) goto out_free_tfm; - if (crypto_init_ops(tfm)) + err = crypto_init_ops(tfm); + if (err) goto out_free_tfm; - if (alg->cra_init && alg->cra_init(tfm)) + if (alg->cra_init && (err = alg->cra_init(tfm))) { + if (err == -EAGAIN) + crypto_shoot_alg(alg); goto cra_init_failed; + } goto out; @@ -330,12 +341,36 @@ cra_init_failed: crypto_exit_ops(tfm); out_free_tfm: kfree(tfm); - tfm = NULL; -out_put: - crypto_mod_put(alg); + tfm = ERR_PTR(err); out: return tfm; } +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); + +struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) +{ + struct crypto_tfm *tfm = NULL; + int err; + + do { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(name, 0, 0); + err = PTR_ERR(alg); + if (IS_ERR(alg)) + continue; + + tfm = __crypto_alloc_tfm(alg, flags); + err = 0; + if (IS_ERR(tfm)) { + crypto_mod_put(alg); + err = PTR_ERR(tfm); + tfm = NULL; + } + } while (err == -EAGAIN && !signal_pending(current)); + + return tfm; +} void crypto_free_tfm(struct crypto_tfm *tfm) { @@ -361,7 +396,7 @@ int crypto_alg_available(const char *name, u32 flags) int ret = 0; struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0); - if (alg) { + if (!IS_ERR(alg)) { crypto_mod_put(alg); ret = 1; } diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c index ae54942e3b31..9b5b15601068 100644 --- a/crypto/cryptomgr.c +++ b/crypto/cryptomgr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -44,21 +45,25 @@ static void cryptomgr_probe(void *data) struct cryptomgr_param *param = data; struct crypto_template *tmpl; struct crypto_instance *inst; + int err; tmpl = crypto_lookup_template(param->template); if (!tmpl) goto err; - inst = tmpl->alloc(¶m->alg, sizeof(param->alg)); - if (IS_ERR(inst)) - goto err; - else if ((err = crypto_register_instance(tmpl, inst))) { - tmpl->free(inst); - goto err; - } + do { + inst = tmpl->alloc(¶m->alg, sizeof(param->alg)); + if (IS_ERR(inst)) + err = PTR_ERR(inst); + else if ((err = crypto_register_instance(tmpl, inst))) + tmpl->free(inst); + } while (err == -EAGAIN && !signal_pending(current)); crypto_tmpl_put(tmpl); + if (err) + goto err; + out: kfree(param); return; diff --git a/crypto/internal.h b/crypto/internal.h index c08d93bdadc4..03c00b0e6b60 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -142,12 +142,21 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm); void crypto_larval_error(const char *name, u32 type, u32 mask); +void crypto_shoot_alg(struct crypto_alg *alg); +struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags); + int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst); int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); +static inline void crypto_alg_put(struct crypto_alg *alg) +{ + if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) + alg->cra_destroy(alg); +} + static inline int crypto_tmpl_get(struct crypto_template *tmpl) { return try_module_get(tmpl->module); @@ -163,6 +172,16 @@ static inline int crypto_is_larval(struct crypto_alg *alg) return alg->cra_flags & CRYPTO_ALG_LARVAL; } +static inline int crypto_is_dead(struct crypto_alg *alg) +{ + return alg->cra_flags & CRYPTO_ALG_DEAD; +} + +static inline int crypto_is_moribund(struct crypto_alg *alg) +{ + return alg->cra_flags & (CRYPTO_ALG_DEAD | CRYPTO_ALG_DYING); +} + static inline int crypto_notify(unsigned long val, void *v) { return blocking_notifier_call_chain(&crypto_chain, val, v); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ffec530d52fb..b20f4bdb23ba 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -36,10 +36,21 @@ struct crypto_template { char name[CRYPTO_MAX_ALG_NAME]; }; +struct crypto_spawn { + struct list_head list; + struct crypto_alg *alg; + struct crypto_instance *inst; +}; + int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); +int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, + struct crypto_instance *inst); +void crypto_drop_spawn(struct crypto_spawn *spawn); +struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn); + static inline void *crypto_instance_ctx(struct crypto_instance *inst) { return inst->__ctx; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 85f73c381913..40a6330abc8d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -35,6 +35,8 @@ #define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 #define CRYPTO_ALG_LARVAL 0x00000010 +#define CRYPTO_ALG_DEAD 0x00000020 +#define CRYPTO_ALG_DYING 0x00000040 /* * Transform masks and values (for crt_flags). @@ -145,6 +147,8 @@ struct compress_alg { struct crypto_alg { struct list_head cra_list; + struct list_head cra_users; + u32 cra_flags; unsigned int cra_blocksize; unsigned int cra_ctxsize; -- cgit v1.2.3-71-gd317 From b14cdd6704c96474ba5c74b5959487beaa5ee1cd Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Sun, 9 Jul 2006 09:02:24 +1000 Subject: [CRYPTO] api: Add missing accessors for new crypto_alg fields Add missing accessors for cra_driver_name and cra_priority. Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- include/linux/crypto.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 40a6330abc8d..d6e184c876b5 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -297,6 +297,16 @@ static inline const char *crypto_tfm_alg_name(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_name; } +static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_driver_name; +} + +static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_priority; +} + static inline const char *crypto_tfm_alg_modname(struct crypto_tfm *tfm) { return module_name(tfm->__crt_alg->cra_module); -- cgit v1.2.3-71-gd317 From 560c06ae1ab7c677002ea3b6ac83521bf12ee07d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 14:16:39 +1000 Subject: [CRYPTO] api: Get rid of flags argument to setkey Now that the tfm is passed directly to setkey instead of the ctx, we no longer need to pass the &tfm->crt_flags pointer. This patch also gets rid of a few unnecessary checks on the key length for ciphers as the cipher layer guarantees that the key length is within the bounds specified by the algorithm. Rather than testing dia_setkey every time, this patch does it only once during crypto_alloc_tfm. The redundant check from crypto_digest_setkey is also removed. Signed-off-by: Herbert Xu --- arch/i386/crypto/aes.c | 3 ++- arch/s390/crypto/aes_s390.c | 3 ++- arch/s390/crypto/des_s390.c | 13 ++++++++----- arch/x86_64/crypto/aes.c | 5 +++-- crypto/aes.c | 5 +++-- crypto/anubis.c | 3 ++- crypto/arc4.c | 2 +- crypto/blowfish.c | 3 +-- crypto/cast5.c | 8 +------- crypto/cast6.c | 5 +++-- crypto/cipher.c | 4 ++-- crypto/crc32c.c | 5 ++--- crypto/crypto_null.c | 2 +- crypto/des.c | 6 ++++-- crypto/digest.c | 15 ++++++++++----- crypto/khazad.c | 8 +------- crypto/michael_mic.c | 5 ++--- crypto/serpent.c | 19 +++---------------- crypto/tcrypt.c | 5 +---- crypto/tea.c | 16 ++-------------- crypto/twofish_common.c | 6 +++--- drivers/crypto/padlock-aes.c | 5 +++-- include/crypto/twofish.h | 3 +-- include/linux/crypto.h | 6 ++---- 24 files changed, 63 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c index d3806daa3de3..49aad9397f10 100644 --- a/arch/i386/crypto/aes.c +++ b/arch/i386/crypto/aes.c @@ -379,12 +379,13 @@ static void gen_tabs(void) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { int i; u32 ss[8]; struct aes_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; /* encryption schedule */ diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 5713c7e5bd16..c7c43c9de0d9 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -38,9 +38,10 @@ struct s390_aes_ctx { }; static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; switch (key_len) { case 16: diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index b3f7496a79b4..170757b3451d 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -45,9 +45,10 @@ struct crypt_s390_des3_192_ctx { }; static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; int ret; /* test if key is valid (not a weak key) */ @@ -167,11 +168,12 @@ static struct crypto_alg des_alg = { * */ static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { int i, ret; struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm); - const u8* temp_key = key; + const u8 *temp_key = key; + u32 *flags = &tfm->crt_flags; if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) { *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; @@ -303,11 +305,12 @@ static struct crypto_alg des3_128_alg = { * */ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { int i, ret; struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); - const u8* temp_key = key; + const u8 *temp_key = key; + u32 *flags = &tfm->crt_flags; if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c index 68866fab37aa..5cdb13ea5cc2 100644 --- a/arch/x86_64/crypto/aes.c +++ b/arch/x86_64/crypto/aes.c @@ -228,13 +228,14 @@ static void __init gen_tabs(void) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct aes_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; u32 i, j, t, u, v, w; - if (key_len != 16 && key_len != 24 && key_len != 32) { + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/aes.c b/crypto/aes.c index a038711831e7..e2440773878c 100644 --- a/crypto/aes.c +++ b/crypto/aes.c @@ -249,13 +249,14 @@ gen_tabs (void) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct aes_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; u32 i, t, u, v, w; - if (key_len != 16 && key_len != 24 && key_len != 32) { + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/anubis.c b/crypto/anubis.c index 7e2e1a29800e..1c771f7f4dc5 100644 --- a/crypto/anubis.c +++ b/crypto/anubis.c @@ -461,10 +461,11 @@ static const u32 rc[] = { }; static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct anubis_ctx *ctx = crypto_tfm_ctx(tfm); const __be32 *key = (const __be32 *)in_key; + u32 *flags = &tfm->crt_flags; int N, R, i, r; u32 kappa[ANUBIS_MAX_N]; u32 inter[ANUBIS_MAX_N]; diff --git a/crypto/arc4.c b/crypto/arc4.c index 5edc6a65b987..8be47e13a9e3 100644 --- a/crypto/arc4.c +++ b/crypto/arc4.c @@ -25,7 +25,7 @@ struct arc4_ctx { }; static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct arc4_ctx *ctx = crypto_tfm_ctx(tfm); int i, j = 0, k = 0; diff --git a/crypto/blowfish.c b/crypto/blowfish.c index 490265f42b3b..55238c4e37f0 100644 --- a/crypto/blowfish.c +++ b/crypto/blowfish.c @@ -399,8 +399,7 @@ static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* * Calculates the blowfish S and P boxes for encryption and decryption. */ -static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) +static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct bf_ctx *ctx = crypto_tfm_ctx(tfm); u32 *P = ctx->p; diff --git a/crypto/cast5.c b/crypto/cast5.c index 08eef58c1d3d..13ea60abc19a 100644 --- a/crypto/cast5.c +++ b/crypto/cast5.c @@ -769,8 +769,7 @@ static void key_schedule(u32 * x, u32 * z, u32 * k) } -static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned key_len, u32 *flags) +static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned key_len) { struct cast5_ctx *c = crypto_tfm_ctx(tfm); int i; @@ -778,11 +777,6 @@ static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, u32 z[4]; u32 k[16]; __be32 p_key[4]; - - if (key_len < 5 || key_len > 16) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } c->rr = key_len <= 10 ? 1 : 0; diff --git a/crypto/cast6.c b/crypto/cast6.c index 08e33bfc3ad1..136ab6dfe8c5 100644 --- a/crypto/cast6.c +++ b/crypto/cast6.c @@ -382,14 +382,15 @@ static inline void W(u32 *key, unsigned int i) { } static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned key_len, u32 *flags) + unsigned key_len) { int i; u32 key[8]; __be32 p_key[8]; /* padded key */ struct cast6_ctx *c = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; - if (key_len < 16 || key_len > 32 || key_len % 4 != 0) { + if (key_len % 4 != 0) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/cipher.c b/crypto/cipher.c index b899eb97abd7..56406a4a88d4 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -264,12 +264,12 @@ static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher; + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } else - return cia->cia_setkey(tfm, key, keylen, - &tfm->crt_flags); + return cia->cia_setkey(tfm, key, keylen); } static int ecb_encrypt(struct crypto_tfm *tfm, diff --git a/crypto/crc32c.c b/crypto/crc32c.c index 91ecd895e957..0fa744392a4c 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c.c @@ -44,13 +44,12 @@ static void chksum_init(struct crypto_tfm *tfm) * the seed. */ static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); if (keylen != sizeof(mctx->crc)) { - if (flags) - *flags = CRYPTO_TFM_RES_BAD_KEY_LEN; + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } mctx->key = le32_to_cpu(*(__le32 *)key); diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index a0d956b52949..24dbb5d8617e 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -48,7 +48,7 @@ static void null_final(struct crypto_tfm *tfm, u8 *out) { } static int null_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { return 0; } static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) diff --git a/crypto/des.c b/crypto/des.c index a9d3c235a6af..1df3a714fa47 100644 --- a/crypto/des.c +++ b/crypto/des.c @@ -784,9 +784,10 @@ static void dkey(u32 *pe, const u8 *k) } static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct des_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; u32 tmp[DES_EXPKEY_WORDS]; int ret; @@ -864,11 +865,12 @@ static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) * */ static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { const u32 *K = (const u32 *)key; struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm); u32 *expkey = dctx->expkey; + u32 *flags = &tfm->crt_flags; if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || !((K[2] ^ K[4]) | (K[3] ^ K[5])))) diff --git a/crypto/digest.c b/crypto/digest.c index 603006a7bef2..0df7f392a56a 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -76,12 +76,16 @@ static void final(struct crypto_tfm *tfm, u8 *out) tfm->__crt_alg->cra_digest.dia_final(tfm, out); } +static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +{ + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + return -ENOSYS; +} + static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { - u32 flags; - if (tfm->__crt_alg->cra_digest.dia_setkey == NULL) - return -ENOSYS; - return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen, &flags); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); } static void digest(struct crypto_tfm *tfm, @@ -100,12 +104,13 @@ int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags) int crypto_init_digest_ops(struct crypto_tfm *tfm) { struct digest_tfm *ops = &tfm->crt_digest; + struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; ops->dit_init = init; ops->dit_update = update; ops->dit_final = final; ops->dit_digest = digest; - ops->dit_setkey = setkey; + ops->dit_setkey = dalg->dia_setkey ? setkey : nosetkey; return crypto_alloc_hmac_block(tfm); } diff --git a/crypto/khazad.c b/crypto/khazad.c index d4c9d3657b36..9fa24a2dd6ff 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -755,19 +755,13 @@ static const u64 c[KHAZAD_ROUNDS + 1] = { }; static int khazad_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct khazad_ctx *ctx = crypto_tfm_ctx(tfm); const __be32 *key = (const __be32 *)in_key; int r; const u64 *S = T7; u64 K2, K1; - - if (key_len != 16) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } /* key is supposed to be 32-bit aligned */ K2 = ((u64)be32_to_cpu(key[0]) << 32) | be32_to_cpu(key[1]); diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index d061da21cfda..094397b48849 100644 --- a/crypto/michael_mic.c +++ b/crypto/michael_mic.c @@ -123,14 +123,13 @@ static void michael_final(struct crypto_tfm *tfm, u8 *out) static int michael_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm); const __le32 *data = (const __le32 *)key; if (keylen != 8) { - if (flags) - *flags = CRYPTO_TFM_RES_BAD_KEY_LEN; + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/serpent.c b/crypto/serpent.c index de60cdddbf4a..465d091cd3ec 100644 --- a/crypto/serpent.c +++ b/crypto/serpent.c @@ -216,7 +216,7 @@ struct serpent_ctx { static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct serpent_ctx *ctx = crypto_tfm_ctx(tfm); u32 *k = ctx->expkey; @@ -224,13 +224,6 @@ static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, u32 r0,r1,r2,r3,r4; int i; - if ((keylen < SERPENT_MIN_KEY_SIZE) - || (keylen > SERPENT_MAX_KEY_SIZE)) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* Copy key, add padding */ for (i = 0; i < keylen; ++i) @@ -497,21 +490,15 @@ static struct crypto_alg serpent_alg = { }; static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { u8 rev_key[SERPENT_MAX_KEY_SIZE]; int i; - if ((keylen < SERPENT_MIN_KEY_SIZE) - || (keylen > SERPENT_MAX_KEY_SIZE)) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - for (i = 0; i < keylen; ++i) rev_key[keylen - i - 1] = key[i]; - return serpent_setkey(tfm, rev_key, keylen, flags); + return serpent_setkey(tfm, rev_key, keylen); } static void tnepres_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index bed225e83231..606777074671 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -118,10 +118,7 @@ static void test_hash(char *algo, struct hash_testvec *template, sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize); crypto_digest_init(tfm); - if (tfm->crt_u.digest.dit_setkey) { - crypto_digest_setkey(tfm, hash_tv[i].key, - hash_tv[i].ksize); - } + crypto_digest_setkey(tfm, hash_tv[i].key, hash_tv[i].ksize); crypto_digest_update(tfm, sg, 1); crypto_digest_final(tfm, result); diff --git a/crypto/tea.c b/crypto/tea.c index 5367adc82fc9..1c54e26fa529 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -46,16 +46,10 @@ struct xtea_ctx { }; static int tea_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct tea_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; - - if (key_len != 16) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } ctx->KEY[0] = le32_to_cpu(key[0]); ctx->KEY[1] = le32_to_cpu(key[1]); @@ -125,16 +119,10 @@ static void tea_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } static int xtea_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct xtea_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; - - if (key_len != 16) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } ctx->KEY[0] = le32_to_cpu(key[0]); ctx->KEY[1] = le32_to_cpu(key[1]); diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c index 1ae0280c2513..b4b9c0c3f4ae 100644 --- a/crypto/twofish_common.c +++ b/crypto/twofish_common.c @@ -580,11 +580,11 @@ static const u8 calc_sb_tbl[512] = { ctx->a[(j) + 1] = rol32(y, 9) /* Perform the key setup. */ -int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int key_len, u32 *flags) +int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; int i, j, k; @@ -600,7 +600,7 @@ int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, u8 tmp; /* Check key length. */ - if (key_len != 16 && key_len != 24 && key_len != 32) + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; /* unsupported key length */ diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 3a2a71108d35..3e683709243e 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -308,15 +308,16 @@ static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct aes_ctx *ctx = aes_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; uint32_t i, t, u, v, w; uint32_t P[AES_EXTENDED_KEY_SIZE]; uint32_t rounds; - if (key_len != 16 && key_len != 24 && key_len != 32) { + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/include/crypto/twofish.h b/include/crypto/twofish.h index e4328cfaaf64..c408522595c6 100644 --- a/include/crypto/twofish.h +++ b/include/crypto/twofish.h @@ -17,7 +17,6 @@ struct twofish_ctx { u32 s[4][256], w[8], k[32]; }; -int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int key_len, u32 *flags); +int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len); #endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index d6e184c876b5..053bfab43e8d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -106,7 +106,7 @@ struct cipher_alg { unsigned int cia_min_keysize; unsigned int cia_max_keysize; int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags); + unsigned int keylen); void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); @@ -131,7 +131,7 @@ struct digest_alg { unsigned int len); void (*dia_final)(struct crypto_tfm *tfm, u8 *out); int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags); + unsigned int keylen); }; struct compress_alg { @@ -397,8 +397,6 @@ static inline int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - if (tfm->crt_digest.dit_setkey == NULL) - return -ENOSYS; return tfm->crt_digest.dit_setkey(tfm, key, keylen); } -- cgit v1.2.3-71-gd317 From df89820ebd5bbf4f3c6b5f8ee7d9e983107f6a91 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 14 Jul 2006 10:42:27 +1000 Subject: [CRYPTO] cipher: Removed special IV checks for ECB This patch makes IV operations on ECB fail through nocrypt_iv rather than calling BUG(). This is needed to generalise CBC/ECB using the template mechanism. Signed-off-by: Herbert Xu --- crypto/cipher.c | 2 ++ include/linux/crypto.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/crypto/cipher.c b/crypto/cipher.c index 56406a4a88d4..aebc4a2adc80 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) case CRYPTO_TFM_MODE_ECB: ops->cit_encrypt = ecb_encrypt; ops->cit_decrypt = ecb_decrypt; + ops->cit_encrypt_iv = nocrypt_iv; + ops->cit_decrypt_iv = nocrypt_iv; break; case CRYPTO_TFM_MODE_CBC: diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 053bfab43e8d..dbdfc7c79367 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -422,7 +422,6 @@ static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB); return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv); } @@ -441,7 +440,6 @@ static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB); return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv); } -- cgit v1.2.3-71-gd317 From f3f632d61ae9af85d436706ee8e33af1a7fb9c28 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 23:12:59 +1000 Subject: [CRYPTO] api: Added asynchronous flag This patch adds the asynchronous flag and changes all existing users to only look up algorithms that are synchronous. Signed-off-by: Herbert Xu --- crypto/api.c | 5 +++-- include/linux/crypto.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 7e5522cf856e..1e4692a13474 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -355,7 +355,7 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) do { struct crypto_alg *alg; - alg = crypto_alg_mod_lookup(name, 0, 0); + alg = crypto_alg_mod_lookup(name, 0, CRYPTO_ALG_ASYNC); err = PTR_ERR(alg); if (IS_ERR(alg)) continue; @@ -394,7 +394,8 @@ void crypto_free_tfm(struct crypto_tfm *tfm) int crypto_alg_available(const char *name, u32 flags) { int ret = 0; - struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0); + struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, + CRYPTO_ALG_ASYNC); if (!IS_ERR(alg)) { crypto_mod_put(alg); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index dbdfc7c79367..530dc4bf363c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -37,6 +37,7 @@ #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 #define CRYPTO_ALG_DYING 0x00000040 +#define CRYPTO_ALG_ASYNC 0x00000080 /* * Transform masks and values (for crt_flags). -- cgit v1.2.3-71-gd317 From 6d7d684d635ac5a345f075015f2c84169c111c6a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 11:53:01 +1000 Subject: [CRYPTO] api: Added crypto_alloc_base Up until now all crypto transforms have been of the same type, struct crypto_tfm, regardless of whether they are ciphers, digests, or other types. As a result of that, we check the types at run-time before each crypto operation. This is rather cumbersome. We could instead use different C types for each crypto type to ensure that the correct types are used at compile time. That is, we would have crypto_cipher/crypto_digest instead of just crypto_tfm. The appropriate type would then be required for the actual operations such as crypto_digest_digest. Now that we have the type/mask fields when looking up algorithms, it is easy to request for an algorithm of the precise type that the user wants. However, crypto_alloc_tfm currently does not expose these new attributes. This patch introduces the function crypto_alloc_base which will carry these new parameters. It will be renamed to crypto_alloc_tfm once all existing users have been converted. Signed-off-by: Herbert Xu --- crypto/api.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/crypto.h | 14 +++--------- 2 files changed, 63 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 1e4692a13474..bc4b7901acdf 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -372,6 +372,66 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) return tfm; } +/* + * crypto_alloc_base - Locate algorithm and allocate transform + * @alg_name: Name of algorithm + * @type: Type of algorithm + * @mask: Mask for type comparison + * + * crypto_alloc_base() will first attempt to locate an already loaded + * algorithm. If that fails and the kernel supports dynamically loadable + * modules, it will then attempt to load a module of the same name or + * alias. If that fails it will send a query to any loaded crypto manager + * to construct an algorithm on the fly. A refcount is grabbed on the + * algorithm which is then associated with the new transform. + * + * The returned transform is of a non-determinate type. Most people + * should use one of the more specific allocation functions such as + * crypto_alloc_blkcipher. + * + * In case of error the return value is an error pointer. + */ +struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + err = PTR_ERR(alg); + tfm = ERR_PTR(err); + if (IS_ERR(alg)) + goto err; + + tfm = __crypto_alloc_tfm(alg, 0); + if (!IS_ERR(tfm)) + break; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (signal_pending(current)) { + err = -EINTR; + break; + } + }; + + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_alloc_base); + +/* + * crypto_free_tfm - Free crypto transform + * @tfm: Transform to free + * + * crypto_free_tfm() frees up the transform and any associated resources, + * then drops the refcount on the associated algorithm. + */ void crypto_free_tfm(struct crypto_tfm *tfm) { struct crypto_alg *alg; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 530dc4bf363c..6847ab0ea30e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -194,8 +194,8 @@ static inline int crypto_alg_available(const char *name, u32 flags) /* * Transforms: user-instantiated objects which encapsulate algorithms - * and core processing logic. Managed via crypto_alloc_tfm() and - * crypto_free_tfm(), as well as the various helpers below. + * and core processing logic. Managed via crypto_alloc_*() and + * crypto_free_*(), as well as the various helpers below. */ struct cipher_tfm { @@ -278,16 +278,8 @@ struct crypto_attr_alg { * Transform user interface. */ -/* - * crypto_alloc_tfm() will first attempt to locate an already loaded algorithm. - * If that fails and the kernel supports dynamically loadable modules, it - * will then attempt to load a module of the same name or alias. A refcount - * is grabbed on the algorithm which is then associated with the new transform. - * - * crypto_free_tfm() frees up the transform and any associated resources, - * then drops the refcount on the associated algorithm. - */ struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags); +struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); void crypto_free_tfm(struct crypto_tfm *tfm); /* -- cgit v1.2.3-71-gd317 From e853c3cfa8cc24869ecd2526e589bcb176bc12e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 00:06:54 +1000 Subject: [CRYPTO] api: Added crypto_type support This patch adds the crypto_type structure which will be used for all new crypto algorithm types, beginning with block ciphers. The primary purpose of this abstraction is to allow different crypto_type objects for crypto algorithms of the same type, in particular, there will be a different crypto_type objects for asynchronous algorithms. Signed-off-by: Herbert Xu --- crypto/api.c | 32 +++++++++++++++++++++++--------- crypto/proc.c | 5 ++++- include/crypto/algapi.h | 8 ++++++++ include/linux/crypto.h | 3 +++ 4 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index bc4b7901acdf..edaa843d8e83 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -226,17 +226,18 @@ static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) case CRYPTO_ALG_TYPE_COMPRESS: return crypto_init_compress_flags(tfm, flags); - - default: - break; } - BUG(); - return -EINVAL; + return 0; } static int crypto_init_ops(struct crypto_tfm *tfm) { + const struct crypto_type *type = tfm->__crt_alg->cra_type; + + if (type) + return type->init(tfm); + switch (crypto_tfm_alg_type(tfm)) { case CRYPTO_ALG_TYPE_CIPHER: return crypto_init_cipher_ops(tfm); @@ -257,6 +258,14 @@ static int crypto_init_ops(struct crypto_tfm *tfm) static void crypto_exit_ops(struct crypto_tfm *tfm) { + const struct crypto_type *type = tfm->__crt_alg->cra_type; + + if (type) { + if (type->exit) + type->exit(tfm); + return; + } + switch (crypto_tfm_alg_type(tfm)) { case CRYPTO_ALG_TYPE_CIPHER: crypto_exit_cipher_ops(tfm); @@ -278,26 +287,31 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) { + const struct crypto_type *type = alg->cra_type; unsigned int len; + len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1); + if (type) + return len + type->ctxsize(alg); + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { default: BUG(); case CRYPTO_ALG_TYPE_CIPHER: - len = crypto_cipher_ctxsize(alg, flags); + len += crypto_cipher_ctxsize(alg, flags); break; case CRYPTO_ALG_TYPE_DIGEST: - len = crypto_digest_ctxsize(alg, flags); + len += crypto_digest_ctxsize(alg, flags); break; case CRYPTO_ALG_TYPE_COMPRESS: - len = crypto_compress_ctxsize(alg, flags); + len += crypto_compress_ctxsize(alg, flags); break; } - return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1)); + return len; } void crypto_shoot_alg(struct crypto_alg *alg) diff --git a/crypto/proc.c b/crypto/proc.c index 9e573b17e887..dabce0676f63 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -78,7 +78,10 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "type : compression\n"); break; default: - seq_printf(m, "type : unknown\n"); + if (alg->cra_type && alg->cra_type->show) + alg->cra_type->show(m, alg); + else + seq_printf(m, "type : unknown\n"); break; } diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 1a598f829417..c533c0a291af 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -15,6 +15,14 @@ #include struct module; +struct seq_file; + +struct crypto_type { + unsigned int (*ctxsize)(struct crypto_alg *alg); + int (*init)(struct crypto_tfm *tfm); + void (*exit)(struct crypto_tfm *tfm); + void (*show)(struct seq_file *m, struct crypto_alg *alg); +}; struct crypto_instance { struct crypto_alg alg; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6847ab0ea30e..8e9c407b00d2 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -90,6 +90,7 @@ struct scatterlist; struct crypto_tfm; +struct crypto_type; struct cipher_desc { struct crypto_tfm *tfm; @@ -161,6 +162,8 @@ struct crypto_alg { char cra_name[CRYPTO_MAX_ALG_NAME]; char cra_driver_name[CRYPTO_MAX_ALG_NAME]; + const struct crypto_type *cra_type; + union { struct cipher_alg cipher; struct digest_alg digest; -- cgit v1.2.3-71-gd317 From f28776a369b12f9a03a822a8e1090ed670a41f4f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 20:58:18 +1000 Subject: [CRYPTO] cipher: Added encrypt_one/decrypt_one This patch adds two new operations for the simple cipher that encrypts or decrypts a single block at a time. This will be the main interface after the existing block operations have moved over to the new block ciphers. It also adds the crypto_cipher type which is currently only used on the new operations but will be extended to setkey as well once existing users have been converted to use block ciphers where applicable. Signed-off-by: Herbert Xu --- crypto/cipher.c | 48 +++++++++++++++++++++++++ include/crypto/algapi.h | 5 +++ include/linux/crypto.h | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) (limited to 'include/linux') diff --git a/crypto/cipher.c b/crypto/cipher.c index f573c59ed9dc..d8ca0ec8d0be 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -388,12 +388,60 @@ int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags) return 0; } +static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *, + const u8 *), + struct crypto_tfm *tfm, + u8 *dst, const u8 *src) +{ + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + unsigned int size = crypto_tfm_alg_blocksize(tfm); + u8 buffer[size + alignmask]; + u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + + memcpy(tmp, src, size); + fn(tfm, tmp, tmp); + memcpy(dst, tmp, size); +} + +static void cipher_encrypt_unaligned(struct crypto_tfm *tfm, + u8 *dst, const u8 *src) +{ + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + + if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) { + cipher_crypt_unaligned(cipher->cia_encrypt, tfm, dst, src); + return; + } + + cipher->cia_encrypt(tfm, dst, src); +} + +static void cipher_decrypt_unaligned(struct crypto_tfm *tfm, + u8 *dst, const u8 *src) +{ + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + + if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) { + cipher_crypt_unaligned(cipher->cia_decrypt, tfm, dst, src); + return; + } + + cipher->cia_decrypt(tfm, dst, src); +} + int crypto_init_cipher_ops(struct crypto_tfm *tfm) { int ret = 0; struct cipher_tfm *ops = &tfm->crt_cipher; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; ops->cit_setkey = setkey; + ops->cit_encrypt_one = crypto_tfm_alg_alignmask(tfm) ? + cipher_encrypt_unaligned : cipher->cia_encrypt; + ops->cit_decrypt_one = crypto_tfm_alg_alignmask(tfm) ? + cipher_decrypt_unaligned : cipher->cia_decrypt; switch (tfm->crt_cipher.cit_mode) { case CRYPTO_TFM_MODE_ECB: diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index c533c0a291af..6f9fb27b2071 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -69,5 +69,10 @@ static inline void *crypto_instance_ctx(struct crypto_instance *inst) return inst->__ctx; } +static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) +{ + return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8e9c407b00d2..fdecee83878c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -224,6 +224,8 @@ struct cipher_tfm { struct scatterlist *src, unsigned int nbytes, u8 *iv); void (*cit_xor_block)(u8 *dst, const u8 *src); + void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; struct digest_tfm { @@ -268,6 +270,8 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; +#define crypto_cipher crypto_tfm + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -347,6 +351,21 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_alignmask; } +static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) +{ + return tfm->crt_flags; +} + +static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags) +{ + tfm->crt_flags |= flags; +} + +static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags) +{ + tfm->crt_flags &= ~flags; +} + static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return tfm->__crt_ctx; @@ -361,6 +380,83 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) /* * API wrappers. */ +static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_cipher *)tfm; +} + +static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm) +{ + BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); + return __crypto_cipher_cast(tfm); +} + +static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) +{ + return tfm; +} + +static inline void crypto_free_cipher(struct crypto_cipher *tfm) +{ + crypto_free_tfm(crypto_cipher_tfm(tfm)); +} + +static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) +{ + return &crypto_cipher_tfm(tfm)->crt_cipher; +} + +static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); +} + +static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); +} + +static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) +{ + return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); +} + +static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); +} + +static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); +} + +static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src) +{ + crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm), + dst, src); +} + +static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src) +{ + crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm), + dst, src); +} + static inline void crypto_digest_init(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); -- cgit v1.2.3-71-gd317 From 5cde0af2a9825dd1edaca233bd9590566579ef21 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 00:07:53 +1000 Subject: [CRYPTO] cipher: Added block cipher type This patch adds the new type of block ciphers. Unlike current cipher algorithms which operate on a single block at a time, block ciphers operate on an arbitrarily long linear area of data. As it is block-based, it will skip any data remaining at the end which cannot form a block. The block cipher has one major difference when compared to the existing block cipher implementation. The sg walking is now performed by the algorithm rather than the cipher mid-layer. This is needed for drivers that directly support sg lists. It also improves performance for all algorithms as it reduces the total number of indirect calls by one. In future the existing cipher algorithm will be converted to only have a single-block interface. This will be done after all existing users have switched over to the new block cipher type. Signed-off-by: Herbert Xu --- crypto/Kconfig | 4 + crypto/Makefile | 2 + crypto/blkcipher.c | 405 ++++++++++++++++++++++++++++++++++++++++++++++++ include/crypto/algapi.h | 65 ++++++++ include/linux/crypto.h | 179 +++++++++++++++++++++ 5 files changed, 655 insertions(+) create mode 100644 crypto/blkcipher.c (limited to 'include/linux') diff --git a/crypto/Kconfig b/crypto/Kconfig index 4ce509dba329..68790ad7308d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -16,6 +16,10 @@ config CRYPTO_ALGAPI help This option provides the API for cryptographic algorithms. +config CRYPTO_BLKCIPHER + tristate + select CRYPTO_ALGAPI + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index b8745f3d3595..b5051951c636 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -8,6 +8,8 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o crypto_algapi-objs := algapi.o $(crypto_algapi-y) obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o +obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o + obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c new file mode 100644 index 000000000000..034c939bf91a --- /dev/null +++ b/crypto/blkcipher.c @@ -0,0 +1,405 @@ +/* + * Block chaining cipher operations. + * + * Generic encrypt/decrypt wrapper for ciphers, handles operations across + * multiple page boundaries by using temporary blocks. In user context, + * the kernel is given a chance to schedule us once per page. + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" +#include "scatterwalk.h" + +enum { + BLKCIPHER_WALK_PHYS = 1 << 0, + BLKCIPHER_WALK_SLOW = 1 << 1, + BLKCIPHER_WALK_COPY = 1 << 2, + BLKCIPHER_WALK_DIFF = 1 << 3, +}; + +static int blkcipher_walk_next(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); +static int blkcipher_walk_first(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); + +static inline void blkcipher_map_src(struct blkcipher_walk *walk) +{ + walk->src.virt.addr = scatterwalk_map(&walk->in, 0); +} + +static inline void blkcipher_map_dst(struct blkcipher_walk *walk) +{ + walk->dst.virt.addr = scatterwalk_map(&walk->out, 1); +} + +static inline void blkcipher_unmap_src(struct blkcipher_walk *walk) +{ + scatterwalk_unmap(walk->src.virt.addr, 0); +} + +static inline void blkcipher_unmap_dst(struct blkcipher_walk *walk) +{ + scatterwalk_unmap(walk->dst.virt.addr, 1); +} + +static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) +{ + if (offset_in_page(start + len) < len) + return (u8 *)((unsigned long)(start + len) & PAGE_MASK); + return start; +} + +static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm, + struct blkcipher_walk *walk, + unsigned int bsize) +{ + u8 *addr; + unsigned int alignmask = crypto_blkcipher_alignmask(tfm); + + addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + addr = blkcipher_get_spot(addr, bsize); + scatterwalk_copychunks(addr, &walk->out, bsize, 1); + return bsize; +} + +static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, + unsigned int n) +{ + n = walk->nbytes - n; + + if (walk->flags & BLKCIPHER_WALK_COPY) { + blkcipher_map_dst(walk); + memcpy(walk->dst.virt.addr, walk->page, n); + blkcipher_unmap_dst(walk); + } else if (!(walk->flags & BLKCIPHER_WALK_PHYS)) { + blkcipher_unmap_src(walk); + if (walk->flags & BLKCIPHER_WALK_DIFF) + blkcipher_unmap_dst(walk); + } + + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); + + return n; +} + +int blkcipher_walk_done(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, int err) +{ + struct crypto_blkcipher *tfm = desc->tfm; + unsigned int nbytes = 0; + + if (likely(err >= 0)) { + unsigned int bsize = crypto_blkcipher_blocksize(tfm); + unsigned int n; + + if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) + n = blkcipher_done_fast(walk, err); + else + n = blkcipher_done_slow(tfm, walk, bsize); + + nbytes = walk->total - n; + err = 0; + } + + scatterwalk_done(&walk->in, 0, nbytes); + scatterwalk_done(&walk->out, 1, nbytes); + + walk->total = nbytes; + walk->nbytes = nbytes; + + if (nbytes) { + crypto_yield(desc->flags); + return blkcipher_walk_next(desc, walk); + } + + if (walk->iv != desc->info) + memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm)); + if (walk->buffer != walk->page) + kfree(walk->buffer); + if (walk->page) + free_page((unsigned long)walk->page); + + return err; +} +EXPORT_SYMBOL_GPL(blkcipher_walk_done); + +static inline int blkcipher_next_slow(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + unsigned int bsize, + unsigned int alignmask) +{ + unsigned int n; + + if (walk->buffer) + goto ok; + + walk->buffer = walk->page; + if (walk->buffer) + goto ok; + + n = bsize * 2 + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); + walk->buffer = kmalloc(n, GFP_ATOMIC); + if (!walk->buffer) + return blkcipher_walk_done(desc, walk, -ENOMEM); + +ok: + walk->dst.virt.addr = (u8 *)ALIGN((unsigned long)walk->buffer, + alignmask + 1); + walk->dst.virt.addr = blkcipher_get_spot(walk->dst.virt.addr, bsize); + walk->src.virt.addr = blkcipher_get_spot(walk->dst.virt.addr + bsize, + bsize); + + scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0); + + walk->nbytes = bsize; + walk->flags |= BLKCIPHER_WALK_SLOW; + + return 0; +} + +static inline int blkcipher_next_copy(struct blkcipher_walk *walk) +{ + u8 *tmp = walk->page; + + blkcipher_map_src(walk); + memcpy(tmp, walk->src.virt.addr, walk->nbytes); + blkcipher_unmap_src(walk); + + walk->src.virt.addr = tmp; + walk->dst.virt.addr = tmp; + + return 0; +} + +static inline int blkcipher_next_fast(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + unsigned long diff; + + walk->src.phys.page = scatterwalk_page(&walk->in); + walk->src.phys.offset = offset_in_page(walk->in.offset); + walk->dst.phys.page = scatterwalk_page(&walk->out); + walk->dst.phys.offset = offset_in_page(walk->out.offset); + + if (walk->flags & BLKCIPHER_WALK_PHYS) + return 0; + + diff = walk->src.phys.offset - walk->dst.phys.offset; + diff |= walk->src.virt.page - walk->dst.virt.page; + + blkcipher_map_src(walk); + walk->dst.virt.addr = walk->src.virt.addr; + + if (diff) { + walk->flags |= BLKCIPHER_WALK_DIFF; + blkcipher_map_dst(walk); + } + + return 0; +} + +static int blkcipher_walk_next(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct crypto_blkcipher *tfm = desc->tfm; + unsigned int alignmask = crypto_blkcipher_alignmask(tfm); + unsigned int bsize = crypto_blkcipher_blocksize(tfm); + unsigned int n; + int err; + + n = walk->total; + if (unlikely(n < bsize)) { + desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; + return blkcipher_walk_done(desc, walk, -EINVAL); + } + + walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | + BLKCIPHER_WALK_DIFF); + if (!scatterwalk_aligned(&walk->in, alignmask) || + !scatterwalk_aligned(&walk->out, alignmask)) { + walk->flags |= BLKCIPHER_WALK_COPY; + if (!walk->page) { + walk->page = (void *)__get_free_page(GFP_ATOMIC); + if (!walk->page) + n = 0; + } + } + + n = scatterwalk_clamp(&walk->in, n); + n = scatterwalk_clamp(&walk->out, n); + + if (unlikely(n < bsize)) { + err = blkcipher_next_slow(desc, walk, bsize, alignmask); + goto set_phys_lowmem; + } + + walk->nbytes = n; + if (walk->flags & BLKCIPHER_WALK_COPY) { + err = blkcipher_next_copy(walk); + goto set_phys_lowmem; + } + + return blkcipher_next_fast(desc, walk); + +set_phys_lowmem: + if (walk->flags & BLKCIPHER_WALK_PHYS) { + walk->src.phys.page = virt_to_page(walk->src.virt.addr); + walk->dst.phys.page = virt_to_page(walk->dst.virt.addr); + walk->src.phys.offset &= PAGE_SIZE - 1; + walk->dst.phys.offset &= PAGE_SIZE - 1; + } + return err; +} + +static inline int blkcipher_copy_iv(struct blkcipher_walk *walk, + struct crypto_blkcipher *tfm, + unsigned int alignmask) +{ + unsigned bs = crypto_blkcipher_blocksize(tfm); + unsigned int ivsize = crypto_blkcipher_ivsize(tfm); + unsigned int size = bs * 2 + ivsize + max(bs, ivsize) - (alignmask + 1); + u8 *iv; + + size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + walk->buffer = kmalloc(size, GFP_ATOMIC); + if (!walk->buffer) + return -ENOMEM; + + iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + iv = blkcipher_get_spot(iv, bs) + bs; + iv = blkcipher_get_spot(iv, bs) + bs; + iv = blkcipher_get_spot(iv, ivsize); + + walk->iv = memcpy(iv, walk->iv, ivsize); + return 0; +} + +int blkcipher_walk_virt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + walk->flags &= ~BLKCIPHER_WALK_PHYS; + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_walk_virt); + +int blkcipher_walk_phys(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + walk->flags |= BLKCIPHER_WALK_PHYS; + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_walk_phys); + +static int blkcipher_walk_first(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct crypto_blkcipher *tfm = desc->tfm; + unsigned int alignmask = crypto_blkcipher_alignmask(tfm); + + walk->nbytes = walk->total; + if (unlikely(!walk->total)) + return 0; + + walk->buffer = NULL; + walk->iv = desc->info; + if (unlikely(((unsigned long)walk->iv & alignmask))) { + int err = blkcipher_copy_iv(walk, tfm, alignmask); + if (err) + return err; + } + + scatterwalk_start(&walk->in, walk->in.sg); + scatterwalk_start(&walk->out, walk->out.sg); + walk->page = NULL; + + return blkcipher_walk_next(desc, walk); +} + +static int setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher; + + if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + return cipher->setkey(tfm, key, keylen); +} + +static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg) +{ + struct blkcipher_alg *cipher = &alg->cra_blkcipher; + unsigned int len = alg->cra_ctxsize; + + if (cipher->ivsize) { + len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1); + len += cipher->ivsize; + } + + return len; +} + +static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm) +{ + struct blkcipher_tfm *crt = &tfm->crt_blkcipher; + struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher; + unsigned long align = crypto_tfm_alg_alignmask(tfm) + 1; + unsigned long addr; + + if (alg->ivsize > PAGE_SIZE / 8) + return -EINVAL; + + crt->setkey = setkey; + crt->encrypt = alg->encrypt; + crt->decrypt = alg->decrypt; + + addr = (unsigned long)crypto_tfm_ctx(tfm); + addr = ALIGN(addr, align); + addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align); + crt->iv = (void *)addr; + + return 0; +} + +static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) + __attribute_used__; +static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_printf(m, "type : blkcipher\n"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "min keysize : %u\n", alg->cra_blkcipher.min_keysize); + seq_printf(m, "max keysize : %u\n", alg->cra_blkcipher.max_keysize); + seq_printf(m, "ivsize : %u\n", alg->cra_blkcipher.ivsize); +} + +const struct crypto_type crypto_blkcipher_type = { + .ctxsize = crypto_blkcipher_ctxsize, + .init = crypto_init_blkcipher_ops, +#ifdef CONFIG_PROC_FS + .show = crypto_blkcipher_show, +#endif +}; +EXPORT_SYMBOL_GPL(crypto_blkcipher_type); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic block chaining cipher type"); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f21ae672e8a8..f3946baf0c07 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -55,6 +55,34 @@ struct scatter_walk { unsigned int offset; }; +struct blkcipher_walk { + union { + struct { + struct page *page; + unsigned long offset; + } phys; + + struct { + u8 *page; + u8 *addr; + } virt; + } src, dst; + + struct scatter_walk in; + unsigned int nbytes; + + struct scatter_walk out; + unsigned int total; + + void *page; + u8 *buffer; + u8 *iv; + + int flags; +}; + +extern const struct crypto_type crypto_blkcipher_type; + int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); @@ -69,15 +97,52 @@ struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len, struct crypto_instance *crypto_alloc_instance(const char *name, struct crypto_alg *alg); +int blkcipher_walk_done(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, int err); +int blkcipher_walk_virt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); +int blkcipher_walk_phys(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); + +static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm) +{ + unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); + unsigned long align = crypto_tfm_alg_alignmask(tfm); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (void *)ALIGN(addr, align); +} + static inline void *crypto_instance_ctx(struct crypto_instance *inst) { return inst->__ctx; } +static inline void *crypto_blkcipher_ctx(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_ctx(&tfm->base); +} + +static inline void *crypto_blkcipher_ctx_aligned(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_ctx_aligned(&tfm->base); +} + static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) { return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } +static inline void blkcipher_walk_init(struct blkcipher_walk *walk, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + walk->in.sg = src; + walk->out.sg = dst; + walk->total = nbytes; +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fdecee83878c..5a5466d518e8 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -32,6 +32,7 @@ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_DIGEST 0x00000002 +#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000003 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 #define CRYPTO_ALG_LARVAL 0x00000010 @@ -89,9 +90,16 @@ #endif struct scatterlist; +struct crypto_blkcipher; struct crypto_tfm; struct crypto_type; +struct blkcipher_desc { + struct crypto_blkcipher *tfm; + void *info; + u32 flags; +}; + struct cipher_desc { struct crypto_tfm *tfm; void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); @@ -104,6 +112,21 @@ struct cipher_desc { * Algorithms: modular crypto algorithm implementations, managed * via crypto_register_alg() and crypto_unregister_alg(). */ +struct blkcipher_alg { + int (*setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes); + int (*decrypt)(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes); + + unsigned int min_keysize; + unsigned int max_keysize; + unsigned int ivsize; +}; + struct cipher_alg { unsigned int cia_min_keysize; unsigned int cia_max_keysize; @@ -143,6 +166,7 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; +#define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_compress cra_u.compress @@ -165,6 +189,7 @@ struct crypto_alg { const struct crypto_type *cra_type; union { + struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct digest_alg digest; struct compress_alg compress; @@ -201,6 +226,16 @@ static inline int crypto_alg_available(const char *name, u32 flags) * crypto_free_*(), as well as the various helpers below. */ +struct blkcipher_tfm { + void *iv; + int (*setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); + int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); +}; + struct cipher_tfm { void *cit_iv; unsigned int cit_ivsize; @@ -251,6 +286,7 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; +#define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_digest crt_u.digest #define crt_compress crt_u.compress @@ -260,6 +296,7 @@ struct crypto_tfm { u32 crt_flags; union { + struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct digest_tfm digest; struct compress_tfm compress; @@ -272,6 +309,10 @@ struct crypto_tfm { #define crypto_cipher crypto_tfm +struct crypto_blkcipher { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -380,6 +421,144 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) /* * API wrappers. */ +static inline struct crypto_blkcipher *__crypto_blkcipher_cast( + struct crypto_tfm *tfm) +{ + return (struct crypto_blkcipher *)tfm; +} + +static inline struct crypto_blkcipher *crypto_blkcipher_cast( + struct crypto_tfm *tfm) +{ + BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_BLKCIPHER); + return __crypto_blkcipher_cast(tfm); +} + +static inline struct crypto_blkcipher *crypto_alloc_blkcipher( + const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_blkcipher_tfm( + struct crypto_blkcipher *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm) +{ + crypto_free_tfm(crypto_blkcipher_tfm(tfm)); +} + +static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm)); +} + +static inline struct blkcipher_tfm *crypto_blkcipher_crt( + struct crypto_blkcipher *tfm) +{ + return &crypto_blkcipher_tfm(tfm)->crt_blkcipher; +} + +static inline struct blkcipher_alg *crypto_blkcipher_alg( + struct crypto_blkcipher *tfm) +{ + return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher; +} + +static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm) +{ + return crypto_blkcipher_alg(tfm)->ivsize; +} + +static inline unsigned int crypto_blkcipher_blocksize( + struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm)); +} + +static inline unsigned int crypto_blkcipher_alignmask( + struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm)); +} + +static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm)); +} + +static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags); +} + +static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags); +} + +static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm), + key, keylen); +} + +static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + desc->info = crypto_blkcipher_crt(desc->tfm)->iv; + return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); +} + +static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); +} + +static inline int crypto_blkcipher_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + desc->info = crypto_blkcipher_crt(desc->tfm)->iv; + return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); +} + +static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); +} + +static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm, + const u8 *src, unsigned int len) +{ + memcpy(crypto_blkcipher_crt(tfm)->iv, src, len); +} + +static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm, + u8 *dst, unsigned int len) +{ + memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len); +} + static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) { return (struct crypto_cipher *)tfm; -- cgit v1.2.3-71-gd317 From 378c6697a282c383d89428380a3405bf95189347 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 20:33:54 +1000 Subject: [SUNRPC] GSS: Use block ciphers where applicable This patch converts SUNRPC/GSS to use the new block cipher type where applicable. Signed-off-by: Herbert Xu --- include/linux/sunrpc/gss_krb5.h | 19 ++++++------ include/linux/sunrpc/gss_spkm3.h | 4 +-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 57 ++++++++++++++++++++--------------- net/sunrpc/auth_gss/gss_krb5_mech.c | 24 ++++++++------- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 4 +-- net/sunrpc/auth_gss/gss_krb5_wrap.c | 4 +-- net/sunrpc/auth_gss/gss_spkm3_mech.c | 29 +++++++++--------- 7 files changed, 76 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 1279280d7196..e30ba201910a 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -46,8 +46,8 @@ struct krb5_ctx { unsigned char seed[16]; int signalg; int sealalg; - struct crypto_tfm *enc; - struct crypto_tfm *seq; + struct crypto_blkcipher *enc; + struct crypto_blkcipher *seq; s32 endtime; u32 seq_send; struct xdr_netobj mech_used; @@ -136,26 +136,27 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, u32 -krb5_encrypt(struct crypto_tfm * key, +krb5_encrypt(struct crypto_blkcipher *key, void *iv, void *in, void *out, int length); u32 -krb5_decrypt(struct crypto_tfm * key, +krb5_decrypt(struct crypto_blkcipher *key, void *iv, void *in, void *out, int length); int -gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, - struct page **pages); +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf, + int offset, struct page **pages); int -gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, + int offset); s32 -krb5_make_seq_num(struct crypto_tfm * key, +krb5_make_seq_num(struct crypto_blkcipher *key, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf); s32 -krb5_get_seq_num(struct crypto_tfm * key, +krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, int *direction, s32 * seqnum); diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index 336e218c2782..2cf3fbb40b4f 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h @@ -19,9 +19,9 @@ struct spkm3_ctx { unsigned int req_flags ; struct xdr_netobj share_key; int conf_alg; - struct crypto_tfm* derived_conf_key; + struct crypto_blkcipher *derived_conf_key; int intg_alg; - struct crypto_tfm* derived_integ_key; + struct crypto_blkcipher *derived_integ_key; int keyestb_alg; /* alg used to get share_key */ int owf_alg; /* one way function */ }; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 76b969e6904f..57192dfe3065 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -49,7 +49,7 @@ u32 krb5_encrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -58,26 +58,27 @@ krb5_encrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_encrypt: output data:\n"); print_hexl((u32 *)out, length, 0); @@ -90,7 +91,7 @@ EXPORT_SYMBOL(krb5_encrypt); u32 krb5_decrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -99,25 +100,26 @@ krb5_decrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_decrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_decrypt: output_data:\n"); print_hexl((u32 *)out, length, 0); @@ -240,7 +242,7 @@ EXPORT_SYMBOL(make_checksum); struct encryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; int pos; struct xdr_buf *outbuf; struct page **pages; @@ -285,8 +287,8 @@ encryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, - thislen, desc->iv); + ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, + desc->infrags, thislen); if (ret) return ret; if (fraglen) { @@ -305,16 +307,18 @@ encryptor(struct scatterlist *sg, void *data) } int -gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, - struct page **pages) +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset, struct page **pages) { int ret; struct encryptor_desc desc; - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.pos = offset; desc.outbuf = buf; desc.pages = pages; @@ -329,7 +333,7 @@ EXPORT_SYMBOL(gss_encrypt_xdr_buf); struct decryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; struct scatterlist frags[4]; int fragno; int fraglen; @@ -355,8 +359,8 @@ decryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, - thislen, desc->iv); + ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, + desc->frags, thislen); if (ret) return ret; if (fraglen) { @@ -373,15 +377,18 @@ decryptor(struct scatterlist *sg, void *data) } int -gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset) { struct decryptor_desc desc; /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 70e1e53a632b..325e72e4fd31 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -34,6 +34,7 @@ * */ +#include #include #include #include @@ -78,10 +79,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res) +get_key(const void *p, const void *end, struct crypto_blkcipher **res) { struct xdr_netobj key; - int alg, alg_mode; + int alg; char *alg_name; p = simple_get_bytes(p, end, &alg, sizeof(alg)); @@ -93,18 +94,19 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) switch (alg) { case ENCTYPE_DES_CBC_RAW: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; break; default: printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; } @@ -113,7 +115,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: kfree(key.data); p = ERR_PTR(-EINVAL); @@ -172,9 +174,9 @@ gss_import_sec_context_kerberos(const void *p, return 0; out_err_free_key2: - crypto_free_tfm(ctx->seq); + crypto_free_blkcipher(ctx->seq); out_err_free_key1: - crypto_free_tfm(ctx->enc); + crypto_free_blkcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); out_err_free_ctx: @@ -187,8 +189,8 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - crypto_free_tfm(kctx->seq); - crypto_free_tfm(kctx->enc); + crypto_free_blkcipher(kctx->seq); + crypto_free_blkcipher(kctx->enc); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c53ead39118d..c604baf3a5f6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -41,7 +41,7 @@ #endif s32 -krb5_make_seq_num(struct crypto_tfm *key, +krb5_make_seq_num(struct crypto_blkcipher *key, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -62,7 +62,7 @@ krb5_make_seq_num(struct crypto_tfm *key, } s32 -krb5_get_seq_num(struct crypto_tfm *key, +krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, int *direction, s32 * seqnum) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 89d1f3e14128..f179415d0c38 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, goto out_err; } - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; @@ -346,7 +346,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); data_start = ptr + 22 + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 88dcb52d171b..bdedf456bc17 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -34,6 +34,7 @@ * */ +#include #include #include #include @@ -83,10 +84,11 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) +get_key(const void *p, const void *end, struct crypto_blkcipher **res, + int *resalg) { struct xdr_netobj key = { 0 }; - int alg_mode,setkey = 0; + int setkey = 0; char *alg_name; p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); @@ -98,14 +100,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) switch (*resalg) { case NID_des_cbc: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; setkey = 1; break; case NID_cast5_cbc: /* XXXX here in name only, not used */ - alg_name = "cast5"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(cast5)"; setkey = 0; /* XXX will need to set to 1 */ break; case NID_md5: @@ -113,19 +113,20 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); } alg_name = "md5"; - alg_mode = 0; setkey = 0; break; default: dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } if (setkey) { - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); goto out_err_free_tfm; } @@ -136,7 +137,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: if(key.len > 0) kfree(key.data); @@ -204,9 +205,9 @@ gss_import_sec_context_spkm3(const void *p, size_t len, return 0; out_err_free_key2: - crypto_free_tfm(ctx->derived_integ_key); + crypto_free_blkcipher(ctx->derived_integ_key); out_err_free_key1: - crypto_free_tfm(ctx->derived_conf_key); + crypto_free_blkcipher(ctx->derived_conf_key); out_err_free_s_key: kfree(ctx->share_key.data); out_err_free_mech: @@ -223,8 +224,8 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - crypto_free_tfm(sctx->derived_integ_key); - crypto_free_tfm(sctx->derived_conf_key); + crypto_free_blkcipher(sctx->derived_integ_key); + crypto_free_blkcipher(sctx->derived_conf_key); kfree(sctx->share_key.data); kfree(sctx->mech_used.data); kfree(sctx); -- cgit v1.2.3-71-gd317 From 03fd9cee7f46dddcd2562bc175d2c348502ce281 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 14 Aug 2006 23:11:53 +1000 Subject: [PATCH] scatterlist: Add const to sg_set_buf/sg_init_one pointer argument This patch adds a const modifier to the buf argument of sg_set_buf and sg_init_one. This lets people call it with pointers that are const. Signed-off-by: Herbert Xu --- include/linux/scatterlist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 66ff545552f7..4efbd9c445f5 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -5,7 +5,7 @@ #include #include -static inline void sg_set_buf(struct scatterlist *sg, void *buf, +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { sg->page = virt_to_page(buf); @@ -13,7 +13,7 @@ static inline void sg_set_buf(struct scatterlist *sg, void *buf, sg->length = buflen; } -static inline void sg_init_one(struct scatterlist *sg, void *buf, +static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) { memset(sg, 0, sizeof(*sg)); -- cgit v1.2.3-71-gd317 From 7226bc877a22244e8003924031435a4bffd52654 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:40:49 +1000 Subject: [CRYPTO] api: Mark parts of cipher interface as deprecated Mark the parts of the cipher interface that have been replaced by block ciphers as deprecated. Thanks to Andrew Morton for suggesting doing this before removing them completely. Signed-off-by: Herbert Xu --- crypto/cipher.c | 34 ++++++++++++++++++++++++++++------ include/linux/crypto.h | 48 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 64 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/crypto/cipher.c b/crypto/cipher.c index 326461780673..9e03701cfdcc 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -23,6 +23,28 @@ #include "internal.h" #include "scatterwalk.h" +struct cipher_alg_compat { + unsigned int cia_min_keysize; + unsigned int cia_max_keysize; + int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + + unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); +}; + static inline void xor_64(u8 *a, const u8 *b) { ((u32 *)a)[0] ^= ((u32 *)b)[0]; @@ -276,7 +298,7 @@ static int ecb_encrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_encrypt; @@ -291,7 +313,7 @@ static int ecb_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_decrypt; @@ -306,7 +328,7 @@ static int cbc_encrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_encrypt; @@ -322,7 +344,7 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_encrypt; @@ -338,7 +360,7 @@ static int cbc_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_decrypt; @@ -354,7 +376,7 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_decrypt; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 5a5466d518e8..0be666b50463 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -137,16 +136,16 @@ struct cipher_alg { unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; }; struct digest_alg { @@ -358,18 +357,23 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } +static unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm) + __deprecated; static inline unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); return tfm->__crt_alg->cra_cipher.cia_min_keysize; } +static unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm) + __deprecated; static inline unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); return tfm->__crt_alg->cra_cipher.cia_max_keysize; } +static unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) __deprecated; static inline unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); @@ -622,6 +626,13 @@ static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); } +static inline int crypto_cipher_setkey(struct crypto_cipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm), + key, keylen); +} + static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src) { @@ -671,13 +682,10 @@ static inline int crypto_digest_setkey(struct crypto_tfm *tfm, return tfm->crt_digest.dit_setkey(tfm, key, keylen); } -static inline int crypto_cipher_setkey(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - return tfm->crt_cipher.cit_setkey(tfm, key, keylen); -} - +static int crypto_cipher_encrypt(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) __deprecated; static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -687,6 +695,10 @@ static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_encrypt(tfm, dst, src, nbytes); } +static int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, u8 *iv) __deprecated; static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -696,6 +708,10 @@ static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv); } +static int crypto_cipher_decrypt(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) __deprecated; static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -705,6 +721,10 @@ static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_decrypt(tfm, dst, src, nbytes); } +static int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, u8 *iv) __deprecated; static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -714,6 +734,8 @@ static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv); } +static void crypto_cipher_set_iv(struct crypto_tfm *tfm, + const u8 *src, unsigned int len) __deprecated; static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm, const u8 *src, unsigned int len) { @@ -721,6 +743,8 @@ static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm, memcpy(tfm->crt_cipher.cit_iv, src, len); } +static void crypto_cipher_get_iv(struct crypto_tfm *tfm, + u8 *dst, unsigned int len) __deprecated; static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm, u8 *dst, unsigned int len) { -- cgit v1.2.3-71-gd317 From 055bcee3102dc35f019b69df9c2618e9d6dd1c09 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 19 Aug 2006 22:24:23 +1000 Subject: [CRYPTO] digest: Added user API for new hash type The existing digest user interface is inadequate for support asynchronous operations. For one it doesn't return a value to indicate success or failure, nor does it take a per-operation descriptor which is essential for the issuing of requests while other requests are still outstanding. This patch is the first in a series of steps to remodel the interface for asynchronous operations. For the ease of transition the new interface will be known as "hash" while the old one will remain as "digest". This patch also changes sg_next to allow chaining. Signed-off-by: Herbert Xu --- crypto/Kconfig | 4 ++ crypto/Makefile | 3 + crypto/digest.c | 129 ++++++++++++++++++++++++++++-------- crypto/hash.c | 61 +++++++++++++++++ crypto/hmac.c | 12 ++-- crypto/scatterwalk.h | 4 +- include/crypto/algapi.h | 6 ++ include/linux/crypto.h | 172 ++++++++++++++++++++++++++++++++++++++---------- 8 files changed, 321 insertions(+), 70 deletions(-) create mode 100644 crypto/hash.c (limited to 'include/linux') diff --git a/crypto/Kconfig b/crypto/Kconfig index be5eb0cb7c30..69c5f992bcd4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -20,6 +20,10 @@ config CRYPTO_BLKCIPHER tristate select CRYPTO_ALGAPI +config CRYPTO_HASH + tristate + select CRYPTO_ALGAPI + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 5e1ff4e0b1fc..72366208e291 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -10,6 +10,9 @@ obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o +crypto_hash-objs := hash.o +obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o + obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o diff --git a/crypto/digest.c b/crypto/digest.c index 96244a528844..5873063db840 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -11,29 +11,89 @@ * any later version. * */ -#include + #include #include #include -#include +#include +#include + #include "internal.h" +#include "scatterwalk.h" -static void init(struct crypto_tfm *tfm) +void crypto_digest_init(struct crypto_tfm *tfm) { - tfm->__crt_alg->cra_digest.dia_init(tfm); + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + + crypto_hash_init(&desc); } +EXPORT_SYMBOL_GPL(crypto_digest_init); -static void update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) +void crypto_digest_update(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg) { + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + unsigned int nbytes = 0; unsigned int i; + + for (i = 0; i < nsg; i++) + nbytes += sg[i].length; + + crypto_hash_update(&desc, sg, nbytes); +} +EXPORT_SYMBOL_GPL(crypto_digest_update); + +void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) +{ + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + + crypto_hash_final(&desc, out); +} +EXPORT_SYMBOL_GPL(crypto_digest_final); + +void crypto_digest_digest(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg, u8 *out) +{ + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + unsigned int nbytes = 0; + unsigned int i; + + for (i = 0; i < nsg; i++) + nbytes += sg[i].length; + + crypto_hash_digest(&desc, sg, nbytes, out); +} +EXPORT_SYMBOL_GPL(crypto_digest_digest); + +static int init(struct hash_desc *desc) +{ + struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); + + tfm->__crt_alg->cra_digest.dia_init(tfm); + return 0; +} + +static int update(struct hash_desc *desc, + struct scatterlist *sg, unsigned int nbytes) +{ + struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); - for (i = 0; i < nsg; i++) { + if (!nbytes) + return 0; + + for (;;) { + struct page *pg = sg->page; + unsigned int offset = sg->offset; + unsigned int l = sg->length; - struct page *pg = sg[i].page; - unsigned int offset = sg[i].offset; - unsigned int l = sg[i].length; + if (unlikely(l > nbytes)) + l = nbytes; + nbytes -= l; do { unsigned int bytes_from_page = min(l, ((unsigned int) @@ -55,16 +115,23 @@ static void update(struct crypto_tfm *tfm, tfm->__crt_alg->cra_digest.dia_update(tfm, p, bytes_from_page); crypto_kunmap(src, 0); - crypto_yield(tfm->crt_flags); + crypto_yield(desc->flags); offset = 0; pg++; l -= bytes_from_page; } while (l > 0); + + if (!nbytes) + break; + sg = sg_next(sg); } + + return 0; } -static void final(struct crypto_tfm *tfm, u8 *out) +static int final(struct hash_desc *desc, u8 *out) { + struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); struct digest_alg *digest = &tfm->__crt_alg->cra_digest; @@ -78,26 +145,30 @@ static void final(struct crypto_tfm *tfm, u8 *out) memcpy(out, dst, digest->dia_digestsize); } else digest->dia_final(tfm, out); + + return 0; } -static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK); return -ENOSYS; } -static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + struct crypto_tfm *tfm = crypto_hash_tfm(hash); + + crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK); return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); } -static void digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out) +static int digest(struct hash_desc *desc, + struct scatterlist *sg, unsigned int nbytes, u8 *out) { - init(tfm); - update(tfm, sg, nsg); - final(tfm, out); + init(desc); + update(desc, sg, nbytes); + return final(desc, out); } int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags) @@ -107,14 +178,18 @@ int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags) int crypto_init_digest_ops(struct crypto_tfm *tfm) { - struct digest_tfm *ops = &tfm->crt_digest; + struct hash_tfm *ops = &tfm->crt_hash; struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; + + if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm)) + return -EINVAL; - ops->dit_init = init; - ops->dit_update = update; - ops->dit_final = final; - ops->dit_digest = digest; - ops->dit_setkey = dalg->dia_setkey ? setkey : nosetkey; + ops->init = init; + ops->update = update; + ops->final = final; + ops->digest = digest; + ops->setkey = dalg->dia_setkey ? setkey : nosetkey; + ops->digestsize = dalg->dia_digestsize; return crypto_alloc_hmac_block(tfm); } diff --git a/crypto/hash.c b/crypto/hash.c new file mode 100644 index 000000000000..cdec23d885fe --- /dev/null +++ b/crypto/hash.c @@ -0,0 +1,61 @@ +/* + * Cryptographic Hash operations. + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include +#include +#include + +#include "internal.h" + +static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg) +{ + return alg->cra_ctxsize; +} + +static int crypto_init_hash_ops(struct crypto_tfm *tfm) +{ + struct hash_tfm *crt = &tfm->crt_hash; + struct hash_alg *alg = &tfm->__crt_alg->cra_hash; + + if (alg->digestsize > crypto_tfm_alg_blocksize(tfm)) + return -EINVAL; + + crt->init = alg->init; + crt->update = alg->update; + crt->final = alg->final; + crt->digest = alg->digest; + crt->setkey = alg->setkey; + crt->digestsize = alg->digestsize; + + return 0; +} + +static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) + __attribute_used__; +static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_printf(m, "type : hash\n"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "digestsize : %u\n", alg->cra_hash.digestsize); +} + +const struct crypto_type crypto_hash_type = { + .ctxsize = crypto_hash_ctxsize, + .init = crypto_init_hash_ops, +#ifdef CONFIG_PROC_FS + .show = crypto_hash_show, +#endif +}; +EXPORT_SYMBOL_GPL(crypto_hash_type); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic cryptographic hash type"); diff --git a/crypto/hmac.c b/crypto/hmac.c index 46120dee5ada..ecf7b0a95b56 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -35,9 +35,9 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm) BUG_ON(!crypto_tfm_alg_blocksize(tfm)); - tfm->crt_digest.dit_hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm), - GFP_KERNEL); - if (tfm->crt_digest.dit_hmac_block == NULL) + tfm->crt_hash.hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm), + GFP_KERNEL); + if (tfm->crt_hash.hmac_block == NULL) ret = -ENOMEM; return ret; @@ -46,14 +46,14 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm) void crypto_free_hmac_block(struct crypto_tfm *tfm) { - kfree(tfm->crt_digest.dit_hmac_block); + kfree(tfm->crt_hash.hmac_block); } void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen) { unsigned int i; struct scatterlist tmp; - char *ipad = tfm->crt_digest.dit_hmac_block; + char *ipad = tfm->crt_hash.hmac_block; if (*keylen > crypto_tfm_alg_blocksize(tfm)) { hash_key(tfm, key, *keylen); @@ -83,7 +83,7 @@ void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key, { unsigned int i; struct scatterlist tmp; - char *opad = tfm->crt_digest.dit_hmac_block; + char *opad = tfm->crt_hash.hmac_block; if (*keylen > crypto_tfm_alg_blocksize(tfm)) { hash_key(tfm, key, *keylen); diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index ace595a2e119..f1592cc2d0f4 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -20,11 +20,9 @@ #include "internal.h" -/* Define sg_next is an inline routine now in case we want to change - scatterlist to a linked list later. */ static inline struct scatterlist *sg_next(struct scatterlist *sg) { - return sg + 1; + return (++sg)->length ? sg : (void *)sg->page; } static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in, diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 444f602724db..5748aecdb414 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -82,6 +82,7 @@ struct blkcipher_walk { }; extern const struct crypto_type crypto_blkcipher_type; +extern const struct crypto_type crypto_hash_type; void crypto_mod_put(struct crypto_alg *alg); @@ -136,6 +137,11 @@ static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } +static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm) +{ + return crypto_tfm_ctx_aligned(&tfm->base); +} + static inline void blkcipher_walk_init(struct blkcipher_walk *walk, struct scatterlist *dst, struct scatterlist *src, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 0be666b50463..40c0aab8ad4c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -31,8 +31,11 @@ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_DIGEST 0x00000002 -#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000003 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 +#define CRYPTO_ALG_TYPE_HASH 0x00000003 +#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 +#define CRYPTO_ALG_TYPE_COMPRESS 0x00000005 + +#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 @@ -90,6 +93,7 @@ struct scatterlist; struct crypto_blkcipher; +struct crypto_hash; struct crypto_tfm; struct crypto_type; @@ -107,6 +111,11 @@ struct cipher_desc { void *info; }; +struct hash_desc { + struct crypto_hash *tfm; + u32 flags; +}; + /* * Algorithms: modular crypto algorithm implementations, managed * via crypto_register_alg() and crypto_unregister_alg(). @@ -158,6 +167,19 @@ struct digest_alg { unsigned int keylen); }; +struct hash_alg { + int (*init)(struct hash_desc *desc); + int (*update)(struct hash_desc *desc, struct scatterlist *sg, + unsigned int nbytes); + int (*final)(struct hash_desc *desc, u8 *out); + int (*digest)(struct hash_desc *desc, struct scatterlist *sg, + unsigned int nbytes, u8 *out); + int (*setkey)(struct crypto_hash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; +}; + struct compress_alg { int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); @@ -168,6 +190,7 @@ struct compress_alg { #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest +#define cra_hash cra_u.hash #define cra_compress cra_u.compress struct crypto_alg { @@ -191,6 +214,7 @@ struct crypto_alg { struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct digest_alg digest; + struct hash_alg hash; struct compress_alg compress; } cra_u; @@ -262,18 +286,19 @@ struct cipher_tfm { void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct digest_tfm { - void (*dit_init)(struct crypto_tfm *tfm); - void (*dit_update)(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg); - void (*dit_final)(struct crypto_tfm *tfm, u8 *out); - void (*dit_digest)(struct crypto_tfm *tfm, struct scatterlist *sg, - unsigned int nsg, u8 *out); - int (*dit_setkey)(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen); +struct hash_tfm { + int (*init)(struct hash_desc *desc); + int (*update)(struct hash_desc *desc, + struct scatterlist *sg, unsigned int nsg); + int (*final)(struct hash_desc *desc, u8 *out); + int (*digest)(struct hash_desc *desc, struct scatterlist *sg, + unsigned int nsg, u8 *out); + int (*setkey)(struct crypto_hash *tfm, const u8 *key, + unsigned int keylen); #ifdef CONFIG_CRYPTO_HMAC - void *dit_hmac_block; + void *hmac_block; #endif + unsigned int digestsize; }; struct compress_tfm { @@ -287,7 +312,7 @@ struct compress_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher -#define crt_digest crt_u.digest +#define crt_hash crt_u.hash #define crt_compress crt_u.compress struct crypto_tfm { @@ -297,7 +322,7 @@ struct crypto_tfm { union { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; - struct digest_tfm digest; + struct hash_tfm hash; struct compress_tfm compress; } crt_u; @@ -312,6 +337,10 @@ struct crypto_blkcipher { struct crypto_tfm base; }; +struct crypto_hash { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -647,39 +676,114 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -static inline void crypto_digest_init(struct crypto_tfm *tfm) +void crypto_digest_init(struct crypto_tfm *tfm); +void crypto_digest_update(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg); +void crypto_digest_final(struct crypto_tfm *tfm, u8 *out); +void crypto_digest_digest(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg, u8 *out); + +static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_init(tfm); + return (struct crypto_hash *)tfm; } -static inline void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, - unsigned int nsg) +static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_update(tfm, sg, nsg); + BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_HASH) & + CRYPTO_ALG_TYPE_HASH_MASK); + return __crypto_hash_cast(tfm); } -static inline void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) +static inline int crypto_digest_setkey(struct crypto_tfm *tfm, + const u8 *key, unsigned int keylen) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_final(tfm, out); + return tfm->crt_hash.setkey(crypto_hash_cast(tfm), key, keylen); } -static inline void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, - unsigned int nsg, u8 *out) +static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name, + u32 type, u32 mask) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_digest(tfm, sg, nsg, out); + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_HASH; + mask |= CRYPTO_ALG_TYPE_HASH_MASK; + + return __crypto_hash_cast(crypto_alloc_base(alg_name, type, mask)); } -static inline int crypto_digest_setkey(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen) +static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - return tfm->crt_digest.dit_setkey(tfm, key, keylen); + return &tfm->base; +} + +static inline void crypto_free_hash(struct crypto_hash *tfm) +{ + crypto_free_tfm(crypto_hash_tfm(tfm)); +} + +static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm) +{ + return &crypto_hash_tfm(tfm)->crt_hash; +} + +static inline unsigned int crypto_hash_blocksize(struct crypto_hash *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_hash_tfm(tfm)); +} + +static inline unsigned int crypto_hash_alignmask(struct crypto_hash *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_hash_tfm(tfm)); +} + +static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm) +{ + return crypto_hash_crt(tfm)->digestsize; +} + +static inline u32 crypto_hash_get_flags(struct crypto_hash *tfm) +{ + return crypto_tfm_get_flags(crypto_hash_tfm(tfm)); +} + +static inline void crypto_hash_set_flags(struct crypto_hash *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_hash_tfm(tfm), flags); +} + +static inline void crypto_hash_clear_flags(struct crypto_hash *tfm, u32 flags) +{ + crypto_tfm_clear_flags(crypto_hash_tfm(tfm), flags); +} + +static inline int crypto_hash_init(struct hash_desc *desc) +{ + return crypto_hash_crt(desc->tfm)->init(desc); +} + +static inline int crypto_hash_update(struct hash_desc *desc, + struct scatterlist *sg, + unsigned int nbytes) +{ + return crypto_hash_crt(desc->tfm)->update(desc, sg, nbytes); +} + +static inline int crypto_hash_final(struct hash_desc *desc, u8 *out) +{ + return crypto_hash_crt(desc->tfm)->final(desc, out); +} + +static inline int crypto_hash_digest(struct hash_desc *desc, + struct scatterlist *sg, + unsigned int nbytes, u8 *out) +{ + return crypto_hash_crt(desc->tfm)->digest(desc, sg, nbytes, out); +} + +static inline int crypto_hash_setkey(struct crypto_hash *hash, + const u8 *key, unsigned int keylen) +{ + return crypto_hash_crt(hash)->setkey(hash, key, keylen); } static int crypto_cipher_encrypt(struct crypto_tfm *tfm, -- cgit v1.2.3-71-gd317 From 8425165dfed27945e8509c141cea245d1739e372 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 15:25:22 +1000 Subject: [CRYPTO] digest: Remove old HMAC implementation This patch removes the old HMAC implementation now that nobody uses it anymore. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/Kconfig | 2 +- crypto/digest.c | 3 +- crypto/hmac.c | 101 ------------------------------------------------- crypto/internal.h | 13 ------- include/linux/crypto.h | 16 -------- 5 files changed, 2 insertions(+), 133 deletions(-) (limited to 'include/linux') diff --git a/crypto/Kconfig b/crypto/Kconfig index f07d9237950f..1e2f39c21180 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -33,7 +33,7 @@ config CRYPTO_MANAGER cbc(aes). config CRYPTO_HMAC - bool "HMAC support" + tristate "HMAC support" select CRYPTO_HASH help HMAC: Keyed-Hashing for Message Authentication (RFC2104). diff --git a/crypto/digest.c b/crypto/digest.c index 5873063db840..0155a94e4b15 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -191,10 +191,9 @@ int crypto_init_digest_ops(struct crypto_tfm *tfm) ops->setkey = dalg->dia_setkey ? setkey : nosetkey; ops->digestsize = dalg->dia_digestsize; - return crypto_alloc_hmac_block(tfm); + return 0; } void crypto_exit_digest_ops(struct crypto_tfm *tfm) { - crypto_free_hmac_block(tfm); } diff --git a/crypto/hmac.c b/crypto/hmac.c index eac77e294740..f403b6946047 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -29,107 +29,6 @@ struct hmac_ctx { struct crypto_hash *child; }; -static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen) -{ - struct scatterlist tmp; - - sg_set_buf(&tmp, key, keylen); - crypto_digest_digest(tfm, &tmp, 1, key); -} - -int crypto_alloc_hmac_block(struct crypto_tfm *tfm) -{ - int ret = 0; - - BUG_ON(!crypto_tfm_alg_blocksize(tfm)); - - tfm->crt_hash.hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm), - GFP_KERNEL); - if (tfm->crt_hash.hmac_block == NULL) - ret = -ENOMEM; - - return ret; - -} - -void crypto_free_hmac_block(struct crypto_tfm *tfm) -{ - kfree(tfm->crt_hash.hmac_block); -} - -void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen) -{ - unsigned int i; - struct scatterlist tmp; - char *ipad = tfm->crt_hash.hmac_block; - - if (*keylen > crypto_tfm_alg_blocksize(tfm)) { - hash_key(tfm, key, *keylen); - *keylen = crypto_tfm_alg_digestsize(tfm); - } - - memset(ipad, 0, crypto_tfm_alg_blocksize(tfm)); - memcpy(ipad, key, *keylen); - - for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++) - ipad[i] ^= 0x36; - - sg_set_buf(&tmp, ipad, crypto_tfm_alg_blocksize(tfm)); - - crypto_digest_init(tfm); - crypto_digest_update(tfm, &tmp, 1); -} - -void crypto_hmac_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) -{ - crypto_digest_update(tfm, sg, nsg); -} - -void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key, - unsigned int *keylen, u8 *out) -{ - unsigned int i; - struct scatterlist tmp; - char *opad = tfm->crt_hash.hmac_block; - - if (*keylen > crypto_tfm_alg_blocksize(tfm)) { - hash_key(tfm, key, *keylen); - *keylen = crypto_tfm_alg_digestsize(tfm); - } - - crypto_digest_final(tfm, out); - - memset(opad, 0, crypto_tfm_alg_blocksize(tfm)); - memcpy(opad, key, *keylen); - - for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++) - opad[i] ^= 0x5c; - - sg_set_buf(&tmp, opad, crypto_tfm_alg_blocksize(tfm)); - - crypto_digest_init(tfm); - crypto_digest_update(tfm, &tmp, 1); - - sg_set_buf(&tmp, out, crypto_tfm_alg_digestsize(tfm)); - - crypto_digest_update(tfm, &tmp, 1); - crypto_digest_final(tfm, out); -} - -void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen, - struct scatterlist *sg, unsigned int nsg, u8 *out) -{ - crypto_hmac_init(tfm, key, keylen); - crypto_hmac_update(tfm, sg, nsg); - crypto_hmac_final(tfm, key, keylen, out); -} - -EXPORT_SYMBOL_GPL(crypto_hmac_init); -EXPORT_SYMBOL_GPL(crypto_hmac_update); -EXPORT_SYMBOL_GPL(crypto_hmac_final); -EXPORT_SYMBOL_GPL(crypto_hmac); - static inline void *align_ptr(void *p, unsigned int align) { return (void *)ALIGN((unsigned long)p, align); diff --git a/crypto/internal.h b/crypto/internal.h index 93d9b10ff914..2da6ad4f3593 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -73,19 +73,6 @@ static inline void crypto_yield(u32 flags) cond_resched(); } -#ifdef CONFIG_CRYPTO_HMAC -int crypto_alloc_hmac_block(struct crypto_tfm *tfm); -void crypto_free_hmac_block(struct crypto_tfm *tfm); -#else -static inline int crypto_alloc_hmac_block(struct crypto_tfm *tfm) -{ - return 0; -} - -static inline void crypto_free_hmac_block(struct crypto_tfm *tfm) -{ } -#endif - #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); void __exit crypto_exit_proc(void); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 40c0aab8ad4c..929fb9ad1314 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -295,9 +295,6 @@ struct hash_tfm { unsigned int nsg, u8 *out); int (*setkey)(struct crypto_hash *tfm, const u8 *key, unsigned int keylen); -#ifdef CONFIG_CRYPTO_HMAC - void *hmac_block; -#endif unsigned int digestsize; }; @@ -872,18 +869,5 @@ static inline int crypto_comp_decompress(struct crypto_tfm *tfm, return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen); } -/* - * HMAC support. - */ -#ifdef CONFIG_CRYPTO_HMAC -void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen); -void crypto_hmac_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg); -void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key, - unsigned int *keylen, u8 *out); -void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen, - struct scatterlist *sg, unsigned int nsg, u8 *out); -#endif /* CONFIG_CRYPTO_HMAC */ - #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3-71-gd317 From fce32d70ba834129b164c40c2d4260e5a7a7d850 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 17:35:45 +1000 Subject: [CRYPTO] api: Add crypto_comp and crypto_has_* This patch adds the crypto_comp type to complete the compile-time checking conversion. The functions crypto_has_alg and crypto_has_cipher, etc. are also added to replace crypto_alg_available. Signed-off-by: Herbert Xu --- crypto/api.c | 14 ++++++++ include/linux/crypto.h | 90 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index edaa843d8e83..2e84d4b54790 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -482,3 +482,17 @@ int crypto_alg_available(const char *name, u32 flags) EXPORT_SYMBOL_GPL(crypto_alloc_tfm); EXPORT_SYMBOL_GPL(crypto_free_tfm); EXPORT_SYMBOL_GPL(crypto_alg_available); + +int crypto_has_alg(const char *name, u32 type, u32 mask) +{ + int ret = 0; + struct crypto_alg *alg = crypto_alg_mod_lookup(name, type, mask); + + if (!IS_ERR(alg)) { + crypto_mod_put(alg); + ret = 1; + } + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_has_alg); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 929fb9ad1314..cf91c4c0638b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -236,11 +236,17 @@ int crypto_unregister_alg(struct crypto_alg *alg); */ #ifdef CONFIG_CRYPTO int crypto_alg_available(const char *name, u32 flags); +int crypto_has_alg(const char *name, u32 type, u32 mask); #else static inline int crypto_alg_available(const char *name, u32 flags) { return 0; } + +static inline int crypto_has_alg(const char *name, u32 type, u32 mask) +{ + return 0; +} #endif /* @@ -329,6 +335,7 @@ struct crypto_tfm { }; #define crypto_cipher crypto_tfm +#define crypto_comp crypto_tfm struct crypto_blkcipher { struct crypto_tfm base; @@ -485,6 +492,15 @@ static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm) crypto_free_tfm(crypto_blkcipher_tfm(tfm)); } +static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm) { return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm)); @@ -620,6 +636,15 @@ static inline void crypto_free_cipher(struct crypto_cipher *tfm) crypto_free_tfm(crypto_cipher_tfm(tfm)); } +static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) { return &crypto_cipher_tfm(tfm)->crt_cipher; @@ -718,6 +743,15 @@ static inline void crypto_free_hash(struct crypto_hash *tfm) crypto_free_tfm(crypto_hash_tfm(tfm)); } +static inline int crypto_has_hash(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_HASH; + mask |= CRYPTO_ALG_TYPE_HASH_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm) { return &crypto_hash_tfm(tfm)->crt_hash; @@ -853,20 +887,64 @@ static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm, memcpy(dst, tfm->crt_cipher.cit_iv, len); } -static inline int crypto_comp_compress(struct crypto_tfm *tfm, +static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_comp *)tfm; +} + +static inline struct crypto_comp *crypto_comp_cast(struct crypto_tfm *tfm) +{ + BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_COMPRESS) & + CRYPTO_ALG_TYPE_MASK); + return __crypto_comp_cast(tfm); +} + +static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_COMPRESS; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_comp_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm) +{ + return tfm; +} + +static inline void crypto_free_comp(struct crypto_comp *tfm) +{ + crypto_free_tfm(crypto_comp_tfm(tfm)); +} + +static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_COMPRESS; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) +{ + return &crypto_comp_tfm(tfm)->crt_compress; +} + +static inline int crypto_comp_compress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS); - return tfm->crt_compress.cot_compress(tfm, src, slen, dst, dlen); + return crypto_comp_crt(tfm)->cot_compress(tfm, src, slen, dst, dlen); } -static inline int crypto_comp_decompress(struct crypto_tfm *tfm, +static inline int crypto_comp_decompress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS); - return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen); + return crypto_comp_crt(tfm)->cot_decompress(tfm, src, slen, dst, dlen); } #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3-71-gd317 From e4d5b79c661c7cfca9d8d5afd040a295f128d3cb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:12:40 +1000 Subject: [CRYPTO] users: Use crypto_comp and crypto_has_* This patch converts all users to use the new crypto_comp type and the crypto_has_* functions. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 8 ++++---- drivers/crypto/padlock.c | 6 +++--- drivers/net/ppp_mppe.c | 4 ++-- include/linux/crypto.h | 5 +++++ include/net/ipcomp.h | 5 ++--- net/ipv4/ipcomp.c | 25 +++++++++++++------------ net/ipv6/ipcomp6.c | 25 +++++++++++++------------ net/xfrm/xfrm_algo.c | 27 ++++++++++++++++++--------- 8 files changed, 60 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 840ab8be0b96..83307420d31c 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -749,7 +749,7 @@ static void test_deflate(void) { unsigned int i; char result[COMP_BUF_SIZE]; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; struct comp_testvec *tv; unsigned int tsize; @@ -821,7 +821,7 @@ static void test_deflate(void) ilen, dlen); } out: - crypto_free_tfm(tfm); + crypto_free_comp(tfm); } static void test_available(void) @@ -830,8 +830,8 @@ static void test_available(void) while (*name) { printk("alg %s ", *name); - printk((crypto_alg_available(*name, 0)) ? - "found\n" : "not found\n"); + printk(crypto_has_alg(*name, 0, CRYPTO_ALG_ASYNC) ? + "found\n" : "not found\n"); name++; } } diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c index ce581684f4b4..d6d7dd5bb98c 100644 --- a/drivers/crypto/padlock.c +++ b/drivers/crypto/padlock.c @@ -26,13 +26,13 @@ static int __init padlock_init(void) { int success = 0; - if (crypto_alg_available("aes-padlock", 0)) + if (crypto_has_cipher("aes-padlock", 0, 0)) success++; - if (crypto_alg_available("sha1-padlock", 0)) + if (crypto_has_hash("sha1-padlock", 0, 0)) success++; - if (crypto_alg_available("sha256-padlock", 0)) + if (crypto_has_hash("sha256-padlock", 0, 0)) success++; if (!success) { diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index e7a0eb4fca60..f3655fd772f5 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -710,8 +710,8 @@ static struct compressor ppp_mppe = { static int __init ppp_mppe_init(void) { int answer; - if (!(crypto_alg_available("ecb(arc4)", 0) && - crypto_alg_available("sha1", 0))) + if (!(crypto_has_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) && + crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC))) return -ENODEV; sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index cf91c4c0638b..d4f9948b64b1 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -928,6 +928,11 @@ static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) return crypto_has_alg(alg_name, type, mask); } +static inline const char *crypto_comp_name(struct crypto_comp *tfm) +{ + return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); +} + static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) { return &crypto_comp_tfm(tfm)->crt_compress; diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index b94e3047b4d9..87c1af3e5e82 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -1,15 +1,14 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include #include #define IPCOMP_SCRATCH_SIZE 65400 -struct crypto_tfm; - struct ipcomp_data { u16 threshold; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; }; #endif diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a0c28b2b756e..5bb9c9f03fb6 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -32,7 +32,7 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -46,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) int err, plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; plen = skb->len; @@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; ihlen = iph->ihl * 4; @@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_tfm **tfms) +static void ipcomp_free_tfms(struct crypto_comp **tfms) { struct ipcomp_tfms *pos; int cpu; @@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 7e4d1c17bfbc..a81e9e9d93bd 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,7 +53,7 @@ struct ipcomp6_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -70,7 +70,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; if (skb_linearize_cow(skb)) @@ -129,7 +129,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) struct ipcomp_data *ipcd = x->data; int plen, dlen; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; hdr_len = skb->h.raw - skb->data; @@ -301,7 +301,7 @@ static void **ipcomp6_alloc_scratches(void) return scratches; } -static void ipcomp6_free_tfms(struct crypto_tfm **tfms) +static void ipcomp6_free_tfms(struct crypto_comp **tfms) { struct ipcomp6_tfms *pos; int cpu; @@ -323,28 +323,28 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) { struct ipcomp6_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -358,12 +358,13 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp6_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 87918f281bb4..5a0dbeb6bbe8 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -363,8 +363,8 @@ struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, - int entries, char *name, - int probe) + int entries, u32 type, u32 mask, + char *name, int probe) { int i, status; @@ -382,7 +382,7 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_alg_available(name, 0); + status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC); if (!status) break; @@ -394,19 +394,25 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) { - return xfrm_get_byname(aalg_list, aalg_entries(), name, probe); + return xfrm_get_byname(aalg_list, aalg_entries(), + CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) { - return xfrm_get_byname(ealg_list, ealg_entries(), name, probe); + return xfrm_get_byname(ealg_list, ealg_entries(), + CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) { - return xfrm_get_byname(calg_list, calg_entries(), name, probe); + return xfrm_get_byname(calg_list, calg_entries(), + CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); @@ -441,19 +447,22 @@ void xfrm_probe_algs(void) BUG_ON(in_softirq()); for (i = 0; i < aalg_entries(); i++) { - status = crypto_alg_available(aalg_list[i].name, 0); + status = crypto_has_hash(aalg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (aalg_list[i].available != status) aalg_list[i].available = status; } for (i = 0; i < ealg_entries(); i++) { - status = crypto_alg_available(ealg_list[i].name, 0); + status = crypto_has_blkcipher(ealg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (ealg_list[i].available != status) ealg_list[i].available = status; } for (i = 0; i < calg_entries(); i++) { - status = crypto_alg_available(calg_list[i].name, 0); + status = crypto_has_comp(calg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (calg_list[i].available != status) calg_list[i].available = status; } -- cgit v1.2.3-71-gd317 From 6010439f47e6b308c031dad7d99686030ef942dd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:34:10 +1000 Subject: [CRYPTO] padlock: Convert padlock-sha to use crypto_hash This patch converts padlock-sha to use crypto_hash for its fallback. It also changes the fallback selection to use selection by type instead of name. This is done through the new CRYPTO_ALG_NEED_FALLBACK bit, which is set if and only if an algorithm needs a fallback of the same type. Signed-off-by: Herbert Xu --- drivers/crypto/padlock-sha.c | 91 +++++++++++++++++--------------------------- include/linux/crypto.h | 6 +++ 2 files changed, 41 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index b028db61c301..a781fd23b607 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -12,10 +12,11 @@ * */ +#include +#include #include #include #include -#include #include #include #include @@ -30,28 +31,17 @@ #define SHA256_DIGEST_SIZE 32 #define SHA256_HMAC_BLOCK_SIZE 64 -static char *sha1_fallback = SHA1_DEFAULT_FALLBACK; -static char *sha256_fallback = SHA256_DEFAULT_FALLBACK; - -module_param(sha1_fallback, charp, 0644); -module_param(sha256_fallback, charp, 0644); - -MODULE_PARM_DESC(sha1_fallback, "Fallback driver for SHA1. Default is " - SHA1_DEFAULT_FALLBACK); -MODULE_PARM_DESC(sha256_fallback, "Fallback driver for SHA256. Default is " - SHA256_DEFAULT_FALLBACK); - struct padlock_sha_ctx { char *data; size_t used; int bypass; void (*f_sha_padlock)(const char *in, char *out, int count); - struct crypto_tfm *fallback_tfm; + struct hash_desc fallback; }; static inline struct padlock_sha_ctx *ctx(struct crypto_tfm *tfm) { - return (struct padlock_sha_ctx *)(crypto_tfm_ctx(tfm)); + return crypto_tfm_ctx(tfm); } /* We'll need aligned address on the stack */ @@ -65,14 +55,12 @@ static void padlock_sha_bypass(struct crypto_tfm *tfm) if (ctx(tfm)->bypass) return; - BUG_ON(!ctx(tfm)->fallback_tfm); - - crypto_digest_init(ctx(tfm)->fallback_tfm); + crypto_hash_init(&ctx(tfm)->fallback); if (ctx(tfm)->data && ctx(tfm)->used) { struct scatterlist sg; sg_set_buf(&sg, ctx(tfm)->data, ctx(tfm)->used); - crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1); + crypto_hash_update(&ctx(tfm)->fallback, &sg, sg.length); } ctx(tfm)->used = 0; @@ -95,9 +83,8 @@ static void padlock_sha_update(struct crypto_tfm *tfm, if (unlikely(ctx(tfm)->bypass)) { struct scatterlist sg; - BUG_ON(!ctx(tfm)->fallback_tfm); sg_set_buf(&sg, (uint8_t *)data, length); - crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1); + crypto_hash_update(&ctx(tfm)->fallback, &sg, length); return; } @@ -160,8 +147,7 @@ static void padlock_do_sha256(const char *in, char *out, int count) static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out) { if (unlikely(ctx(tfm)->bypass)) { - BUG_ON(!ctx(tfm)->fallback_tfm); - crypto_digest_final(ctx(tfm)->fallback_tfm, out); + crypto_hash_final(&ctx(tfm)->fallback, out); ctx(tfm)->bypass = 0; return; } @@ -172,8 +158,11 @@ static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out) ctx(tfm)->used = 0; } -static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_name) +static int padlock_cra_init(struct crypto_tfm *tfm) { + const char *fallback_driver_name = tfm->__crt_alg->cra_name; + struct crypto_hash *fallback_tfm; + /* For now we'll allocate one page. This * could eventually be configurable one day. */ ctx(tfm)->data = (char *)__get_free_page(GFP_KERNEL); @@ -181,14 +170,17 @@ static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_ return -ENOMEM; /* Allocate a fallback and abort if it failed. */ - ctx(tfm)->fallback_tfm = crypto_alloc_tfm(fallback_driver_name, 0); - if (!ctx(tfm)->fallback_tfm) { + fallback_tfm = crypto_alloc_hash(fallback_driver_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n", fallback_driver_name); free_page((unsigned long)(ctx(tfm)->data)); - return -ENOENT; + return PTR_ERR(fallback_tfm); } + ctx(tfm)->fallback.tfm = fallback_tfm; return 0; } @@ -196,14 +188,14 @@ static int padlock_sha1_cra_init(struct crypto_tfm *tfm) { ctx(tfm)->f_sha_padlock = padlock_do_sha1; - return padlock_cra_init(tfm, sha1_fallback); + return padlock_cra_init(tfm); } static int padlock_sha256_cra_init(struct crypto_tfm *tfm) { ctx(tfm)->f_sha_padlock = padlock_do_sha256; - return padlock_cra_init(tfm, sha256_fallback); + return padlock_cra_init(tfm); } static void padlock_cra_exit(struct crypto_tfm *tfm) @@ -213,16 +205,16 @@ static void padlock_cra_exit(struct crypto_tfm *tfm) ctx(tfm)->data = NULL; } - BUG_ON(!ctx(tfm)->fallback_tfm); - crypto_free_tfm(ctx(tfm)->fallback_tfm); - ctx(tfm)->fallback_tfm = NULL; + crypto_free_hash(ctx(tfm)->fallback.tfm); + ctx(tfm)->fallback.tfm = NULL; } static struct crypto_alg sha1_alg = { .cra_name = "sha1", .cra_driver_name = "sha1-padlock", .cra_priority = PADLOCK_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_flags = CRYPTO_ALG_TYPE_DIGEST | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA1_HMAC_BLOCK_SIZE, .cra_ctxsize = sizeof(struct padlock_sha_ctx), .cra_module = THIS_MODULE, @@ -243,7 +235,8 @@ static struct crypto_alg sha256_alg = { .cra_name = "sha256", .cra_driver_name = "sha256-padlock", .cra_priority = PADLOCK_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_flags = CRYPTO_ALG_TYPE_DIGEST | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA256_HMAC_BLOCK_SIZE, .cra_ctxsize = sizeof(struct padlock_sha_ctx), .cra_module = THIS_MODULE, @@ -262,29 +255,15 @@ static struct crypto_alg sha256_alg = { static void __init padlock_sha_check_fallbacks(void) { - struct crypto_tfm *tfm; - - /* We'll try to allocate one TFM for each fallback - * to test that the modules are available. */ - tfm = crypto_alloc_tfm(sha1_fallback, 0); - if (!tfm) { - printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", - sha1_alg.cra_name, sha1_fallback); - } else { - printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name, - crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm)); - crypto_free_tfm(tfm); - } - - tfm = crypto_alloc_tfm(sha256_fallback, 0); - if (!tfm) { - printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", - sha256_alg.cra_name, sha256_fallback); - } else { - printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name, - crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm)); - crypto_free_tfm(tfm); - } + if (!crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK)) + printk(KERN_WARNING PFX + "Couldn't load fallback module for sha1.\n"); + + if (!crypto_has_hash("sha256", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK)) + printk(KERN_WARNING PFX + "Couldn't load fallback module for sha256.\n"); } static int __init padlock_init(void) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index d4f9948b64b1..187c6ea91959 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -42,6 +42,12 @@ #define CRYPTO_ALG_DYING 0x00000040 #define CRYPTO_ALG_ASYNC 0x00000080 +/* + * Set this bit if and only if the algorithm requires another algorithm of + * the same type to handle corner cases. + */ +#define CRYPTO_ALG_NEED_FALLBACK 0x00000100 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3-71-gd317 From 3ad819c61f5f8347f39cdcbe652b3c60ec615888 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:44:31 +1000 Subject: [CRYPTO] api: Deprecate crypto_digest_* and crypto_alg_available This patch marks the crypto_digest_* functions and crypto_alg_available as deprecated. They've been replaced by crypto_hash_* and crypto_has_* respectively. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 187c6ea91959..8f2ffa4caabf 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -241,9 +241,12 @@ int crypto_unregister_alg(struct crypto_alg *alg); * Algorithm query interface. */ #ifdef CONFIG_CRYPTO -int crypto_alg_available(const char *name, u32 flags); +int crypto_alg_available(const char *name, u32 flags) + __deprecated_for_modules; int crypto_has_alg(const char *name, u32 type, u32 mask); #else +static int crypto_alg_available(const char *name, u32 flags); + __deprecated_for_modules; static inline int crypto_alg_available(const char *name, u32 flags) { return 0; @@ -704,12 +707,15 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -void crypto_digest_init(struct crypto_tfm *tfm); +void crypto_digest_init(struct crypto_tfm *tfm) __deprecated_for_modules; void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg); -void crypto_digest_final(struct crypto_tfm *tfm, u8 *out); + struct scatterlist *sg, unsigned int nsg) + __deprecated_for_modules; +void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) + __deprecated_for_modules; void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out); + struct scatterlist *sg, unsigned int nsg, u8 *out) + __deprecated_for_modules; static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) { @@ -723,6 +729,8 @@ static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) return __crypto_hash_cast(tfm); } +static int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) __deprecated; static inline int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { -- cgit v1.2.3-71-gd317 From 1694176a210189312e31b083bac1e1688981219a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 22 Sep 2006 08:00:42 +0100 Subject: Remove offsetof() from user-visible It's not used by anything user-visible, and it make g++ unhappy. Signed-off-by: David Woodhouse --- include/linux/Kbuild | 2 +- include/linux/stddef.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 7d076d97b2f7..10d2ca07562a 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -143,7 +143,6 @@ header-y += snmp.h header-y += sockios.h header-y += som.h header-y += sound.h -header-y += stddef.h header-y += synclink.h header-y += telephony.h header-y += termios.h @@ -318,6 +317,7 @@ unifdef-y += sonet.h unifdef-y += sonypi.h unifdef-y += soundcard.h unifdef-y += stat.h +unifdef-y += stddef.h unifdef-y += sysctl.h unifdef-y += tcp.h unifdef-y += time.h diff --git a/include/linux/stddef.h b/include/linux/stddef.h index b3a2cadf90f2..ea65dfb60cd8 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -10,11 +10,13 @@ #define NULL ((void *)0) #endif +#ifdef __KERNEL__ #undef offsetof #ifdef __compiler_offsetof #define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) #else #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif +#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-71-gd317 From 892c141e62982272b9c738b5520ad0e5e1ad7b42 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:08:56 -0700 Subject: [MLSXFRM]: Add security sid to sock This adds security for IP sockets at the sock level. Security at the sock level is needed to enforce the SELinux security policy for security associations even when a sock is orphaned (such as in the TCP LAST_ACK state). This will also be used to enforce SELinux controls over data arriving at or leaving a child socket while it's still waiting to be accepted. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 12 ++++++++++++ include/net/sock.h | 13 +++++++++++++ net/core/sock.c | 2 +- security/dummy.c | 5 +++++ security/selinux/hooks.c | 38 +++++++++++++++++++++----------------- security/selinux/include/objsec.h | 1 + 6 files changed, 53 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 6bc2aad494ff..4d7fb59996b0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -812,6 +812,8 @@ struct swap_info_struct; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. * @sk_getsid: * Retrieve the LSM-specific sid for the sock to enable caching of network * authorizations. @@ -1332,6 +1334,7 @@ struct security_operations { int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); + void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ @@ -2885,6 +2888,11 @@ static inline void security_sk_free(struct sock *sk) return security_ops->sk_free_security(sk); } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ + return security_ops->sk_clone_security(sk, newsk); +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return security_ops->sk_getsid(sk, fl, dir); @@ -3011,6 +3019,10 @@ static inline void security_sk_free(struct sock *sk) { } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; diff --git a/include/net/sock.h b/include/net/sock.h index 324b3ea233d6..91cdceb3c028 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -972,6 +972,19 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } +static inline void sock_copy(struct sock *nsk, const struct sock *osk) +{ +#ifdef CONFIG_SECURITY_NETWORK + void *sptr = nsk->sk_security; +#endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); +#ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +#endif +} + extern int sock_i_uid(struct sock *sk); extern unsigned long sock_i_ino(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 51fcfbc041a7..b67d868649cd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -911,7 +911,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) if (newsk != NULL) { struct sk_filter *filter; - memcpy(newsk, sk, sk->sk_prot->obj_size); + sock_copy(newsk, sk); /* SANITY */ sk_node_init(&newsk->sk_node); diff --git a/security/dummy.c b/security/dummy.c index 58c6d399c844..bd3bc5faa9a8 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -805,6 +805,10 @@ static inline void dummy_sk_free_security (struct sock *sk) { } +static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk) +{ +} + static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; @@ -1060,6 +1064,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, socket_getpeersec_dgram); set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); + set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5d1b8c733199..d67abf77584a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -269,15 +269,13 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) { struct sk_security_struct *ssec; - if (family != PF_UNIX) - return 0; - ssec = kzalloc(sizeof(*ssec), priority); if (!ssec) return -ENOMEM; ssec->sk = sk; ssec->peer_sid = SECINITSID_UNLABELED; + ssec->sid = SECINITSID_UNLABELED; sk->sk_security = ssec; return 0; @@ -287,9 +285,6 @@ static void sk_free_security(struct sock *sk) { struct sk_security_struct *ssec = sk->sk_security; - if (sk->sk_family != PF_UNIX) - return; - sk->sk_security = NULL; kfree(ssec); } @@ -3068,6 +3063,7 @@ static void selinux_socket_post_create(struct socket *sock, int family, { struct inode_security_struct *isec; struct task_security_struct *tsec; + struct sk_security_struct *sksec; u32 newsid; isec = SOCK_INODE(sock)->i_security; @@ -3078,6 +3074,11 @@ static void selinux_socket_post_create(struct socket *sock, int family, isec->sid = kern ? SECINITSID_KERNEL : newsid; isec->initialized = 1; + if (sock->sk) { + sksec = sock->sk->sk_security; + sksec->sid = isec->sid; + } + return; } @@ -3551,22 +3552,24 @@ static void selinux_sk_free_security(struct sock *sk) sk_free_security(sk); } -static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) { - struct inode_security_struct *isec; - u32 sock_sid = SECINITSID_ANY_SOCKET; + struct sk_security_struct *ssec = sk->sk_security; + struct sk_security_struct *newssec = newsk->sk_security; + newssec->sid = ssec->sid; + newssec->peer_sid = ssec->peer_sid; +} + +static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +{ if (!sk) return selinux_no_sk_sid(fl); + else { + struct sk_security_struct *sksec = sk->sk_security; - read_lock_bh(&sk->sk_callback_lock); - isec = get_sock_isec(sk); - - if (isec) - sock_sid = isec->sid; - - read_unlock_bh(&sk->sk_callback_lock); - return sock_sid; + return sksec->sid; + } } static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) @@ -4618,6 +4621,7 @@ static struct security_operations selinux_ops = { .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, + .sk_clone_security = selinux_sk_clone_security, .sk_getsid = selinux_sk_getsid_security, #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 940178865fc7..79b9e0af19a0 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -99,6 +99,7 @@ struct netif_security_struct { struct sk_security_struct { struct sock *sk; /* back pointer to sk object */ + u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ }; -- cgit v1.2.3-71-gd317 From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seemless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 106 +++++++++++++++++---- include/net/flow.h | 4 +- net/core/flow.c | 7 +- net/xfrm/xfrm_policy.c | 28 +++--- net/xfrm/xfrm_state.c | 12 ++- security/dummy.c | 23 ++++- security/selinux/hooks.c | 7 +- security/selinux/include/xfrm.h | 23 +++-- security/selinux/xfrm.c | 199 +++++++++++++++++++++++++++++++++------- 9 files changed, 329 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 4d7fb59996b0..2c4921d79d19 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct ctl_table; @@ -825,9 +826,8 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field. - * The security field is initialized to NULL when the xfrm_policy is - * allocated. + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -846,9 +846,14 @@ struct swap_info_struct; * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->security field. The - * security field is initialized to NULL when the xfrm_state is - * allocated. + * @polsec contains the security context information associated with a xfrm + * policy rule from which to take the base context. polsec must be NULL + * when sec_ctx is specified. + * @secid contains the secid from which to take the mls portion of the context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to either sec_ctx or polsec, with the mls portion + * taken from secid in the latter case. * Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. @@ -859,13 +864,26 @@ struct swap_info_struct; * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. - * @sk_sid contains the sock security label that is used to authorize + * @fl_secid contains the flow security label that is used to authorize * access to the policy xp. * @dir contains the direction of the flow (input or output). - * Check permission when a sock selects a xfrm_policy for processing + * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. * Return 0 if permission is granted. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_flow_state_match: + * @fl contains the flow key to match. + * @xfrm points to the xfrm_state to match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @fl points to the flow key to set. + * Return 0 if successful decoding. * * Security hooks affecting all Key Management operations * @@ -1343,10 +1361,16 @@ struct security_operations { int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_security) (struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec, + u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); + int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -3050,9 +3074,18 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return security_ops->xfrm_policy_delete_security(xp); } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0); +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx); + if (!polsec) + return 0; + return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid); } static inline int security_xfrm_state_delete(struct xfrm_state *x) @@ -3065,9 +3098,25 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) security_ops->xfrm_state_free_security(x); } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +{ + return security_ops->xfrm_policy_lookup(xp, fl_secid, dir); +} + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return security_ops->xfrm_state_pol_flow_match(x, xp, fl); +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return security_ops->xfrm_flow_state_match(fl, xfrm); +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); + return security_ops->xfrm_decode_session(skb, fl); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3089,7 +3138,14 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return 0; } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { return 0; } @@ -3103,10 +3159,28 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x) return 0; } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { return 0; } + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, + struct xfrm_state *xfrm) +{ + return 1; +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/flow.h b/include/net/flow.h index 1cee5a83433a..21d988b2058a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -86,10 +86,10 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/net/core/flow.c b/net/core/flow.c index 2191af5f26ac..645241165e6c 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -32,7 +32,6 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; - u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && - fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = fle->object; @@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; - fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -226,7 +223,7 @@ nocache: void *obj; atomic_t *obj_ref; - resolver(key, sk_sid, family, dir, &obj, &obj_ref); + resolver(key, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3da67ca2c3ce..79405daadc52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -597,7 +597,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; @@ -613,7 +613,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, match = xfrm_selector_match(sel, fl, family); if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { xfrm_pol_hold(pol); break; } @@ -641,7 +641,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -652,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); if (match && !err) xfrm_pol_hold(pol); @@ -862,19 +862,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); + + fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, + policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); } @@ -1032,13 +1033,15 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); + err = security_xfrm_decode_session(skb, fl); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } EXPORT_SYMBOL(xfrm_decode_session); @@ -1058,14 +1061,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); - sk_sid = security_sk_sid(sk, &fl, fl_dir); - /* First, check used SA against their selectors. */ if (skb->sp) { int i; @@ -1079,10 +1079,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, + pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); if (!pol) @@ -1298,6 +1298,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0021aad5db43..be02bd981d12 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) + !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || best->km.dying > x->km.dying || @@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) + security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } } @@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + goto out; + } + if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); diff --git a/security/dummy.c b/security/dummy.c index bd3bc5faa9a8..c1f10654871e 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -835,7 +835,8 @@ static int dummy_xfrm_policy_delete_security(struct xfrm_policy *xp) return 0; } -static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid) { return 0; } @@ -853,6 +854,23 @@ static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; } + +static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return 1; +} + +static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ static int dummy_register_security (const char *name, struct security_operations *ops) { @@ -1076,6 +1094,9 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, xfrm_state_free_security); set_to_dummy_if_null(ops, xfrm_state_delete_security); set_to_dummy_if_null(ops, xfrm_policy_lookup); + set_to_dummy_if_null(ops, xfrm_state_pol_flow_match); + set_to_dummy_if_null(ops, xfrm_flow_state_match); + set_to_dummy_if_null(ops, xfrm_decode_session); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS set_to_dummy_if_null(ops, key_alloc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d67abf77584a..5c189da07bc9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3468,7 +3468,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb); + err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); out: return err; } @@ -3720,7 +3720,7 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb); + err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4633,6 +4633,9 @@ static struct security_operations selinux_ops = { .xfrm_state_free_security = selinux_xfrm_state_free, .xfrm_state_delete_security = selinux_xfrm_state_delete, .xfrm_policy_lookup = selinux_xfrm_policy_lookup, + .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, + .xfrm_flow_state_match = selinux_xfrm_flow_state_match, + .xfrm_decode_session = selinux_xfrm_decode_session, #endif #ifdef CONFIG_KEYS diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index c96498a10eb8..f51a3e84bd9b 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -2,6 +2,7 @@ * SELinux support for the XFRM LSM hooks * * Author : Trent Jaeger, + * Updated : Venkat Yekkirala, */ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ @@ -10,10 +11,16 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx * int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir); +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); +int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); + /* * Extract the security blob from the sock (it's actually on the socket) @@ -39,17 +46,21 @@ static inline u32 selinux_no_sk_sid(struct flowi *fl) } #ifdef CONFIG_SECURITY_NETWORK_XFRM -int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb); -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb); +int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, + struct avc_audit_data *ad); +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad); u32 selinux_socket_getpeer_stream(struct sock *sk); u32 selinux_socket_getpeer_dgram(struct sk_buff *skb); #else -static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) +static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { return 0; } -static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) +static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { return 0; } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 6c985ced8102..a502b0540e3d 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -6,7 +6,12 @@ * Authors: Serge Hallyn * Trent Jaeger * + * Updated: Venkat Yekkirala + * + * Granular IPSec Associations for use in MLS environments. + * * Copyright (C) 2005 International Business Machines Corporation + * Copyright (C) 2006 Trusted Computer Solutions, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -67,10 +72,10 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) } /* - * LSM hook implementation that authorizes that a socket can be used - * with the corresponding xfrm_sec_ctx and direction. + * LSM hook implementation that authorizes that a flow can use + * a xfrm policy rule. */ -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; @@ -84,27 +89,129 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) sel_sid = ctx->ctx_sid; } - rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION, - ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM : - ((dir == FLOW_DIR_OUT) ? ASSOCIATION__SENDTO : - (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))), + rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__POLMATCH, NULL); return rc; } +/* + * LSM hook implementation that authorizes that a state matches + * the given policy, flow combo. + */ + +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, + struct flowi *fl) +{ + u32 state_sid; + u32 pol_sid; + int err; + + if (x->security) + state_sid = x->security->ctx_sid; + else + state_sid = SECINITSID_UNLABELED; + + if (xp->security) + pol_sid = xp->security->ctx_sid; + else + pol_sid = SECINITSID_UNLABELED; + + err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__POLMATCH, + NULL); + + if (err) + return 0; + + return selinux_xfrm_flow_state_match(fl, x); +} + +/* + * LSM hook implementation that authorizes that a particular outgoing flow + * can use a given security association. + */ + +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + int rc = 0; + u32 sel_sid = SECINITSID_UNLABELED; + struct xfrm_sec_ctx *ctx; + + /* Context sid is either set to label or ANY_ASSOC */ + if ((ctx = xfrm->security)) { + if (!selinux_authorizable_ctx(ctx)) + return 0; + + sel_sid = ctx->ctx_sid; + } + + rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__SENDTO, + NULL)? 0:1; + + return rc; +} + +/* + * LSM hook implementation that determines the sid for the session. + */ + +int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct sec_path *sp; + + fl->secid = SECSID_NULL; + + if (skb == NULL) + return 0; + + sp = skb->sp; + if (sp) { + int i, sid_set = 0; + + for (i = sp->len-1; i >= 0; i--) { + struct xfrm_state *x = sp->xvec[i]; + if (selinux_authorizable_xfrm(x)) { + struct xfrm_sec_ctx *ctx = x->security; + + if (!sid_set) { + fl->secid = ctx->ctx_sid; + sid_set = 1; + } + else if (fl->secid != ctx->ctx_sid) + return -EINVAL; + } + } + } + + return 0; +} + /* * Security blob allocation for xfrm_policy and xfrm_state * CTX does not have a meaningful value on input */ -static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx) +static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid) { int rc = 0; struct task_security_struct *tsec = current->security; - struct xfrm_sec_ctx *ctx; + struct xfrm_sec_ctx *ctx = NULL; + char *ctx_str = NULL; + u32 str_len; + u32 ctx_sid; + + BUG_ON(uctx && pol); + + if (pol) + goto from_policy; BUG_ON(!uctx); - BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX); + + if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) + return -EINVAL; if (uctx->ctx_len >= PAGE_SIZE) return -ENOMEM; @@ -141,9 +248,41 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us return rc; +from_policy: + BUG_ON(!pol); + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + + rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); + if (rc) + goto out; + + *ctxp = ctx = kmalloc(sizeof(*ctx) + + str_len, + GFP_ATOMIC); + + if (!ctx) { + rc = -ENOMEM; + goto out; + } + + + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_sid = ctx_sid; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, + ctx_str, + str_len); + + goto out2; + out: *ctxp = NULL; kfree(ctx); +out2: + kfree(ctx_str); return rc; } @@ -157,7 +296,7 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx * BUG_ON(!xp); - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); return err; } @@ -217,13 +356,14 @@ int selinux_xfrm_policy_delete(struct xfrm_policy *xp) * LSM hook implementation that allocs and transfers sec_ctx spec to * xfrm_state. */ -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx, + struct xfrm_sec_ctx *pol, u32 secid) { int err; BUG_ON(!x); - err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx); + err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid); return err; } @@ -329,38 +469,30 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * we need to check for unlabelled access since this may not have * gone thru the IPSec process. */ -int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) +int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { int i, rc = 0; struct sec_path *sp; + u32 sel_sid = SECINITSID_UNLABELED; sp = skb->sp; if (sp) { - /* - * __xfrm_policy_check does not approve unless xfrm_policy_ok - * says that spi's match for policy and the socket. - * - * Only need to verify the existence of an authorizable sp. - */ for (i = 0; i < sp->len; i++) { struct xfrm_state *x = sp->xvec[i]; - if (x && selinux_authorizable_xfrm(x)) - goto accept; + if (x && selinux_authorizable_xfrm(x)) { + struct xfrm_sec_ctx *ctx = x->security; + sel_sid = ctx->ctx_sid; + break; + } } } - /* check SELinux sock for unlabelled access */ - rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__RECVFROM, NULL); - if (rc) - goto drop; - -accept: - return 0; + rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__RECVFROM, ad); -drop: return rc; } @@ -371,7 +503,8 @@ drop: * If we do have a authorizable security association, then it has already been * checked in xfrm_policy_lookup hook. */ -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { struct dst_entry *dst; int rc = 0; @@ -391,7 +524,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) } rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, NULL); + ASSOCIATION__SENDTO, ad); out: return rc; } -- cgit v1.2.3-71-gd317 From beb8d13bed80f8388f1a9a107d07ddd342e627e8 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:12:42 -0700 Subject: [MLSXFRM]: Add flow labeling This labels the flows that could utilize IPSec xfrms at the points the flows are defined so that IPSec policy and SAs at the right label can be used. The following protos are currently not handled, but they should continue to be able to use single-labeled IPSec like they currently do. ipmr ip_gre ipip igmp sit sctp ip6_tunnel (IPv6 over IPv6 tunnel device) decnet Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 38 +++++++++++++++++++++++++------------- include/net/route.h | 3 +++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 6 ++++++ net/ipv4/af_inet.c | 1 + net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 2 ++ net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/syncookies.c | 1 + net/ipv4/udp.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/datagram.c | 2 ++ net/ipv6/icmp.c | 2 ++ net/ipv6/inet6_connection_sock.c | 1 + net/ipv6/ndisc.c | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 2 ++ net/xfrm/xfrm_policy.c | 3 +-- security/dummy.c | 7 +++---- security/selinux/hooks.c | 8 ++++---- security/selinux/include/xfrm.h | 14 +------------- security/selinux/xfrm.c | 11 +++++++---- 26 files changed, 79 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 2c4921d79d19..f3909d189fe0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include #include #include +#include struct ctl_table; @@ -815,8 +816,8 @@ struct swap_info_struct; * Deallocate security structure. * @sk_clone_security: * Clone/copy security structure. - * @sk_getsid: - * Retrieve the LSM-specific sid for the sock to enable caching of network + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * * Security hooks for XFRM operations. @@ -882,8 +883,9 @@ struct swap_info_struct; * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. - * @fl points to the flow key to set. - * Return 0 if successful decoding. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. * * Security hooks affecting all Key Management operations * @@ -1353,7 +1355,7 @@ struct security_operations { int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); + void (*sk_getsecid) (struct sock *sk, u32 *secid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1370,7 +1372,7 @@ struct security_operations { int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); - int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -2917,9 +2919,9 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) return security_ops->sk_clone_security(sk, newsk); } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return security_ops->sk_getsid(sk, fl, dir); + security_ops->sk_getsecid(sk, &fl->secid); } #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, @@ -3047,9 +3049,8 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -3114,9 +3115,16 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_s return security_ops->xfrm_flow_state_match(fl, xfrm); } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) +{ + return security_ops->xfrm_decode_session(skb, secid, 1); +} + +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_decode_session(skb, fl); + int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); + + BUG_ON(rc); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3176,11 +3184,15 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, return 1; } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return 0; } +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +{ +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/route.h b/include/net/route.h index c4a068692dcc..7f93ac0e0899 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -32,6 +32,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -166,6 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, ip_rt_put(*rp); *rp = NULL; } + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } @@ -182,6 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.proto = protocol; ip_rt_put(*rp); *rp = NULL; + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7f56f7e8f571..386498053b1c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -678,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, } }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 610c722ac27f..53d255c01431 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { @@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -842,6 +847,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c84a32070f8d..fc40da3b6d39 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1074,6 +1074,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4c86ac3d882d..6ad797c14163 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e50a1bfd7ccc..772b4eac78bc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2ede167e045..308bdeac3455 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. */ + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -1366,6 +1367,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 269bc2067cb8..7f905bf2bde5 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; + security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 62b2762a2420..fe44cb50a1c5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -484,6 +484,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f67..307dc3c0d635 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -259,6 +259,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f136cec96d95..a4d005eccc7f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -603,6 +603,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ac85e9c532c2..82a1b1a328db 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,6 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 3b55b4c8e2d1..c73508e090a6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -156,6 +156,8 @@ ipv4_connected: if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); + if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 356a8a7ef22a..dbfce089e916 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -358,6 +358,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -472,6 +473,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bf491077b822..7a51a258615d 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b50055b9278d..67cfc3813c32 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -419,6 +419,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; + security_sk_classify_flow(ndisc_socket->sk, fl); } static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8629ba195d2d..c4eba1aeb323 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb) ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; + security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 15b862d8acab..d5040e172292 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -759,6 +759,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 802a1a6b1037..46922e57e311 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = &final; } + security_sk_classify_flow(sk, &fl); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; @@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; @@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { @@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { @@ -923,6 +929,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d54f246411e..82c7c9cde2a8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,6 +782,8 @@ do_udp_sendmsg: connected = 0; } + security_sk_classify_flow(sk, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 79405daadc52..32c963c90573 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -863,7 +863,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; @@ -1039,7 +1038,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); - err = security_xfrm_decode_session(skb, fl); + err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); return err; } diff --git a/security/dummy.c b/security/dummy.c index c1f10654871e..c0ff6b9bfd7d 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -809,9 +809,8 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * { } -static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -866,7 +865,7 @@ static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm return 1; } -static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall) { return 0; } @@ -1083,7 +1082,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); - set_to_dummy_if_null(ops, sk_getsid); + set_to_dummy_if_null(ops, sk_getsecid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5c189da07bc9..4e5989d584ce 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3561,14 +3561,14 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) newssec->peer_sid = ssec->peer_sid; } -static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +static void selinux_sk_getsecid(struct sock *sk, u32 *secid) { if (!sk) - return selinux_no_sk_sid(fl); + *secid = SECINITSID_ANY_SOCKET; else { struct sk_security_struct *sksec = sk->sk_security; - return sksec->sid; + *secid = sksec->sid; } } @@ -4622,7 +4622,7 @@ static struct security_operations selinux_ops = { .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, - .sk_getsid = selinux_sk_getsid_security, + .sk_getsecid = selinux_sk_getsecid, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index f51a3e84bd9b..8e45c1d588a8 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -19,7 +19,7 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall); /* @@ -33,18 +33,6 @@ static inline struct inode_security_struct *get_sock_isec(struct sock *sk) return SOCK_INODE(sk->sk_socket)->i_security; } - -static inline u32 selinux_no_sk_sid(struct flowi *fl) -{ - /* NOTE: no sock occurs on ICMP reply, forwards, ... */ - /* icmp_reply: authorize as kernel packet */ - if (fl && fl->proto == IPPROTO_ICMP) { - return SECINITSID_KERNEL; - } - - return SECINITSID_ANY_SOCKET; -} - #ifdef CONFIG_SECURITY_NETWORK_XFRM int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, struct avc_audit_data *ad); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index a502b0540e3d..c750ef7af66f 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -158,11 +158,11 @@ int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) * LSM hook implementation that determines the sid for the session. */ -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) { struct sec_path *sp; - fl->secid = SECSID_NULL; + *sid = SECSID_NULL; if (skb == NULL) return 0; @@ -177,10 +177,13 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) struct xfrm_sec_ctx *ctx = x->security; if (!sid_set) { - fl->secid = ctx->ctx_sid; + *sid = ctx->ctx_sid; sid_set = 1; + + if (!ckall) + break; } - else if (fl->secid != ctx->ctx_sid) + else if (*sid != ctx->ctx_sid) return -EINVAL; } } -- cgit v1.2.3-71-gd317 From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:20 -0700 Subject: [MLSXFRM]: Default labeling of socket specific IPSec policies This defaults the label of socket-specific IPSec policies to be the same as the socket they are set on. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 19 ++++++++++++++++--- include/net/xfrm.h | 2 +- net/key/af_key.c | 15 +++++++++++---- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 13 +++++++++++-- security/dummy.c | 3 ++- security/selinux/include/xfrm.h | 3 ++- security/selinux/xfrm.c | 33 ++++++++++++++++++++++----------- 8 files changed, 66 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f3909d189fe0..8e3dc6c51a6d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -827,8 +827,10 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). + * @sk refers to the sock from which to derive the security context. * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. + * field is initialized to NULL when the xfrm_policy is allocated. Only + * one of sec_ctx or sock can be specified. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -1359,7 +1361,8 @@ struct security_operations { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); @@ -3057,7 +3060,12 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); +} + +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); } static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) @@ -3132,6 +3140,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return 0; +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3ecd9fa1ed4b..00bf86e6e82b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -362,7 +362,7 @@ struct xfrm_mgr char *id; int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); - struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); + struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; diff --git a/net/key/af_key.c b/net/key/af_key.c index a065e1a67773..797c744a8438 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2843,14 +2843,14 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } -static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, +static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; @@ -2891,7 +2891,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; - xp->family = family; + xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && @@ -2907,8 +2907,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) + sec_ctx->sadb_x_sec_len) { + *dir = -EINVAL; goto out; + } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2918,6 +2920,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, if (*dir) goto out; } + else { + *dir = security_xfrm_sock_policy_alloc(xp, sk); + if (*dir) + goto out; + } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index be02bd981d12..1c796087ee78 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1026,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dac8db1088bc..f70e158874d2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1757,7 +1757,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, +static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; @@ -1765,7 +1765,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, struct xfrm_policy *xp; int nr; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1807,6 +1807,15 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, copy_from_user_policy(xp, p); copy_templates(xp, ut, nr); + if (!xp->security) { + int err = security_xfrm_sock_policy_alloc(xp, sk); + if (err) { + kfree(xp); + *dir = err; + return NULL; + } + } + *dir = p->dir; return xp; diff --git a/security/dummy.c b/security/dummy.c index c0ff6b9bfd7d..66cc06404930 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -815,7 +815,8 @@ static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk) { return 0; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 8e45c1d588a8..1822c73e5085 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -7,7 +7,8 @@ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index c750ef7af66f..d3690f985135 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -208,10 +208,8 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, BUG_ON(uctx && pol); - if (pol) - goto from_policy; - - BUG_ON(!uctx); + if (!uctx) + goto not_from_user; if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) return -EINVAL; @@ -251,11 +249,14 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, return rc; -from_policy: - BUG_ON(!pol); - rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); - if (rc) - goto out; +not_from_user: + if (pol) { + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + } + else + ctx_sid = sid; rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); if (rc) @@ -293,13 +294,23 @@ out2: * LSM hook implementation that allocs and transfers uctx spec to * xfrm_policy. */ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *uctx, struct sock *sk) { int err; + u32 sid; BUG_ON(!xp); + BUG_ON(uctx && sk); + + if (sk) { + struct sk_security_struct *ssec = sk->sk_security; + sid = ssec->sid; + } + else + sid = SECSID_NULL; - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid); return err; } -- cgit v1.2.3-71-gd317 From 4237c75c0a35535d7f9f2bfeeb4b4df1e068a0bf Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:50 -0700 Subject: [MLSXFRM]: Auto-labeling of child sockets This automatically labels the TCP, Unix stream, and dccp child sockets as well as openreqs to be at the same MLS level as the peer. This will result in the selection of appropriately labeled IPSec Security Associations. This also uses the sock's sid (as opposed to the isec sid) in SELinux enforcement of secmark in rcv_skb and postroute_last hooks. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 55 ++++++++++++++++ include/net/request_sock.h | 1 + include/net/sock.h | 1 + net/dccp/ipv4.c | 3 + net/dccp/ipv6.c | 7 +- net/ipv4/inet_connection_sock.c | 4 +- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ipv4.c | 3 + net/ipv6/tcp_ipv6.c | 6 +- security/dummy.c | 24 +++++++ security/selinux/hooks.c | 137 +++++++++++++++++++++++++++------------- security/selinux/xfrm.c | 1 - 12 files changed, 197 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 8e3dc6c51a6d..bb4c80fdfe7a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -90,6 +90,7 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap); struct nfsctl_arg; struct sched_param; struct swap_info_struct; +struct request_sock; /* bprm_apply_creds unsafe reasons */ #define LSM_UNSAFE_SHARE 1 @@ -819,6 +820,14 @@ struct swap_info_struct; * @sk_getsecid: * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. * * Security hooks for XFRM operations. * @@ -1358,6 +1367,11 @@ struct security_operations { void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); void (*sk_getsecid) (struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock* sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req); + void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2926,6 +2940,28 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { security_ops->sk_getsecid(sk, &fl->secid); } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + security_ops->req_classify_flow(req, fl); +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ + security_ops->sock_graft(sk, parent); +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return security_ops->inet_conn_request(sk, skb, req); +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ + security_ops->inet_csk_clone(newsk, req); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -3055,6 +3091,25 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c5d7f920c352..8e165ca16bd8 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -53,6 +53,7 @@ struct request_sock { unsigned long expires; struct request_sock_ops *rsk_ops; struct sock *sk; + u32 secid; }; static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) diff --git a/include/net/sock.h b/include/net/sock.h index 91cdceb3c028..337ebec84c70 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -969,6 +969,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_sleep = &parent->wait; parent->sk = sk; sk->sk_socket = parent; + security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 386498053b1c..171d363876ee 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 53d255c01431..231bc7c7e749 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -424,7 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -626,7 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + security_req_classify_flow(req, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -709,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq6 = inet6_rsk(req); ireq = inet_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 772b4eac78bc..07204391d083 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,7 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + + security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 307dc3c0d635..661e0a4bca72 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -259,7 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3edd4a9..43f6740244f8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 46922e57e311..302786a11cd6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -470,7 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -826,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; + security_inet_conn_request(sk, skb, req); + if (tcp_v6_send_synack(sk, req, NULL)) goto drop; @@ -929,7 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/security/dummy.c b/security/dummy.c index 66cc06404930..1c45f8e4aad1 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -812,6 +812,26 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { } + +static inline void dummy_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int dummy_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void dummy_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} + +static inline void dummy_req_classify_flow(const struct request_sock *req, + struct flowi *fl) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1084,6 +1104,10 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsecid); + set_to_dummy_if_null(ops, sock_graft); + set_to_dummy_if_null(ops, inet_conn_request); + set_to_dummy_if_null(ops, inet_csk_clone); + set_to_dummy_if_null(ops, req_classify_flow); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4e5989d584ce..1dc935f7b919 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3328,8 +3328,9 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, /* server child socket */ ssec = newsk->sk_security; ssec->peer_sid = isec->sid; - - return 0; + err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid); + + return err; } static int selinux_socket_unix_may_send(struct socket *sock, @@ -3355,11 +3356,29 @@ static int selinux_socket_unix_may_send(struct socket *sock, } static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, - struct avc_audit_data *ad, u32 sock_sid, u16 sock_class, - u16 family, char *addrp, int len) + struct avc_audit_data *ad, u16 family, char *addrp, int len) { int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0; + struct socket *sock; + u16 sock_class = 0; + u32 sock_sid = 0; + + read_lock_bh(&sk->sk_callback_lock); + sock = sk->sk_socket; + if (sock) { + struct inode *inode; + inode = SOCK_INODE(sock); + if (inode) { + struct inode_security_struct *isec; + isec = inode->i_security; + sock_sid = isec->sid; + sock_class = isec->sclass; + } + } + read_unlock_bh(&sk->sk_callback_lock); + if (!sock_sid) + goto out; if (!skb->dev) goto out; @@ -3419,12 +3438,10 @@ out: static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { u16 family; - u16 sock_class = 0; char *addrp; int len, err = 0; - u32 sock_sid = 0; - struct socket *sock; struct avc_audit_data ad; + struct sk_security_struct *sksec = sk->sk_security; family = sk->sk_family; if (family != PF_INET && family != PF_INET6) @@ -3434,22 +3451,6 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP)) family = PF_INET; - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (sock) { - struct inode *inode; - inode = SOCK_INODE(sock); - if (inode) { - struct inode_security_struct *isec; - isec = inode->i_security; - sock_sid = isec->sid; - sock_class = isec->sclass; - } - } - read_unlock_bh(&sk->sk_callback_lock); - if (!sock_sid) - goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]"; ad.u.net.family = family; @@ -3459,16 +3460,15 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; if (selinux_compat_net) - err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid, - sock_class, family, + err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family, addrp, len); else - err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); out: return err; } @@ -3572,6 +3572,49 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid) } } +void selinux_sock_graft(struct sock* sk, struct socket *parent) +{ + struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; + struct sk_security_struct *sksec = sk->sk_security; + + isec->sid = sksec->sid; +} + +int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct sk_security_struct *sksec = sk->sk_security; + int err; + u32 newsid = 0; + u32 peersid; + + err = selinux_xfrm_decode_session(skb, &peersid, 0); + BUG_ON(err); + + err = security_sid_mls_copy(sksec->sid, peersid, &newsid); + if (err) + return err; + + req->secid = newsid; + return 0; +} + +void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) +{ + struct sk_security_struct *newsksec = newsk->sk_security; + + newsksec->sid = req->secid; + /* NOTE: Ideally, we should also get the isec->sid for the + new socket in sync, but we don't have the isec available yet. + So we will wait until sock_graft to do it, by which + time it will have been created and available. */ +} + +void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + fl->secid = req->secid; +} + static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) { int err = 0; @@ -3611,12 +3654,24 @@ out: #ifdef CONFIG_NETFILTER static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev, - struct inode_security_struct *isec, struct avc_audit_data *ad, u16 family, char *addrp, int len) { - int err; + int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0; + struct socket *sock; + struct inode *inode; + struct inode_security_struct *isec; + + sock = sk->sk_socket; + if (!sock) + goto out; + + inode = SOCK_INODE(sock); + if (!inode) + goto out; + + isec = inode->i_security; err = sel_netif_sids(dev, &if_sid, NULL); if (err) @@ -3681,26 +3736,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; - struct socket *sock; - struct inode *inode; struct sk_buff *skb = *pskb; - struct inode_security_struct *isec; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; + struct sk_security_struct *sksec; sk = skb->sk; if (!sk) goto out; - sock = sk->sk_socket; - if (!sock) - goto out; - - inode = SOCK_INODE(sock); - if (!inode) - goto out; - - isec = inode->i_security; + sksec = sk->sk_security; AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = dev->name; @@ -3711,16 +3756,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, goto out; if (selinux_compat_net) - err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad, + err = selinux_ip_postroute_last_compat(sk, dev, &ad, family, addrp, len); else - err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad); if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); + err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4623,6 +4668,10 @@ static struct security_operations selinux_ops = { .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, .sk_getsecid = selinux_sk_getsecid, + .sock_graft = selinux_sock_graft, + .inet_conn_request = selinux_inet_conn_request, + .inet_csk_clone = selinux_inet_csk_clone, + .req_classify_flow = selinux_req_classify_flow, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d3690f985135..3e742b850af6 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -271,7 +271,6 @@ not_from_user: goto out; } - ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; ctx->ctx_sid = ctx_sid; -- cgit v1.2.3-71-gd317 From 11a03f78fbf15a866ba3bf6359a75cdfd1ced703 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:46:20 -0700 Subject: [NetLabel]: core network changes Changes to the core network stack to support the NetLabel subsystem. This includes changes to the IPv4 option handling to support CIPSO labels. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/ip.h | 1 + include/net/cipso_ipv4.h | 250 ++++++++++++++++++++++++++++++++++++++++ include/net/inet_sock.h | 2 +- include/net/netlabel.h | 291 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ah4.c | 2 +- net/ipv4/ip_options.c | 19 ++++ 6 files changed, 563 insertions(+), 2 deletions(-) create mode 100644 include/net/cipso_ipv4.h create mode 100644 include/net/netlabel.h (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 4b55cf1df732..2f4600146f83 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -57,6 +57,7 @@ #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT) +#define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_RR (7 |IPOPT_CONTROL) #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_SSRR (9 |IPOPT_CONTROL|IPOPT_COPY) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h new file mode 100644 index 000000000000..c7175e725804 --- /dev/null +++ b/include/net/cipso_ipv4.h @@ -0,0 +1,250 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _CIPSO_IPV4_H +#define _CIPSO_IPV4_H + +#include +#include +#include +#include + +/* known doi values */ +#define CIPSO_V4_DOI_UNKNOWN 0x00000000 + +/* tag types */ +#define CIPSO_V4_TAG_INVALID 0 +#define CIPSO_V4_TAG_RBITMAP 1 +#define CIPSO_V4_TAG_ENUM 2 +#define CIPSO_V4_TAG_RANGE 5 +#define CIPSO_V4_TAG_PBITMAP 6 +#define CIPSO_V4_TAG_FREEFORM 7 + +/* doi mapping types */ +#define CIPSO_V4_MAP_UNKNOWN 0 +#define CIPSO_V4_MAP_STD 1 +#define CIPSO_V4_MAP_PASS 2 + +/* limits */ +#define CIPSO_V4_MAX_REM_LVLS 256 +#define CIPSO_V4_INV_LVL 0x80000000 +#define CIPSO_V4_MAX_LOC_LVLS (CIPSO_V4_INV_LVL - 1) +#define CIPSO_V4_MAX_REM_CATS 65536 +#define CIPSO_V4_INV_CAT 0x80000000 +#define CIPSO_V4_MAX_LOC_CATS (CIPSO_V4_INV_CAT - 1) + +/* + * CIPSO DOI definitions + */ + +/* DOI definition struct */ +#define CIPSO_V4_TAG_MAXCNT 5 +struct cipso_v4_doi { + u32 doi; + u32 type; + union { + struct cipso_v4_std_map_tbl *std; + } map; + u8 tags[CIPSO_V4_TAG_MAXCNT]; + + u32 valid; + struct list_head list; + struct rcu_head rcu; + struct list_head dom_list; +}; + +/* Standard CIPSO mapping table */ +/* NOTE: the highest order bit (i.e. 0x80000000) is an 'invalid' flag, if the + * bit is set then consider that value as unspecified, meaning the + * mapping for that particular level/category is invalid */ +struct cipso_v4_std_map_tbl { + struct { + u32 *cipso; + u32 *local; + u32 cipso_size; + u32 local_size; + } lvl; + struct { + u32 *cipso; + u32 *local; + u32 cipso_size; + u32 local_size; + } cat; +}; + +/* + * Sysctl Variables + */ + +#ifdef CONFIG_NETLABEL +extern int cipso_v4_cache_enabled; +extern int cipso_v4_cache_bucketsize; +extern int cipso_v4_rbm_optfmt; +extern int cipso_v4_rbm_strictvalid; +#endif + +/* + * Helper Functions + */ + +#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) +#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso) + +/* + * DOI List Functions + */ + +#ifdef CONFIG_NETLABEL +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); +int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); +struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); +struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain); +#else +static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + return -ENOSYS; +} + +static inline int cipso_v4_doi_remove(u32 doi, + void (*callback) (struct rcu_head * head)) +{ + return 0; +} + +static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return NULL; +} + +static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +{ + return NULL; +} + +static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) +{ + return NULL; +} + +static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, + const char *domain) +{ + return -ENOSYS; +} + +static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +/* + * Label Mapping Cache Functions + */ + +#ifdef CONFIG_NETLABEL +void cipso_v4_cache_invalidate(void); +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr); +#else +static inline void cipso_v4_cache_invalidate(void) +{ + return; +} + +static inline int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +/* + * Protocol Handling Functions + */ + +#ifdef CONFIG_NETLABEL +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); +int cipso_v4_socket_setopt(struct socket *sock, + unsigned char *opt, + u32 opt_len); +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); +int cipso_v4_socket_getopt(const struct socket *sock, + unsigned char **opt, + u32 *opt_len); +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr); +int cipso_v4_validate(unsigned char **option); +#else +static inline void cipso_v4_error(struct sk_buff *skb, + int error, + u32 gateway) +{ + return; +} + +static inline int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_validate(unsigned char **option) +{ + return -ENOSYS; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _CIPSO_IPV4_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1f4a9a60d4cc..f4caad56cd03 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -51,7 +51,7 @@ struct ip_options { ts_needtime:1, ts_needaddr:1; unsigned char router_alert; - unsigned char __pad1; + unsigned char cipso; unsigned char __pad2; unsigned char __data[0]; }; diff --git a/include/net/netlabel.h b/include/net/netlabel.h new file mode 100644 index 000000000000..7cae730832c7 --- /dev/null +++ b/include/net/netlabel.h @@ -0,0 +1,291 @@ +/* + * NetLabel System + * + * The NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_H +#define _NETLABEL_H + +#include +#include +#include + +/* + * NetLabel - A management interface for maintaining network packet label + * mapping tables for explicit packet labling protocols. + * + * Network protocols such as CIPSO and RIPSO require a label translation layer + * to convert the label on the packet into something meaningful on the host + * machine. In the current Linux implementation these mapping tables live + * inside the kernel; NetLabel provides a mechanism for user space applications + * to manage these mapping tables. + * + * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to + * send messages between kernel and user space. The general format of a + * NetLabel message is shown below: + * + * +-----------------+-------------------+--------- --- -- - + * | struct nlmsghdr | struct genlmsghdr | payload + * +-----------------+-------------------+--------- --- -- - + * + * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal. + * The payload is dependent on the subsystem specified in the + * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions + * should be defined in the corresponding net/netlabel/netlabel_.h|c + * file. All of the fields in the NetLabel payload are NETLINK attributes, the + * length of each field is the length of the NETLINK attribute payload, see + * include/net/netlink.h for more information on NETLINK attributes. + * + */ + +/* + * NetLabel NETLINK protocol + */ + +#define NETLBL_PROTO_VERSION 1 + +/* NetLabel NETLINK types/families */ +#define NETLBL_NLTYPE_NONE 0 +#define NETLBL_NLTYPE_MGMT 1 +#define NETLBL_NLTYPE_MGMT_NAME "NLBL_MGMT" +#define NETLBL_NLTYPE_RIPSO 2 +#define NETLBL_NLTYPE_RIPSO_NAME "NLBL_RIPSO" +#define NETLBL_NLTYPE_CIPSOV4 3 +#define NETLBL_NLTYPE_CIPSOV4_NAME "NLBL_CIPSOv4" +#define NETLBL_NLTYPE_CIPSOV6 4 +#define NETLBL_NLTYPE_CIPSOV6_NAME "NLBL_CIPSOv6" +#define NETLBL_NLTYPE_UNLABELED 5 +#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" + +/* NetLabel return codes */ +#define NETLBL_E_OK 0 + +/* + * Helper functions + */ + +#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) +#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) +#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) + +/** + * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer + * @head: the amount of headroom in bytes + * @body: the desired size (minus headroom) in bytes + * @gfp_flags: the alloc flags to pass to alloc_skb() + * + * Description: + * Allocate a NETLINK message buffer based on the sizes given in @head and + * @body. If @head is greater than zero skb_reserve() is called to reserve + * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on + * success, NULL on failure. + * + */ +static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, + size_t body, + int gfp_flags) +{ + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); + if (skb == NULL) + return NULL; + if (head > 0) { + skb_reserve(skb, head); + if (skb_tailroom(skb) < body) { + kfree_skb(skb); + return NULL; + } + } + + return skb; +} + +/* + * NetLabel - Kernel API for accessing the network packet label mappings. + * + * The following functions are provided for use by other kernel modules, + * specifically kernel LSM modules, to provide a consistent, transparent API + * for dealing with explicit packet labeling protocols such as CIPSO and + * RIPSO. The functions defined here are implemented in the + * net/netlabel/netlabel_kapi.c file. + * + */ + +/* Domain mapping definition struct */ +struct netlbl_dom_map; + +/* Domain mapping operations */ +int netlbl_domhsh_remove(const char *domain); + +/* LSM security attributes */ +struct netlbl_lsm_cache { + void (*free) (const void *data); + void *data; +}; +struct netlbl_lsm_secattr { + char *domain; + + u32 mls_lvl; + u32 mls_lvl_vld; + unsigned char *mls_cat; + size_t mls_cat_len; + + struct netlbl_lsm_cache cache; +}; + +/* + * LSM security attribute operations + */ + + +/** + * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct + * @secattr: the struct to initialize + * + * Description: + * Initialize an already allocated netlbl_lsm_secattr struct. Returns zero on + * success, negative values on error. + * + */ +static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) +{ + memset(secattr, 0, sizeof(*secattr)); + return 0; +} + +/** + * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct + * @secattr: the struct to clear + * @clear_cache: cache clear flag + * + * Description: + * Destroys the @secattr struct, including freeing all of the internal buffers. + * If @clear_cache is true then free the cache fields, otherwise leave them + * intact. The struct must be reset with a call to netlbl_secattr_init() + * before reuse. + * + */ +static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) +{ + if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) + secattr->cache.free(secattr->cache.data); + kfree(secattr->domain); + kfree(secattr->mls_cat); +} + +/** + * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct + * @flags: the memory allocation flags + * + * Description: + * Allocate and initialize a netlbl_lsm_secattr struct. Returns a valid + * pointer on success, or NULL on failure. + * + */ +static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) +{ + return kzalloc(sizeof(struct netlbl_lsm_secattr), flags); +} + +/** + * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct + * @secattr: the struct to free + * @clear_cache: cache clear flag + * + * Description: + * Frees @secattr including all of the internal buffers. If @clear_cache is + * true then free the cache fields, otherwise leave them intact. + * + */ +static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) +{ + netlbl_secattr_destroy(secattr, clear_cache); + kfree(secattr); +} + +/* + * LSM protocol operations + */ + +#ifdef CONFIG_NETLABEL +int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr); +int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr); +int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr); +void netlbl_skbuff_err(struct sk_buff *skb, int error); +#else +static inline int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline void netlbl_skbuff_err(struct sk_buff *skb, int error) +{ + return; +} +#endif /* CONFIG_NETLABEL */ + +/* + * LSM label mapping cache operations + */ + +#ifdef CONFIG_NETLABEL +void netlbl_cache_invalidate(void); +int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr); +#else +static inline void netlbl_cache_invalidate(void) +{ + return; +} + +static inline int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _NETLABEL_H */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2b98943e6b02..008e69d2e423 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ - case 0x86: /* Another "Commercial Security" crap. */ + case IPOPT_CIPSO: case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 406056edc02b..e0a93b4fa8cc 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to @@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->is_strictroute = sopt->is_strictroute; } } + if (sopt->cipso) { + optlen = sptr[sopt->cipso+1]; + dopt->cipso = dopt->optlen+sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->cipso, optlen); + dptr += optlen; + dopt->optlen += optlen; + } while (dopt->optlen & 3) { *dptr++ = IPOPT_END; dopt->optlen++; @@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; + case IPOPT_CIPSO: + if (opt->cipso) { + pp_ptr = optptr; + goto error; + } + opt->cipso = optptr - iph; + if (cipso_v4_validate(&optptr)) { + pp_ptr = optptr; + goto error; + } + break; case IPOPT_SEC: case IPOPT_SID: default: -- cgit v1.2.3-71-gd317 From 446fda4f26822b2d42ab3396aafcedf38a9ff2b6 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:48:06 -0700 Subject: [NetLabel]: CIPSOv4 engine Add support for the Commercial IP Security Option (CIPSO) to the IPv4 network stack. CIPSO has become a de-facto standard for trusted/labeled networking amongst existing Trusted Operating Systems such as Trusted Solaris, HP-UX CMW, etc. This implementation is designed to be used with the NetLabel subsystem to provide explicit packet labeling to LSM developers. The CIPSO/IPv4 packet labeling works by the LSM calling a NetLabel API function which attaches a CIPSO label (IPv4 option) to a given socket; this in turn attaches the CIPSO label to every packet leaving the socket without any extra processing on the outbound side. On the inbound side the individual packet's sk_buff is examined through a call to a NetLabel API function to determine if a CIPSO/IPv4 label is present and if so the security attributes of the CIPSO label are returned to the caller of the NetLabel API function. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/sysctl.h | 4 + net/ipv4/Makefile | 1 + net/ipv4/cipso_ipv4.c | 1607 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 35 + 4 files changed, 1647 insertions(+) create mode 100644 net/ipv4/cipso_ipv4.c (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e4b1a4d4dcf3..af61d9235409 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -411,6 +411,10 @@ enum NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, NET_TCP_SLOW_START_AFTER_IDLE=117, + NET_CIPSOV4_CACHE_ENABLE=118, + NET_CIPSOV4_CACHE_BUCKET_SIZE=119, + NET_CIPSOV4_RBM_OPTFMT=120, + NET_CIPSOV4_RBM_STRICTVALID=121, }; enum { diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4878fc5be85f..f66049e28aeb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o +obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c new file mode 100644 index 000000000000..b82a101c95c5 --- /dev/null +++ b/net/ipv4/cipso_ipv4.c @@ -0,0 +1,1607 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cipso_v4_domhsh_entry { + char *domain; + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* List of available DOI definitions */ +/* XXX - Updates should be minimal so having a single lock for the + * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be + * okay. */ +/* XXX - This currently assumes a minimal number of different DOIs in use, + * if in practice there are a lot of different DOIs this list should + * probably be turned into a hash table or something similar so we + * can do quick lookups. */ +DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); + +/* Label mapping cache */ +int cipso_v4_cache_enabled = 1; +int cipso_v4_cache_bucketsize = 10; +#define CIPSO_V4_CACHE_BUCKETBITS 7 +#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) +#define CIPSO_V4_CACHE_REORDERLIMIT 10 +struct cipso_v4_map_cache_bkt { + spinlock_t lock; + u32 size; + struct list_head list; +}; +struct cipso_v4_map_cache_entry { + u32 hash; + unsigned char *key; + size_t key_len; + + struct netlbl_lsm_cache lsm_data; + + u32 activity; + struct list_head list; +}; +static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL; + +/* Restricted bitmap (tag #1) flags */ +int cipso_v4_rbm_optfmt = 0; +int cipso_v4_rbm_strictvalid = 1; + +/* + * Helper Functions + */ + +/** + * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit + * @bitmap: the bitmap + * @bitmap_len: length in bits + * @offset: starting offset + * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit + * + * Description: + * Starting at @offset, walk the bitmap from left to right until either the + * desired bit is found or we reach the end. Return the bit offset, -1 if + * not found, or -2 if error. + */ +static int cipso_v4_bitmap_walk(const unsigned char *bitmap, + u32 bitmap_len, + u32 offset, + u8 state) +{ + u32 bit_spot; + u32 byte_offset; + unsigned char bitmask; + unsigned char byte; + + /* gcc always rounds to zero when doing integer division */ + byte_offset = offset / 8; + byte = bitmap[byte_offset]; + bit_spot = offset; + bitmask = 0x80 >> (offset % 8); + + while (bit_spot < bitmap_len) { + if ((state && (byte & bitmask) == bitmask) || + (state == 0 && (byte & bitmask) == 0)) + return bit_spot; + + bit_spot++; + bitmask >>= 1; + if (bitmask == 0) { + byte = bitmap[++byte_offset]; + bitmask = 0x80; + } + } + + return -1; +} + +/** + * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap + * @bitmap: the bitmap + * @bit: the bit + * @state: if non-zero, set the bit (1) else clear the bit (0) + * + * Description: + * Set a single bit in the bitmask. Returns zero on success, negative values + * on error. + */ +static void cipso_v4_bitmap_setbit(unsigned char *bitmap, + u32 bit, + u8 state) +{ + u32 byte_spot; + u8 bitmask; + + /* gcc always rounds to zero when doing integer division */ + byte_spot = bit / 8; + bitmask = 0x80 >> (bit % 8); + if (state) + bitmap[byte_spot] |= bitmask; + else + bitmap[byte_spot] &= ~bitmask; +} + +/** + * cipso_v4_doi_domhsh_free - Frees a domain list entry + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to a domain list entry can be released + * safely. + * + */ +static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) +{ + struct cipso_v4_domhsh_entry *ptr; + + ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); + kfree(ptr->domain); + kfree(ptr); +} + +/** + * cipso_v4_cache_entry_free - Frees a cache entry + * @entry: the entry to free + * + * Description: + * This function frees the memory associated with a cache entry. + * + */ +static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) +{ + if (entry->lsm_data.free) + entry->lsm_data.free(entry->lsm_data.data); + kfree(entry->key); + kfree(entry); +} + +/** + * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache + * @key: the hash key + * @key_len: the length of the key in bytes + * + * Description: + * The CIPSO tag hashing function. Returns a 32-bit hash value. + * + */ +static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) +{ + return jhash(key, key_len, 0); +} + +/* + * Label Mapping Cache Functions + */ + +/** + * cipso_v4_cache_init - Initialize the CIPSO cache + * + * Description: + * Initializes the CIPSO label mapping cache, this function should be called + * before any of the other functions defined in this file. Returns zero on + * success, negative values on error. + * + */ +static int cipso_v4_cache_init(void) +{ + u32 iter; + + cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, + sizeof(struct cipso_v4_map_cache_bkt), + GFP_KERNEL); + if (cipso_v4_cache == NULL) + return -ENOMEM; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock_init(&cipso_v4_cache[iter].lock); + cipso_v4_cache[iter].size = 0; + INIT_LIST_HEAD(&cipso_v4_cache[iter].list); + } + + return 0; +} + +/** + * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache + * + * Description: + * Invalidates and frees any entries in the CIPSO cache. Returns zero on + * success and negative values on failure. + * + */ +void cipso_v4_cache_invalidate(void) +{ + struct cipso_v4_map_cache_entry *entry, *tmp_entry; + u32 iter; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock(&cipso_v4_cache[iter].lock); + list_for_each_entry_safe(entry, + tmp_entry, + &cipso_v4_cache[iter].list, list) { + list_del(&entry->list); + cipso_v4_cache_entry_free(entry); + } + cipso_v4_cache[iter].size = 0; + spin_unlock(&cipso_v4_cache[iter].lock); + } + + return; +} + +/** + * cipso_v4_cache_check - Check the CIPSO cache for a label mapping + * @key: the buffer to check + * @key_len: buffer length in bytes + * @secattr: the security attribute struct to use + * + * Description: + * This function checks the cache to see if a label mapping already exists for + * the given key. If there is a match then the cache is adjusted and the + * @secattr struct is populated with the correct LSM security attributes. The + * cache is adjusted in the following manner if the entry is not already the + * first in the cache bucket: + * + * 1. The cache entry's activity counter is incremented + * 2. The previous (higher ranking) entry's activity counter is decremented + * 3. If the difference between the two activity counters is geater than + * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped + * + * Returns zero on success, -ENOENT for a cache miss, and other negative values + * on error. + * + */ +static int cipso_v4_cache_check(const unsigned char *key, + u32 key_len, + struct netlbl_lsm_secattr *secattr) +{ + u32 bkt; + struct cipso_v4_map_cache_entry *entry; + struct cipso_v4_map_cache_entry *prev_entry = NULL; + u32 hash; + + if (!cipso_v4_cache_enabled) + return -ENOENT; + + hash = cipso_v4_map_cache_hash(key, key_len); + bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock(&cipso_v4_cache[bkt].lock); + list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { + if (entry->hash == hash && + entry->key_len == key_len && + memcmp(entry->key, key, key_len) == 0) { + entry->activity += 1; + secattr->cache.free = entry->lsm_data.free; + secattr->cache.data = entry->lsm_data.data; + if (prev_entry == NULL) { + spin_unlock(&cipso_v4_cache[bkt].lock); + return 0; + } + + if (prev_entry->activity > 0) + prev_entry->activity -= 1; + if (entry->activity > prev_entry->activity && + entry->activity - prev_entry->activity > + CIPSO_V4_CACHE_REORDERLIMIT) { + __list_del(entry->list.prev, entry->list.next); + __list_add(&entry->list, + prev_entry->list.prev, + &prev_entry->list); + } + + spin_unlock(&cipso_v4_cache[bkt].lock); + return 0; + } + prev_entry = entry; + } + spin_unlock(&cipso_v4_cache[bkt].lock); + + return -ENOENT; +} + +/** + * cipso_v4_cache_add - Add an entry to the CIPSO cache + * @skb: the packet + * @secattr: the packet's security attributes + * + * Description: + * Add a new entry into the CIPSO label mapping cache. Add the new entry to + * head of the cache bucket's list, if the cache bucket is out of room remove + * the last entry in the list first. It is important to note that there is + * currently no checking for duplicate keys. Returns zero on success, + * negative values on failure. + * + */ +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 bkt; + struct cipso_v4_map_cache_entry *entry = NULL; + struct cipso_v4_map_cache_entry *old_entry = NULL; + unsigned char *cipso_ptr; + u32 cipso_ptr_len; + + if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + return 0; + + cipso_ptr = CIPSO_V4_OPTPTR(skb); + cipso_ptr_len = cipso_ptr[1]; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + return -ENOMEM; + entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); + if (entry->key == NULL) { + ret_val = -ENOMEM; + goto cache_add_failure; + } + memcpy(entry->key, cipso_ptr, cipso_ptr_len); + entry->key_len = cipso_ptr_len; + entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); + entry->lsm_data.free = secattr->cache.free; + entry->lsm_data.data = secattr->cache.data; + + bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock(&cipso_v4_cache[bkt].lock); + if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache[bkt].size += 1; + } else { + old_entry = list_entry(cipso_v4_cache[bkt].list.prev, + struct cipso_v4_map_cache_entry, list); + list_del(&old_entry->list); + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache_entry_free(old_entry); + } + spin_unlock(&cipso_v4_cache[bkt].lock); + + return 0; + +cache_add_failure: + if (entry) + cipso_v4_cache_entry_free(entry); + return ret_val; +} + +/* + * DOI List Functions + */ + +/** + * cipso_v4_doi_search - Searches for a DOI definition + * @doi: the DOI to search for + * + * Description: + * Search the DOI definition list for a DOI definition with a DOI value that + * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). + * Returns a pointer to the DOI definition on success and NULL on failure. + */ +static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) +{ + struct cipso_v4_doi *iter; + + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->doi == doi && iter->valid) + return iter; + return NULL; +} + +/** + * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine + * @doi_def: the DOI structure + * + * Description: + * The caller defines a new DOI for use by the CIPSO engine and calls this + * function to add it to the list of acceptable domains. The caller must + * ensure that the mapping table specified in @doi_def->map meets all of the + * requirements of the mapping type (see cipso_ipv4.h for details). Returns + * zero on success and non-zero on failure. + * + */ +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) + return -EINVAL; + + doi_def->valid = 1; + INIT_RCU_HEAD(&doi_def->rcu); + INIT_LIST_HEAD(&doi_def->dom_list); + + rcu_read_lock(); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_rlock; + spin_lock(&cipso_v4_doi_list_lock); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_slock; + list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; + +doi_add_failure_slock: + spin_unlock(&cipso_v4_doi_list_lock); +doi_add_failure_rlock: + rcu_read_unlock(); + return -EEXIST; +} + +/** + * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine + * @doi: the DOI value + * @callback: the DOI cleanup/free callback + * + * Description: + * Removes a DOI definition from the CIPSO engine, @callback is called to + * free any memory. The NetLabel routines will be called to release their own + * LSM domain mappings as well as our own domain list. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) +{ + struct cipso_v4_doi *doi_def; + struct cipso_v4_domhsh_entry *dom_iter; + + rcu_read_lock(); + if (cipso_v4_doi_search(doi) != NULL) { + spin_lock(&cipso_v4_doi_list_lock); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + return -ENOENT; + } + doi_def->valid = 0; + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) + if (dom_iter->valid) + netlbl_domhsh_remove(dom_iter->domain); + cipso_v4_cache_invalidate(); + rcu_read_unlock(); + + call_rcu(&doi_def->rcu, callback); + return 0; + } + rcu_read_unlock(); + + return -ENOENT; +} + +/** + * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * @doi: the DOI value + * + * Description: + * Searches for a valid DOI definition and if one is found it is returned to + * the caller. Otherwise NULL is returned. The caller must ensure that + * rcu_read_lock() is held while accessing the returned definition. + * + */ +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return cipso_v4_doi_search(doi); +} + +/** + * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff + * @headroom: the amount of headroom to allocate for the sk_buff + * + * Description: + * Dump a list of all the configured DOI values into a sk_buff. The returned + * sk_buff has room at the front of the sk_buff for @headroom bytes. See + * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This + * function may fail if another process is changing the DOI list at the same + * time. Returns a pointer to a sk_buff on success, NULL on error. + * + */ +struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +{ + struct sk_buff *skb = NULL; + struct cipso_v4_doi *iter; + u32 doi_cnt = 0; + ssize_t buf_len; + + buf_len = NETLBL_LEN_U32; + rcu_read_lock(); + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->valid) { + doi_cnt += 1; + buf_len += 2 * NETLBL_LEN_U32; + } + + skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); + if (skb == NULL) + goto doi_dump_all_failure; + + if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) + goto doi_dump_all_failure; + buf_len -= NETLBL_LEN_U32; + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->valid) { + if (buf_len < 2 * NETLBL_LEN_U32) + goto doi_dump_all_failure; + if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) + goto doi_dump_all_failure; + if (nla_put_u32(skb, NLA_U32, iter->type) != 0) + goto doi_dump_all_failure; + buf_len -= 2 * NETLBL_LEN_U32; + } + rcu_read_unlock(); + + return skb; + +doi_dump_all_failure: + rcu_read_unlock(); + kfree(skb); + return NULL; +} + +/** + * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff + * @doi: the DOI value + * @headroom: the amount of headroom to allocate for the sk_buff + * + * Description: + * Lookup the DOI definition matching @doi and dump it's contents into a + * sk_buff. The returned sk_buff has room at the front of the sk_buff for + * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message + * format. This function may fail if another process is changing the DOI list + * at the same time. Returns a pointer to a sk_buff on success, NULL on error. + * + */ +struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) +{ + struct sk_buff *skb = NULL; + struct cipso_v4_doi *iter; + u32 tag_cnt = 0; + u32 lvl_cnt = 0; + u32 cat_cnt = 0; + ssize_t buf_len; + ssize_t tmp; + + rcu_read_lock(); + iter = cipso_v4_doi_getdef(doi); + if (iter == NULL) + goto doi_dump_failure; + buf_len = NETLBL_LEN_U32; + switch (iter->type) { + case CIPSO_V4_MAP_PASS: + buf_len += NETLBL_LEN_U32; + while(tag_cnt < CIPSO_V4_TAG_MAXCNT && + iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { + tag_cnt += 1; + buf_len += NETLBL_LEN_U8; + } + break; + case CIPSO_V4_MAP_STD: + buf_len += 3 * NETLBL_LEN_U32; + while (tag_cnt < CIPSO_V4_TAG_MAXCNT && + iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { + tag_cnt += 1; + buf_len += NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) + if (iter->map.std->lvl.local[tmp] != + CIPSO_V4_INV_LVL) { + lvl_cnt += 1; + buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) + if (iter->map.std->cat.local[tmp] != + CIPSO_V4_INV_CAT) { + cat_cnt += 1; + buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; + } + break; + } + + skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); + if (skb == NULL) + goto doi_dump_failure; + + if (nla_put_u32(skb, NLA_U32, iter->type) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32; + if (iter != cipso_v4_doi_getdef(doi)) + goto doi_dump_failure; + switch (iter->type) { + case CIPSO_V4_MAP_PASS: + if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32; + for (tmp = 0; + tmp < CIPSO_V4_TAG_MAXCNT && + iter->tags[tmp] != CIPSO_V4_TAG_INVALID; + tmp++) { + if (buf_len < NETLBL_LEN_U8) + goto doi_dump_failure; + if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U8; + } + break; + case CIPSO_V4_MAP_STD: + if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) + goto doi_dump_failure; + buf_len -= 3 * NETLBL_LEN_U32; + for (tmp = 0; + tmp < CIPSO_V4_TAG_MAXCNT && + iter->tags[tmp] != CIPSO_V4_TAG_INVALID; + tmp++) { + if (buf_len < NETLBL_LEN_U8) + goto doi_dump_failure; + if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) + if (iter->map.std->lvl.local[tmp] != + CIPSO_V4_INV_LVL) { + if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, tmp) != 0) + goto doi_dump_failure; + if (nla_put_u8(skb, + NLA_U8, + iter->map.std->lvl.local[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) + if (iter->map.std->cat.local[tmp] != + CIPSO_V4_INV_CAT) { + if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, tmp) != 0) + goto doi_dump_failure; + if (nla_put_u16(skb, + NLA_U16, + iter->map.std->cat.local[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; + } + break; + } + rcu_read_unlock(); + + return skb; + +doi_dump_failure: + rcu_read_unlock(); + kfree(skb); + return NULL; +} + +/** + * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to add + * + * Description: + * Adds the @domain to the the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). This function + * does allocate memory. Returns zero on success, negative values on failure. + * + */ +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + struct cipso_v4_domhsh_entry *new_dom; + + new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); + if (new_dom == NULL) + return -ENOMEM; + if (domain) { + new_dom->domain = kstrdup(domain, GFP_KERNEL); + if (new_dom->domain == NULL) { + kfree(new_dom); + return -ENOMEM; + } + } + new_dom->valid = 1; + INIT_RCU_HEAD(&new_dom->rcu); + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + kfree(new_dom->domain); + kfree(new_dom); + return -EEXIST; + } + list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; +} + +/** + * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to remove + * + * Description: + * Removes the @domain from the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). Returns zero + * on success and negative values on error. + * + */ +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + iter->valid = 0; + list_del_rcu(&iter->list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); + + return 0; + } + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return -ENOENT; +} + +/* + * Label Mapping Functions + */ + +/** + * cipso_v4_map_lvl_valid - Checks to see if the given level is understood + * @doi_def: the DOI definition + * @level: the level to check + * + * Description: + * Checks the given level against the given DOI definition and returns a + * negative value if the level does not have a valid mapping and a zero value + * if the level is defined by the DOI. + * + */ +static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network + * @doi_def: the DOI definition + * @host_lvl: the host MLS level + * @net_lvl: the network/CIPSO MLS level + * + * Description: + * Perform a label mapping to translate a local MLS level to the correct + * CIPSO level using the given DOI definition. Returns zero on success, + * negative values otherwise. + * + */ +static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, + u32 host_lvl, + u32 *net_lvl) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *net_lvl = host_lvl; + return 0; + case CIPSO_V4_MAP_STD: + if (host_lvl < doi_def->map.std->lvl.local_size) { + *net_lvl = doi_def->map.std->lvl.local[host_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host + * @doi_def: the DOI definition + * @net_lvl: the network/CIPSO MLS level + * @host_lvl: the host MLS level + * + * Description: + * Perform a label mapping to translate a CIPSO level to the correct local MLS + * level using the given DOI definition. Returns zero on success, negative + * values otherwise. + * + */ +static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, + u32 net_lvl, + u32 *host_lvl) +{ + struct cipso_v4_std_map_tbl *map_tbl; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *host_lvl = net_lvl; + return 0; + case CIPSO_V4_MAP_STD: + map_tbl = doi_def->map.std; + if (net_lvl < map_tbl->lvl.cipso_size && + map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { + *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid + * @doi_def: the DOI definition + * @bitmap: category bitmap + * @bitmap_len: bitmap length in bytes + * + * Description: + * Checks the given category bitmap against the given DOI definition and + * returns a negative value if any of the categories in the bitmap do not have + * a valid mapping and a zero value if all of the categories are valid. + * + */ +static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, + const unsigned char *bitmap, + u32 bitmap_len) +{ + int cat = -1; + u32 bitmap_len_bits = bitmap_len * 8; + u32 cipso_cat_size = doi_def->map.std->cat.cipso_size; + u32 *cipso_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + for (;;) { + cat = cipso_v4_bitmap_walk(bitmap, + bitmap_len_bits, + cat + 1, + 1); + if (cat < 0) + break; + if (cat >= cipso_cat_size || + cipso_array[cat] >= CIPSO_V4_INV_CAT) + return -EFAULT; + } + + if (cat == -1) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @host_cat: the category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * @net_cat: the zero'd out category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CIPSO bitmap using the given DOI definition. Returns the minimum + * size in bytes of the network bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, + const unsigned char *host_cat, + u32 host_cat_len, + unsigned char *net_cat, + u32 net_cat_len) +{ + int host_spot = -1; + u32 net_spot; + u32 net_spot_max = 0; + u32 host_clen_bits = host_cat_len * 8; + u32 net_clen_bits = net_cat_len * 8; + u32 host_cat_size = doi_def->map.std->cat.local_size; + u32 *host_cat_array = doi_def->map.std->cat.local; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + net_spot_max = host_cat_len - 1; + while (net_spot_max > 0 && host_cat[net_spot_max] == 0) + net_spot_max--; + if (net_spot_max > net_cat_len) + return -EINVAL; + memcpy(net_cat, host_cat, net_spot_max); + return net_spot_max; + case CIPSO_V4_MAP_STD: + for (;;) { + host_spot = cipso_v4_bitmap_walk(host_cat, + host_clen_bits, + host_spot + 1, + 1); + if (host_spot < 0) + break; + if (host_spot >= host_cat_size) + return -EPERM; + + net_spot = host_cat_array[host_spot]; + if (net_spot >= net_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(net_cat, net_spot, 1); + + if (net_spot > net_spot_max) + net_spot_max = net_spot; + } + + if (host_spot == -2) + return -EFAULT; + + if (++net_spot_max % 8) + return net_spot_max / 8 + 1; + return net_spot_max / 8; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * @host_cat: the zero'd out category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * + * Description: + * Perform a label mapping to translate a CIPSO bitmap to the correct local + * MLS category bitmap using the given DOI definition. Returns the minimum + * size in bytes of the host bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + unsigned char *host_cat, + u32 host_cat_len) +{ + u32 host_spot; + u32 host_spot_max = 0; + int net_spot = -1; + u32 net_clen_bits = net_cat_len * 8; + u32 host_clen_bits = host_cat_len * 8; + u32 net_cat_size = doi_def->map.std->cat.cipso_size; + u32 *net_cat_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + if (net_cat_len > host_cat_len) + return -EINVAL; + memcpy(host_cat, net_cat, net_cat_len); + return net_cat_len; + case CIPSO_V4_MAP_STD: + for (;;) { + net_spot = cipso_v4_bitmap_walk(net_cat, + net_clen_bits, + net_spot + 1, + 1); + if (net_spot < 0) + break; + if (net_spot >= net_cat_size || + net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) + return -EPERM; + + host_spot = net_cat_array[net_spot]; + if (host_spot >= host_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(host_cat, host_spot, 1); + + if (host_spot > host_spot_max) + host_spot_max = host_spot; + } + + if (net_spot == -2) + return -EFAULT; + + if (++host_spot_max % 8) + return host_spot_max / 8 + 1; + return host_spot_max / 8; + } + + return -EINVAL; +} + +/* + * Protocol Handling Functions + */ + +#define CIPSO_V4_HDR_LEN 6 + +/** + * cipso_v4_gentag_hdr - Generate a CIPSO option header + * @doi_def: the DOI definition + * @len: the total tag length in bytes + * @buf: the CIPSO option buffer + * + * Description: + * Write a CIPSO header into the beginning of @buffer. Return zero on success, + * negative values on failure. + * + */ +static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, + u32 len, + unsigned char *buf) +{ + if (CIPSO_V4_HDR_LEN + len > 40) + return -ENOSPC; + + buf[0] = IPOPT_CIPSO; + buf[1] = CIPSO_V4_HDR_LEN + len; + *(u32 *)&buf[2] = htonl(doi_def->doi); + + return 0; +} + +#define CIPSO_V4_TAG1_CAT_LEN 30 + +/** + * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The + * actual buffer length may be larger than the indicated size due to + * translation between host and network category bitmaps. Returns zero on + * success, negative values on failure. + * + */ +static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char **buffer, + u32 *buffer_len) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 level; + + if (secattr->mls_cat) { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN, + GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_hton(doi_def, + secattr->mls_cat, + secattr->mls_cat_len, + &buf[CIPSO_V4_HDR_LEN + 4], + CIPSO_V4_TAG1_CAT_LEN); + if (ret_val < 0) + goto gentag_failure; + + /* This will send packets using the "optimized" format when + * possibile as specified in section 3.4.2.6 of the + * CIPSO draft. */ + if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10)) + ret_val = 10; + + buf_len = 4 + ret_val; + } else { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + buf_len = 4; + } + + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + if (ret_val != 0) + goto gentag_failure; + + ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf); + if (ret_val != 0) + goto gentag_failure; + + buf[CIPSO_V4_HDR_LEN] = 0x01; + buf[CIPSO_V4_HDR_LEN + 1] = buf_len; + buf[CIPSO_V4_HDR_LEN + 3] = level; + + *buffer = buf; + *buffer_len = CIPSO_V4_HDR_LEN + buf_len; + + return 0; + +gentag_failure: + kfree(buf); + return ret_val; +} + +/** + * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security + * attributes in @secattr. Return zero on success, negatives values on + * failure. + * + */ +static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u8 tag_len = tag[1]; + u32 level; + + ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); + if (ret_val != 0) + return ret_val; + secattr->mls_lvl = level; + secattr->mls_lvl_vld = 1; + + if (tag_len > 4) { + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + secattr->mls_cat_len = tag_len - 4; + break; + case CIPSO_V4_MAP_STD: + secattr->mls_cat_len = + doi_def->map.std->cat.local_size; + break; + } + secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); + if (secattr->mls_cat == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, + &tag[4], + tag_len - 4, + secattr->mls_cat, + secattr->mls_cat_len); + if (ret_val < 0) { + kfree(secattr->mls_cat); + return ret_val; + } + secattr->mls_cat_len = ret_val; + } + + return 0; +} + +/** + * cipso_v4_validate - Validate a CIPSO option + * @option: the start of the option, on error it is set to point to the error + * + * Description: + * This routine is called to validate a CIPSO option, it checks all of the + * fields to ensure that they are at least valid, see the draft snippet below + * for details. If the option is valid then a zero value is returned and + * the value of @option is unchanged. If the option is invalid then a + * non-zero value is returned and @option is adjusted to point to the + * offending portion of the option. From the IETF draft ... + * + * "If any field within the CIPSO options, such as the DOI identifier, is not + * recognized the IP datagram is discarded and an ICMP 'parameter problem' + * (type 12) is generated and returned. The ICMP code field is set to 'bad + * parameter' (code 0) and the pointer is set to the start of the CIPSO field + * that is unrecognized." + * + */ +int cipso_v4_validate(unsigned char **option) +{ + unsigned char *opt = *option; + unsigned char *tag; + unsigned char opt_iter; + unsigned char err_offset = 0; + u8 opt_len; + u8 tag_len; + struct cipso_v4_doi *doi_def = NULL; + u32 tag_iter; + + /* caller already checks for length values that are too large */ + opt_len = opt[1]; + if (opt_len < 8) { + err_offset = 1; + goto validate_return; + } + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2]))); + if (doi_def == NULL) { + err_offset = 2; + goto validate_return_locked; + } + + opt_iter = 6; + tag = opt + opt_iter; + while (opt_iter < opt_len) { + for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) + if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID || + ++tag_iter == CIPSO_V4_TAG_MAXCNT) { + err_offset = opt_iter; + goto validate_return_locked; + } + + tag_len = tag[1]; + if (tag_len > (opt_len - opt_iter)) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + switch (tag[0]) { + case CIPSO_V4_TAG_RBITMAP: + if (tag_len < 4) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + /* We are already going to do all the verification + * necessary at the socket layer so from our point of + * view it is safe to turn these checks off (and less + * work), however, the CIPSO draft says we should do + * all the CIPSO validations here but it doesn't + * really specify _exactly_ what we need to validate + * ... so, just make it a sysctl tunable. */ + if (cipso_v4_rbm_strictvalid) { + if (cipso_v4_map_lvl_valid(doi_def, + tag[3]) < 0) { + err_offset = opt_iter + 3; + goto validate_return_locked; + } + if (tag_len > 4 && + cipso_v4_map_cat_rbm_valid(doi_def, + &tag[4], + tag_len - 4) < 0) { + err_offset = opt_iter + 4; + goto validate_return_locked; + } + } + break; + default: + err_offset = opt_iter; + goto validate_return_locked; + } + + tag += tag_len; + opt_iter += tag_len; + } + +validate_return_locked: + rcu_read_unlock(); +validate_return: + *option = opt + err_offset; + return err_offset; +} + +/** + * cipso_v4_error - Send the correct reponse for a bad packet + * @skb: the packet + * @error: the error code + * @gateway: CIPSO gateway flag + * + * Description: + * Based on the error code given in @error, send an ICMP error message back to + * the originating host. From the IETF draft ... + * + * "If the contents of the CIPSO [option] are valid but the security label is + * outside of the configured host or port label range, the datagram is + * discarded and an ICMP 'destination unreachable' (type 3) is generated and + * returned. The code field of the ICMP is set to 'communication with + * destination network administratively prohibited' (code 9) or to + * 'communication with destination host administratively prohibited' + * (code 10). The value of the code is dependent on whether the originator + * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The + * recipient of the ICMP message MUST be able to handle either value. The + * same procedure is performed if a CIPSO [option] can not be added to an + * IP packet because it is too large to fit in the IP options area." + * + * "If the error is triggered by receipt of an ICMP message, the message is + * discarded and no response is permitted (consistent with general ICMP + * processing rules)." + * + */ +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) +{ + if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) + return; + + if (gateway) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); +} + +/** + * cipso_v4_socket_setattr - Add a CIPSO option to a socket + * @sock: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sock->sk, which means it either needs to be in the + * process of being created or locked via lock_sock(sock->sk). Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 iter; + unsigned char *buf = NULL; + u32 buf_len = 0; + u32 opt_len; + struct ip_options *opt = NULL; + struct sock *sk; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + sk = sock->sk; + if (sk == NULL) + return 0; + + /* XXX - This code assumes only one tag per CIPSO option which isn't + * really a good assumption to make but since we only support the MAC + * tags right now it is a safe assumption. */ + iter = 0; + do { + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_gentag_rbm(doi_def, + secattr, + &buf, + &buf_len); + break; + default: + ret_val = -EPERM; + goto socket_setattr_failure; + } + + iter++; + } while (ret_val != 0 && + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); + if (ret_val != 0) + goto socket_setattr_failure; + + /* We can't use ip_options_get() directly because it makes a call to + * ip_options_get_alloc() which allocates memory with GFP_KERNEL and + * we can't block here. */ + opt_len = (buf_len + 3) & ~3; + opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); + if (opt == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + memcpy(opt->__data, buf, buf_len); + opt->optlen = opt_len; + opt->is_data = 1; + kfree(buf); + buf = NULL; + ret_val = ip_options_compile(opt, NULL); + if (ret_val != 0) + goto socket_setattr_failure; + + sk_inet = inet_sk(sk); + if (sk_inet->is_icsk) { + sk_conn = inet_csk(sk); + if (sk_inet->opt) + sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; + sk_conn->icsk_ext_hdr_len += opt->optlen; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } + opt = xchg(&sk_inet->opt, opt); + kfree(opt); + + return 0; + +socket_setattr_failure: + kfree(buf); + kfree(opt); + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + struct sock *sk; + struct inet_sock *sk_inet; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + sk = sock->sk; + lock_sock(sk); + sk_inet = inet_sk(sk); + if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) + goto socket_getattr_return; + cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - + sizeof(struct iphdr); + ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); + if (ret_val == 0) + goto socket_getattr_return; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + rcu_read_unlock(); + goto socket_getattr_return; + } + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + rcu_read_unlock(); + +socket_getattr_return: + release_sock(sk); + return ret_val; +} + +/** + * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Parse the given packet's CIPSO option and return the security attributes. + * Returns zero on success and negative values on failure. + * + */ +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + if (!CIPSO_V4_OPTEXIST(skb)) + return -ENOMSG; + cipso_ptr = CIPSO_V4_OPTPTR(skb); + if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) + return 0; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) + goto skbuff_getattr_return; + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + +skbuff_getattr_return: + rcu_read_unlock(); + return ret_val; +} + +/* + * Setup Functions + */ + +/** + * cipso_v4_init - Initialize the CIPSO module + * + * Description: + * Initialize the CIPSO module and prepare it for use. Returns zero on success + * and negative values on failure. + * + */ +static int __init cipso_v4_init(void) +{ + int ret_val; + + ret_val = cipso_v4_cache_init(); + if (ret_val != 0) + panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", + ret_val); + + return 0; +} + +subsys_initcall(cipso_v4_init); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 70cea9d08a38..19b2071ff319 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -17,6 +17,7 @@ #include #include #include +#include /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -697,6 +698,40 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#ifdef CONFIG_NETLABEL + { + .ctl_name = NET_CIPSOV4_CACHE_ENABLE, + .procname = "cipso_cache_enable", + .data = &cipso_v4_cache_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, + .procname = "cipso_cache_bucket_size", + .data = &cipso_v4_cache_bucketsize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_OPTFMT, + .procname = "cipso_rbm_optfmt", + .data = &cipso_v4_rbm_optfmt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, + .procname = "cipso_rbm_strictvalid", + .data = &cipso_v4_rbm_strictvalid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* CONFIG_NETLABEL */ { .ctl_name = 0 } }; -- cgit v1.2.3-71-gd317 From 7420ed23a4f77480b5b7b3245e5da30dd24b7575 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:17:57 -0700 Subject: [NetLabel]: SELinux support Add NetLabel support to the SELinux LSM and modify the socket_post_create() LSM hook to return an error code. The most significant part of this patch is the addition of NetLabel hooks into the following SELinux LSM hooks: * selinux_file_permission() * selinux_socket_sendmsg() * selinux_socket_post_create() * selinux_socket_sock_rcv_skb() * selinux_socket_getpeersec_stream() * selinux_socket_getpeersec_dgram() * selinux_sock_graft() * selinux_inet_conn_request() The basic reasoning behind this patch is that outgoing packets are "NetLabel'd" by labeling their socket and the NetLabel security attributes are checked via the additional hook in selinux_socket_sock_rcv_skb(). NetLabel itself is only a labeling mechanism, similar to filesystem extended attributes, it is up to the SELinux enforcement mechanism to perform the actual access checks. In addition to the changes outlined above this patch also includes some changes to the extended bitmap (ebitmap) and multi-level security (mls) code to import and export SELinux TE/MLS attributes into and out of NetLabel. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/security.h | 25 +- net/socket.c | 13 +- security/dummy.c | 6 +- security/selinux/hooks.c | 56 +++- security/selinux/include/objsec.h | 8 + security/selinux/include/selinux_netlabel.h | 125 +++++++ security/selinux/ss/ebitmap.c | 144 ++++++++ security/selinux/ss/ebitmap.h | 6 + security/selinux/ss/mls.c | 156 +++++++++ security/selinux/ss/mls.h | 21 ++ security/selinux/ss/services.c | 488 ++++++++++++++++++++++++++++ 11 files changed, 1020 insertions(+), 28 deletions(-) create mode 100644 security/selinux/include/selinux_netlabel.h (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index bb4c80fdfe7a..9f56fb8a4a6c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1341,8 +1341,8 @@ struct security_operations { int (*unix_may_send) (struct socket * sock, struct socket * other); int (*socket_create) (int family, int type, int protocol, int kern); - void (*socket_post_create) (struct socket * sock, int family, - int type, int protocol, int kern); + int (*socket_post_create) (struct socket * sock, int family, + int type, int protocol, int kern); int (*socket_bind) (struct socket * sock, struct sockaddr * address, int addrlen); int (*socket_connect) (struct socket * sock, @@ -2824,13 +2824,13 @@ static inline int security_socket_create (int family, int type, return security_ops->socket_create(family, type, protocol, kern); } -static inline void security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline int security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { - security_ops->socket_post_create(sock, family, type, - protocol, kern); + return security_ops->socket_post_create(sock, family, type, + protocol, kern); } static inline int security_socket_bind(struct socket * sock, @@ -2982,11 +2982,12 @@ static inline int security_socket_create (int family, int type, return 0; } -static inline void security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline int security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { + return 0; } static inline int security_socket_bind(struct socket * sock, diff --git a/net/socket.c b/net/socket.c index 6d261bf206fc..6756e57e1ff0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -973,11 +973,18 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) goto out; } - security_socket_post_create(sock, family, type, protocol, 1); sock->type = type; + err = security_socket_post_create(sock, family, type, protocol, 1); + if (err) + goto out_release; + out: *res = sock; return err; +out_release: + sock_release(sock); + sock = NULL; + goto out; } /* No kernel lock held - perfect */ @@ -1214,7 +1221,9 @@ static int __sock_create(int family, int type, int protocol, struct socket **res */ module_put(net_families[family]->owner); *res = sock; - security_socket_post_create(sock, family, type, protocol, kern); + err = security_socket_post_create(sock, family, type, protocol, kern); + if (err) + goto out_release; out: net_family_read_unlock(); diff --git a/security/dummy.c b/security/dummy.c index 1c45f8e4aad1..aeee70565509 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -709,10 +709,10 @@ static int dummy_socket_create (int family, int type, return 0; } -static void dummy_socket_post_create (struct socket *sock, int family, int type, - int protocol, int kern) +static int dummy_socket_post_create (struct socket *sock, int family, int type, + int protocol, int kern) { - return; + return 0; } static int dummy_socket_bind (struct socket *sock, struct sockaddr *address, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 33028b3b19ce..2a6bbb921e1e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -12,6 +12,8 @@ * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * + * Copyright (C) 2006 Hewlett-Packard Development Company, L.P. + * Paul Moore, * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -74,6 +76,7 @@ #include "objsec.h" #include "netif.h" #include "xfrm.h" +#include "selinux_netlabel.h" #define XATTR_SELINUX_SUFFIX "selinux" #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX @@ -2395,6 +2398,7 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t static int selinux_file_permission(struct file *file, int mask) { + int rc; struct inode *inode = file->f_dentry->d_inode; if (!mask) { @@ -2406,8 +2410,12 @@ static int selinux_file_permission(struct file *file, int mask) if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) mask |= MAY_APPEND; - return file_has_perm(current, file, - file_mask_to_av(inode->i_mode, mask)); + rc = file_has_perm(current, file, + file_mask_to_av(inode->i_mode, mask)); + if (rc) + return rc; + + return selinux_netlbl_inode_permission(inode, mask); } static int selinux_file_alloc_security(struct file *file) @@ -3058,9 +3066,10 @@ out: return err; } -static void selinux_socket_post_create(struct socket *sock, int family, - int type, int protocol, int kern) +static int selinux_socket_post_create(struct socket *sock, int family, + int type, int protocol, int kern) { + int err = 0; struct inode_security_struct *isec; struct task_security_struct *tsec; struct sk_security_struct *sksec; @@ -3077,9 +3086,12 @@ static void selinux_socket_post_create(struct socket *sock, int family, if (sock->sk) { sksec = sock->sk->sk_security; sksec->sid = isec->sid; + err = selinux_netlbl_socket_post_create(sock, + family, + isec->sid); } - return; + return err; } /* Range of port numbers used to automatically bind. @@ -3260,7 +3272,13 @@ static int selinux_socket_accept(struct socket *sock, struct socket *newsock) static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { - return socket_has_perm(current, sock, SOCKET__WRITE); + int rc; + + rc = socket_has_perm(current, sock, SOCKET__WRITE); + if (rc) + return rc; + + return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE); } static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, @@ -3468,6 +3486,10 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; + err = selinux_netlbl_sock_rcv_skb(sksec, skb, &ad); + if (err) + goto out; + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); out: return err; @@ -3491,8 +3513,9 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op peer_sid = ssec->peer_sid; } else if (isec->sclass == SECCLASS_TCP_SOCKET) { - peer_sid = selinux_socket_getpeer_stream(sock->sk); - + peer_sid = selinux_netlbl_socket_getpeersec_stream(sock); + if (peer_sid == SECSID_NULL) + peer_sid = selinux_socket_getpeer_stream(sock->sk); if (peer_sid == SECSID_NULL) { err = -ENOPROTOOPT; goto out; @@ -3532,8 +3555,11 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff * if (sock && (sock->sk->sk_family == PF_UNIX)) selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid); - else if (skb) - peer_secid = selinux_socket_getpeer_dgram(skb); + else if (skb) { + peer_secid = selinux_netlbl_socket_getpeersec_dgram(skb); + if (peer_secid == SECSID_NULL) + peer_secid = selinux_socket_getpeer_dgram(skb); + } if (peer_secid == SECSID_NULL) err = -EINVAL; @@ -3578,6 +3604,8 @@ void selinux_sock_graft(struct sock* sk, struct socket *parent) struct sk_security_struct *sksec = sk->sk_security; isec->sid = sksec->sid; + + selinux_netlbl_sock_graft(sk, parent); } int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, @@ -3585,9 +3613,15 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, { struct sk_security_struct *sksec = sk->sk_security; int err; - u32 newsid = 0; + u32 newsid; u32 peersid; + newsid = selinux_netlbl_inet_conn_request(skb, sksec->sid); + if (newsid != SECSID_NULL) { + req->secid = newsid; + return 0; + } + err = selinux_xfrm_decode_session(skb, &peersid, 0); BUG_ON(err); diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 79b9e0af19a0..0a39bfd1319f 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -101,6 +101,14 @@ struct sk_security_struct { struct sock *sk; /* back pointer to sk object */ u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ +#ifdef CONFIG_NETLABEL + u16 sclass; /* sock security class */ + enum { /* NetLabel state */ + NLBL_UNSET = 0, + NLBL_REQUIRE, + NLBL_LABELED, + } nlbl_state; +#endif }; struct key_security_struct { diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h new file mode 100644 index 000000000000..88c463eef1e1 --- /dev/null +++ b/security/selinux/include/selinux_netlabel.h @@ -0,0 +1,125 @@ +/* + * SELinux interface to the NetLabel subsystem + * + * Author : Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _SELINUX_NETLABEL_H_ +#define _SELINUX_NETLABEL_H_ + +#ifdef CONFIG_NETLABEL +void selinux_netlbl_cache_invalidate(void); +int selinux_netlbl_socket_post_create(struct socket *sock, + int sock_family, + u32 sid); +void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); +u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid); +int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, + struct sk_buff *skb, + struct avc_audit_data *ad); +u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock); +u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb); + +int __selinux_netlbl_inode_permission(struct inode *inode, int mask); +/** + * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled + * @inode: the file descriptor's inode + * @mask: the permission mask + * + * Description: + * Looks at a file's inode and if it is marked as a socket protected by + * NetLabel then verify that the socket has been labeled, if not try to label + * the socket now with the inode's SID. Returns zero on success, negative + * values on failure. + * + */ +static inline int selinux_netlbl_inode_permission(struct inode *inode, + int mask) +{ + int rc = 0; + struct inode_security_struct *isec; + struct sk_security_struct *sksec; + + if (!S_ISSOCK(inode->i_mode)) + return 0; + + isec = inode->i_security; + sksec = SOCKET_I(inode)->sk->sk_security; + down(&isec->sem); + if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && + (mask & (MAY_WRITE | MAY_APPEND)))) + rc = __selinux_netlbl_inode_permission(inode, mask); + up(&isec->sem); + + return rc; +} +#else +static inline void selinux_netlbl_cache_invalidate(void) +{ + return; +} + +static inline int selinux_netlbl_socket_post_create(struct socket *sock, + int sock_family, + u32 sid) +{ + return 0; +} + +static inline void selinux_netlbl_sock_graft(struct sock *sk, + struct socket *sock) +{ + return; +} + +static inline u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, + u32 sock_sid) +{ + return SECSID_NULL; +} + +static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, + struct sk_buff *skb, + struct avc_audit_data *ad) +{ + return 0; +} + +static inline u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) +{ + return SECSID_NULL; +} + +static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) +{ + return SECSID_NULL; +} + +static inline int selinux_netlbl_inode_permission(struct inode *inode, + int mask) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +#endif diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 47024a6e1844..4b915eb60c45 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -3,6 +3,14 @@ * * Author : Stephen Smalley, */ +/* + * Updated: Hewlett-Packard + * + * Added ebitmap_export() and ebitmap_import() + * + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + */ + #include #include #include @@ -59,6 +67,142 @@ int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src) return 0; } +/** + * ebitmap_export - Export an ebitmap to a unsigned char bitmap string + * @src: the ebitmap to export + * @dst: the resulting bitmap string + * @dst_len: length of dst in bytes + * + * Description: + * Allocate a buffer at least src->highbit bits long and export the extensible + * bitmap into the buffer. The bitmap string will be in little endian format, + * i.e. LSB first. The value returned in dst_len may not the true size of the + * buffer as the length of the buffer is rounded up to a multiple of MAPTYPE. + * The caller must free the buffer when finished. Returns zero on success, + * negative values on failure. + * + */ +int ebitmap_export(const struct ebitmap *src, + unsigned char **dst, + size_t *dst_len) +{ + size_t bitmap_len; + unsigned char *bitmap; + struct ebitmap_node *iter_node; + MAPTYPE node_val; + size_t bitmap_byte; + unsigned char bitmask; + + bitmap_len = src->highbit / 8; + if (src->highbit % 7) + bitmap_len += 1; + if (bitmap_len == 0) + return -EINVAL; + + bitmap = kzalloc((bitmap_len & ~(sizeof(MAPTYPE) - 1)) + + sizeof(MAPTYPE), + GFP_ATOMIC); + if (bitmap == NULL) + return -ENOMEM; + + iter_node = src->node; + do { + bitmap_byte = iter_node->startbit / 8; + bitmask = 0x80; + node_val = iter_node->map; + do { + if (bitmask == 0) { + bitmap_byte++; + bitmask = 0x80; + } + if (node_val & (MAPTYPE)0x01) + bitmap[bitmap_byte] |= bitmask; + node_val >>= 1; + bitmask >>= 1; + } while (node_val > 0); + iter_node = iter_node->next; + } while (iter_node); + + *dst = bitmap; + *dst_len = bitmap_len; + return 0; +} + +/** + * ebitmap_import - Import an unsigned char bitmap string into an ebitmap + * @src: the bitmap string + * @src_len: the bitmap length in bytes + * @dst: the empty ebitmap + * + * Description: + * This function takes a little endian bitmap string in src and imports it into + * the ebitmap pointed to by dst. Returns zero on success, negative values on + * failure. + * + */ +int ebitmap_import(const unsigned char *src, + size_t src_len, + struct ebitmap *dst) +{ + size_t src_off = 0; + struct ebitmap_node *node_new; + struct ebitmap_node *node_last = NULL; + size_t iter; + size_t iter_bit; + size_t iter_limit; + unsigned char src_byte; + + do { + iter_limit = src_len - src_off; + if (iter_limit >= sizeof(MAPTYPE)) { + if (*(MAPTYPE *)&src[src_off] == 0) { + src_off += sizeof(MAPTYPE); + continue; + } + iter_limit = sizeof(MAPTYPE); + } else { + iter = src_off; + src_byte = 0; + do { + src_byte |= src[iter++]; + } while (iter < src_len && src_byte == 0); + if (src_byte == 0) + break; + } + + node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC); + if (unlikely(node_new == NULL)) { + ebitmap_destroy(dst); + return -ENOMEM; + } + node_new->startbit = src_off * 8; + iter = 0; + do { + src_byte = src[src_off++]; + iter_bit = iter++ * 8; + while (src_byte != 0) { + if (src_byte & 0x80) + node_new->map |= MAPBIT << iter_bit; + iter_bit++; + src_byte <<= 1; + } + } while (iter < iter_limit); + + if (node_last != NULL) + node_last->next = node_new; + else + dst->node = node_new; + node_last = node_new; + } while (src_off < src_len); + + if (likely(node_last != NULL)) + dst->highbit = node_last->startbit + MAPSIZE; + else + ebitmap_init(dst); + + return 0; +} + int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) { struct ebitmap_node *n1, *n2; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 8bf41055a6cb..da2d4651b10d 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -69,6 +69,12 @@ static inline int ebitmap_node_get_bit(struct ebitmap_node * n, int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); +int ebitmap_export(const struct ebitmap *src, + unsigned char **dst, + size_t *dst_len); +int ebitmap_import(const unsigned char *src, + size_t src_len, + struct ebitmap *dst); int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index e15f7e0399b8..119bd6078ba1 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -10,6 +10,13 @@ * * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ +/* + * Updated: Hewlett-Packard + * + * Added support to import/export the MLS label + * + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + */ #include #include @@ -565,3 +572,152 @@ int mls_compute_sid(struct context *scontext, return -EINVAL; } +/** + * mls_export_lvl - Export the MLS sensitivity levels + * @context: the security context + * @low: the low sensitivity level + * @high: the high sensitivity level + * + * Description: + * Given the security context copy the low MLS sensitivity level into lvl_low + * and the high sensitivity level in lvl_high. The MLS levels are only + * exported if the pointers are not NULL, if they are NULL then that level is + * not exported. + * + */ +void mls_export_lvl(const struct context *context, u32 *low, u32 *high) +{ + if (!selinux_mls_enabled) + return; + + if (low != NULL) + *low = context->range.level[0].sens - 1; + if (high != NULL) + *high = context->range.level[1].sens - 1; +} + +/** + * mls_import_lvl - Import the MLS sensitivity levels + * @context: the security context + * @low: the low sensitivity level + * @high: the high sensitivity level + * + * Description: + * Given the security context and the two sensitivty levels, set the MLS levels + * in the context according the two given as parameters. Returns zero on + * success, negative values on failure. + * + */ +void mls_import_lvl(struct context *context, u32 low, u32 high) +{ + if (!selinux_mls_enabled) + return; + + context->range.level[0].sens = low + 1; + context->range.level[1].sens = high + 1; +} + +/** + * mls_export_cat - Export the MLS categories + * @context: the security context + * @low: the low category + * @low_len: length of the cat_low bitmap in bytes + * @high: the high category + * @high_len: length of the cat_high bitmap in bytes + * + * Description: + * Given the security context export the low MLS category bitmap into cat_low + * and the high category bitmap into cat_high. The MLS categories are only + * exported if the pointers are not NULL, if they are NULL then that level is + * not exported. The caller is responsibile for freeing the memory when + * finished. Returns zero on success, negative values on failure. + * + */ +int mls_export_cat(const struct context *context, + unsigned char **low, + size_t *low_len, + unsigned char **high, + size_t *high_len) +{ + int rc = -EPERM; + + if (!selinux_mls_enabled) + return 0; + + if (low != NULL) { + rc = ebitmap_export(&context->range.level[0].cat, + low, + low_len); + if (rc != 0) + goto export_cat_failure; + } + if (high != NULL) { + rc = ebitmap_export(&context->range.level[1].cat, + high, + high_len); + if (rc != 0) + goto export_cat_failure; + } + + return 0; + +export_cat_failure: + if (low != NULL) + kfree(*low); + if (high != NULL) + kfree(*high); + return rc; +} + +/** + * mls_import_cat - Import the MLS categories + * @context: the security context + * @low: the low category + * @low_len: length of the cat_low bitmap in bytes + * @high: the high category + * @high_len: length of the cat_high bitmap in bytes + * + * Description: + * Given the security context and the two category bitmap strings import the + * categories into the security context. The MLS categories are only imported + * if the pointers are not NULL, if they are NULL they are skipped. Returns + * zero on success, negative values on failure. + * + */ +int mls_import_cat(struct context *context, + const unsigned char *low, + size_t low_len, + const unsigned char *high, + size_t high_len) +{ + int rc = -EPERM; + + if (!selinux_mls_enabled) + return 0; + + if (low != NULL) { + rc = ebitmap_import(low, + low_len, + &context->range.level[0].cat); + if (rc != 0) + goto import_cat_failure; + } + if (high != NULL) { + if (high == low) + rc = ebitmap_cpy(&context->range.level[1].cat, + &context->range.level[0].cat); + else + rc = ebitmap_import(high, + high_len, + &context->range.level[1].cat); + if (rc != 0) + goto import_cat_failure; + } + + return 0; + +import_cat_failure: + ebitmap_destroy(&context->range.level[0].cat); + ebitmap_destroy(&context->range.level[1].cat); + return rc; +} diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 90c5e88987fa..df6032c6d492 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -10,6 +10,13 @@ * * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ +/* + * Updated: Hewlett-Packard + * + * Added support to import/export the MLS label + * + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + */ #ifndef _SS_MLS_H_ #define _SS_MLS_H_ @@ -62,5 +69,19 @@ int mls_compute_sid(struct context *scontext, int mls_setup_user_range(struct context *fromcon, struct user_datum *user, struct context *usercon); +void mls_export_lvl(const struct context *context, u32 *low, u32 *high); +void mls_import_lvl(struct context *context, u32 low, u32 high); + +int mls_export_cat(const struct context *context, + unsigned char **low, + size_t *low_len, + unsigned char **high, + size_t *high_len); +int mls_import_cat(struct context *context, + const unsigned char *low, + size_t low_len, + const unsigned char *high, + size_t high_len); + #endif /* _SS_MLS_H */ diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b00ec69f0ffd..910afa1ffc31 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -13,6 +13,11 @@ * * Added conditional policy language extensions * + * Updated: Hewlett-Packard + * + * Added support for NetLabel + * + * Copyright (C) 2006 Hewlett-Packard Development Company, L.P. * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. * Copyright (C) 2003 - 2004 Tresys Technology, LLC * Copyright (C) 2003 Red Hat, Inc., James Morris @@ -29,6 +34,8 @@ #include #include #include +#include +#include #include "flask.h" #include "avc.h" @@ -40,6 +47,8 @@ #include "services.h" #include "conditional.h" #include "mls.h" +#include "objsec.h" +#include "selinux_netlabel.h" extern void selnl_notify_policyload(u32 seqno); unsigned int policydb_loaded_version; @@ -1241,6 +1250,7 @@ int security_load_policy(void *data, size_t len) selinux_complete_init(); avc_ss_reset(seqno); selnl_notify_policyload(seqno); + selinux_netlbl_cache_invalidate(); return 0; } @@ -1295,6 +1305,7 @@ int security_load_policy(void *data, size_t len) avc_ss_reset(seqno); selnl_notify_policyload(seqno); + selinux_netlbl_cache_invalidate(); return 0; @@ -2133,3 +2144,480 @@ void selinux_audit_set_callback(int (*callback)(void)) { aurule_callback = callback; } + +#ifdef CONFIG_NETLABEL +/* + * This is the structure we store inside the NetLabel cache block. + */ +#define NETLBL_CACHE(x) ((struct netlbl_cache *)(x)) +#define NETLBL_CACHE_T_NONE 0 +#define NETLBL_CACHE_T_SID 1 +#define NETLBL_CACHE_T_MLS 2 +struct netlbl_cache { + u32 type; + union { + u32 sid; + struct mls_range mls_label; + } data; +}; + +/** + * selinux_netlbl_cache_free - Free the NetLabel cached data + * @data: the data to free + * + * Description: + * This function is intended to be used as the free() callback inside the + * netlbl_lsm_cache structure. + * + */ +static void selinux_netlbl_cache_free(const void *data) +{ + struct netlbl_cache *cache = NETLBL_CACHE(data); + switch (cache->type) { + case NETLBL_CACHE_T_MLS: + ebitmap_destroy(&cache->data.mls_label.level[0].cat); + break; + } + kfree(data); +} + +/** + * selinux_netlbl_cache_add - Add an entry to the NetLabel cache + * @skb: the packet + * @ctx: the SELinux context + * + * Description: + * Attempt to cache the context in @ctx, which was derived from the packet in + * @skb, in the NetLabel subsystem cache. + * + */ +static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) +{ + struct netlbl_cache *cache = NULL; + struct netlbl_lsm_secattr secattr; + + netlbl_secattr_init(&secattr); + + cache = kzalloc(sizeof(*cache), GFP_ATOMIC); + if (cache == NULL) + goto netlbl_cache_add_failure; + secattr.cache.free = selinux_netlbl_cache_free; + secattr.cache.data = (void *)cache; + + cache->type = NETLBL_CACHE_T_MLS; + if (ebitmap_cpy(&cache->data.mls_label.level[0].cat, + &ctx->range.level[0].cat) != 0) + goto netlbl_cache_add_failure; + cache->data.mls_label.level[1].cat.highbit = + cache->data.mls_label.level[0].cat.highbit; + cache->data.mls_label.level[1].cat.node = + cache->data.mls_label.level[0].cat.node; + cache->data.mls_label.level[0].sens = ctx->range.level[0].sens; + cache->data.mls_label.level[1].sens = ctx->range.level[0].sens; + + if (netlbl_cache_add(skb, &secattr) != 0) + goto netlbl_cache_add_failure; + + return; + +netlbl_cache_add_failure: + netlbl_secattr_destroy(&secattr, 1); +} + +/** + * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache + * + * Description: + * Invalidate the NetLabel security attribute mapping cache. + * + */ +void selinux_netlbl_cache_invalidate(void) +{ + netlbl_cache_invalidate(); +} + +/** + * selinux_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID + * @skb: the network packet + * @secattr: the NetLabel packet security attributes + * @base_sid: the SELinux SID to use as a context for MLS only attributes + * @sid: the SELinux SID + * + * Description: + * Convert the given NetLabel packet security attributes in @secattr into a + * SELinux SID. If the @secattr field does not contain a full SELinux + * SID/context then use the context in @base_sid as the foundation. If @skb + * is not NULL attempt to cache as much data as possibile. Returns zero on + * success, negative values on failure. + * + */ +static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr, + u32 base_sid, + u32 *sid) +{ + int rc = -EIDRM; + struct context *ctx; + struct context ctx_new; + struct netlbl_cache *cache; + + POLICY_RDLOCK; + + if (secattr->cache.data) { + cache = NETLBL_CACHE(secattr->cache.data); + switch (cache->type) { + case NETLBL_CACHE_T_SID: + *sid = cache->data.sid; + rc = 0; + break; + case NETLBL_CACHE_T_MLS: + ctx = sidtab_search(&sidtab, base_sid); + if (ctx == NULL) + goto netlbl_secattr_to_sid_return; + + ctx_new.user = ctx->user; + ctx_new.role = ctx->role; + ctx_new.type = ctx->type; + ctx_new.range.level[0].sens = + cache->data.mls_label.level[0].sens; + ctx_new.range.level[0].cat.highbit = + cache->data.mls_label.level[0].cat.highbit; + ctx_new.range.level[0].cat.node = + cache->data.mls_label.level[0].cat.node; + ctx_new.range.level[1].sens = + cache->data.mls_label.level[1].sens; + ctx_new.range.level[1].cat.highbit = + cache->data.mls_label.level[1].cat.highbit; + ctx_new.range.level[1].cat.node = + cache->data.mls_label.level[1].cat.node; + + rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); + break; + default: + goto netlbl_secattr_to_sid_return; + } + } else if (secattr->mls_lvl_vld) { + ctx = sidtab_search(&sidtab, base_sid); + if (ctx == NULL) + goto netlbl_secattr_to_sid_return; + + ctx_new.user = ctx->user; + ctx_new.role = ctx->role; + ctx_new.type = ctx->type; + mls_import_lvl(&ctx_new, secattr->mls_lvl, secattr->mls_lvl); + if (secattr->mls_cat) { + if (mls_import_cat(&ctx_new, + secattr->mls_cat, + secattr->mls_cat_len, + NULL, + 0) != 0) + goto netlbl_secattr_to_sid_return; + ctx_new.range.level[1].cat.highbit = + ctx_new.range.level[0].cat.highbit; + ctx_new.range.level[1].cat.node = + ctx_new.range.level[0].cat.node; + } else { + ebitmap_init(&ctx_new.range.level[0].cat); + ebitmap_init(&ctx_new.range.level[1].cat); + } + if (mls_context_isvalid(&policydb, &ctx_new) != 1) + goto netlbl_secattr_to_sid_return_cleanup; + + rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); + if (rc != 0) + goto netlbl_secattr_to_sid_return_cleanup; + + if (skb != NULL) + selinux_netlbl_cache_add(skb, &ctx_new); + ebitmap_destroy(&ctx_new.range.level[0].cat); + } else { + *sid = SECINITSID_UNLABELED; + rc = 0; + } + +netlbl_secattr_to_sid_return: + POLICY_RDUNLOCK; + return rc; +netlbl_secattr_to_sid_return_cleanup: + ebitmap_destroy(&ctx_new.range.level[0].cat); + goto netlbl_secattr_to_sid_return; +} + +/** + * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel + * @skb: the packet + * @base_sid: the SELinux SID to use as a context for MLS only attributes + * @sid: the SID + * + * Description: + * Call the NetLabel mechanism to get the security attributes of the given + * packet and use those attributes to determine the correct context/SID to + * assign to the packet. Returns zero on success, negative values on failure. + * + */ +static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, + u32 base_sid, + u32 *sid) +{ + int rc; + struct netlbl_lsm_secattr secattr; + + netlbl_secattr_init(&secattr); + rc = netlbl_skbuff_getattr(skb, &secattr); + if (rc == 0) + rc = selinux_netlbl_secattr_to_sid(skb, + &secattr, + base_sid, + sid); + netlbl_secattr_destroy(&secattr, 0); + + return rc; +} + +/** + * selinux_netlbl_socket_setsid - Label a socket using the NetLabel mechanism + * @sock: the socket to label + * @sid: the SID to use + * + * Description: + * Attempt to label a socket using the NetLabel mechanism using the given + * SID. Returns zero values on success, negative values on failure. + * + */ +static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid) +{ + int rc = -ENOENT; + struct sk_security_struct *sksec = sock->sk->sk_security; + struct netlbl_lsm_secattr secattr; + struct context *ctx; + + if (!ss_initialized) + return 0; + + POLICY_RDLOCK; + + ctx = sidtab_search(&sidtab, sid); + if (ctx == NULL) + goto netlbl_socket_setsid_return; + + netlbl_secattr_init(&secattr); + secattr.domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1], + GFP_ATOMIC); + mls_export_lvl(ctx, &secattr.mls_lvl, NULL); + secattr.mls_lvl_vld = 1; + mls_export_cat(ctx, + &secattr.mls_cat, + &secattr.mls_cat_len, + NULL, + NULL); + + rc = netlbl_socket_setattr(sock, &secattr); + if (rc == 0) + sksec->nlbl_state = NLBL_LABELED; + + netlbl_secattr_destroy(&secattr, 0); + +netlbl_socket_setsid_return: + POLICY_RDUNLOCK; + return rc; +} + +/** + * selinux_netlbl_socket_post_create - Label a socket using NetLabel + * @sock: the socket to label + * @sock_family: the socket family + * @sid: the SID to use + * + * Description: + * Attempt to label a socket using the NetLabel mechanism using the given + * SID. Returns zero values on success, negative values on failure. + * + */ +int selinux_netlbl_socket_post_create(struct socket *sock, + int sock_family, + u32 sid) +{ + struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; + struct sk_security_struct *sksec = sock->sk->sk_security; + + if (sock_family != PF_INET) + return 0; + + sksec->sclass = isec->sclass; + sksec->nlbl_state = NLBL_REQUIRE; + return selinux_netlbl_socket_setsid(sock, sid); +} + +/** + * selinux_netlbl_sock_graft - Netlabel the new socket + * @sk: the new connection + * @sock: the new socket + * + * Description: + * The connection represented by @sk is being grafted onto @sock so set the + * socket's NetLabel to match the SID of @sk. + * + */ +void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) +{ + struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; + struct sk_security_struct *sksec = sk->sk_security; + + if (sk->sk_family != PF_INET) + return; + + sksec->nlbl_state = NLBL_REQUIRE; + sksec->peer_sid = sksec->sid; + sksec->sclass = isec->sclass; + + /* Try to set the NetLabel on the socket to save time later, if we fail + * here we will pick up the pieces in later calls to + * selinux_netlbl_inode_permission(). */ + selinux_netlbl_socket_setsid(sock, sksec->sid); +} + +/** + * selinux_netlbl_inet_conn_request - Handle a new connection request + * @skb: the packet + * @sock_sid: the SID of the parent socket + * + * Description: + * If present, use the security attributes of the packet in @skb and the + * parent sock's SID to arrive at a SID for the new child sock. Returns the + * SID of the connection or SECSID_NULL on failure. + * + */ +u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid) +{ + int rc; + u32 peer_sid; + + rc = selinux_netlbl_skbuff_getsid(skb, sock_sid, &peer_sid); + if (rc != 0) + return SECSID_NULL; + + if (peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + + return peer_sid; +} + +/** + * __selinux_netlbl_inode_permission - Label a socket using NetLabel + * @inode: the file descriptor's inode + * @mask: the permission mask + * + * Description: + * Try to label a socket with the inode's SID using NetLabel. Returns zero on + * success, negative values on failure. + * + */ +int __selinux_netlbl_inode_permission(struct inode *inode, int mask) +{ + int rc; + struct socket *sock = SOCKET_I(inode); + struct sk_security_struct *sksec = sock->sk->sk_security; + + lock_sock(sock->sk); + rc = selinux_netlbl_socket_setsid(sock, sksec->sid); + release_sock(sock->sk); + + return rc; +} + +/** + * selinux_netlbl_sock_rcv_skb - Do an inbound access check using NetLabel + * @sksec: the sock's sk_security_struct + * @skb: the packet + * @ad: the audit data + * + * Description: + * Fetch the NetLabel security attributes from @skb and perform an access check + * against the receiving socket. Returns zero on success, negative values on + * error. + * + */ +int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, + struct sk_buff *skb, + struct avc_audit_data *ad) +{ + int rc; + u32 netlbl_sid; + u32 recv_perm; + + rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid); + if (rc != 0) + return rc; + + if (netlbl_sid == SECINITSID_UNLABELED) + return 0; + + switch (sksec->sclass) { + case SECCLASS_UDP_SOCKET: + recv_perm = UDP_SOCKET__RECV_MSG; + break; + case SECCLASS_TCP_SOCKET: + recv_perm = TCP_SOCKET__RECV_MSG; + break; + default: + recv_perm = RAWIP_SOCKET__RECV_MSG; + } + + rc = avc_has_perm(sksec->sid, + netlbl_sid, + sksec->sclass, + recv_perm, + ad); + if (rc == 0) + return 0; + + netlbl_skbuff_err(skb, rc); + return rc; +} + +/** + * selinux_netlbl_socket_peersid - Return the peer SID of a connected socket + * @sock: the socket + * + * Description: + * Examine @sock to find the connected peer's SID. Returns the SID on success + * or SECSID_NULL on error. + * + */ +u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) +{ + struct sk_security_struct *sksec = sock->sk->sk_security; + + if (sksec->peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + + return sksec->peer_sid; +} + +/** + * selinux_netlbl_socket_getpeersec_dgram - Return the SID of a NetLabel packet + * @skb: the packet + * + * Description: + * Examine @skb to find the SID assigned to it by NetLabel. Returns the SID on + * success, SECSID_NULL on error. + * + */ +u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) +{ + int peer_sid; + struct sock *sk = skb->sk; + struct inode_security_struct *isec; + + if (sk == NULL || sk->sk_socket == NULL) + return SECSID_NULL; + + isec = SOCK_INODE(sk->sk_socket)->i_security; + if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0) + return SECSID_NULL; + if (peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + + return peer_sid; +} +#endif /* CONFIG_NETLABEL */ -- cgit v1.2.3-71-gd317 From 14c0b97ddfc2944982d078b8e33b088840068976 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:38:38 -0700 Subject: [NET]: Protocol Independant Policy Routing Rules Framework Derived from net/ipv/fib_rules.c Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 60 +++++++ include/net/fib_rules.h | 90 ++++++++++ net/Kconfig | 3 + net/core/Makefile | 1 + net/core/fib_rules.c | 416 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/rtnetlink.c | 9 +- 6 files changed, 577 insertions(+), 2 deletions(-) create mode 100644 include/linux/fib_rules.h create mode 100644 include/net/fib_rules.h create mode 100644 net/core/fib_rules.c (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h new file mode 100644 index 000000000000..5e503f0ca6e4 --- /dev/null +++ b/include/linux/fib_rules.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_FIB_RULES_H +#define __LINUX_FIB_RULES_H + +#include +#include + +/* rule is permanent, and cannot be deleted */ +#define FIB_RULE_PERMANENT 1 + +struct fib_rule_hdr +{ + __u8 family; + __u8 dst_len; + __u8 src_len; + __u8 tos; + + __u8 table; + __u8 res1; /* reserved */ + __u8 res2; /* reserved */ + __u8 action; + + __u32 flags; +}; + +enum +{ + FRA_UNSPEC, + FRA_DST, /* destination address */ + FRA_SRC, /* source address */ + FRA_IFNAME, /* interface name */ + FRA_UNUSED1, + FRA_UNUSED2, + FRA_PRIORITY, /* priority/preference */ + FRA_UNUSED3, + FRA_UNUSED4, + FRA_UNUSED5, + FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FLOW, /* flow/class id */ + __FRA_MAX +}; + +#define FRA_MAX (__FRA_MAX - 1) + +enum +{ + FR_ACT_UNSPEC, + FR_ACT_TO_TBL, /* Pass to fixed table */ + FR_ACT_RES1, + FR_ACT_RES2, + FR_ACT_RES3, + FR_ACT_RES4, + FR_ACT_BLACKHOLE, /* Drop without notification */ + FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */ + FR_ACT_PROHIBIT, /* Drop with EACCES */ + __FR_ACT_MAX, +}; + +#define FR_ACT_MAX (__FR_ACT_MAX - 1) + +#endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h new file mode 100644 index 000000000000..61375d9e53f8 --- /dev/null +++ b/include/net/fib_rules.h @@ -0,0 +1,90 @@ +#ifndef __NET_FIB_RULES_H +#define __NET_FIB_RULES_H + +#include +#include +#include +#include +#include + +struct fib_rule +{ + struct list_head list; + atomic_t refcnt; + int ifindex; + char ifname[IFNAMSIZ]; + u32 pref; + u32 flags; + u32 table; + u8 action; + struct rcu_head rcu; +}; + +struct fib_lookup_arg +{ + void *lookup_ptr; + void *result; + struct fib_rule *rule; +}; + +struct fib_rules_ops +{ + int family; + struct list_head list; + int rule_size; + + int (*action)(struct fib_rule *, + struct flowi *, int, + struct fib_lookup_arg *); + int (*match)(struct fib_rule *, + struct flowi *, int); + int (*configure)(struct fib_rule *, + struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *, + struct nlattr **); + int (*compare)(struct fib_rule *, + struct fib_rule_hdr *, + struct nlattr **); + int (*fill)(struct fib_rule *, struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *); + u32 (*default_pref)(void); + + int nlgroup; + struct nla_policy *policy; + struct list_head *rules_list; + struct module *owner; +}; + +static inline void fib_rule_get(struct fib_rule *rule) +{ + atomic_inc(&rule->refcnt); +} + +static inline void fib_rule_put_rcu(struct rcu_head *head) +{ + struct fib_rule *rule = container_of(head, struct fib_rule, rcu); + kfree(rule); +} + +static inline void fib_rule_put(struct fib_rule *rule) +{ + if (atomic_dec_and_test(&rule->refcnt)) + call_rcu(&rule->rcu, fib_rule_put_rcu); +} + +extern int fib_rules_register(struct fib_rules_ops *); +extern int fib_rules_unregister(struct fib_rules_ops *); + +extern int fib_rules_lookup(struct fib_rules_ops *, + struct flowi *, int flags, + struct fib_lookup_arg *); + +extern int fib_nl_newrule(struct sk_buff *, + struct nlmsghdr *, void *); +extern int fib_nl_delrule(struct sk_buff *, + struct nlmsghdr *, void *); +extern int fib_rules_dump(struct sk_buff *, + struct netlink_callback *, int); +#endif diff --git a/net/Kconfig b/net/Kconfig index eb855b7fa642..6528a935622c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -251,6 +251,9 @@ config WIRELESS_EXT source "net/netlabel/Kconfig" +config FIB_RULES + bool + endif # if NET endmenu # Networking diff --git a/net/core/Makefile b/net/core/Makefile index 2645ba428d48..119568077dab 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o +obj-$(CONFIG_FIB_RULES) += fib_rules.o diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c new file mode 100644 index 000000000000..6cdad24038e2 --- /dev/null +++ b/net/core/fib_rules.c @@ -0,0 +1,416 @@ +/* + * net/core/fib_rules.c Generic Routing Rules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include + +static LIST_HEAD(rules_ops); +static DEFINE_SPINLOCK(rules_mod_lock); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops); + +static struct fib_rules_ops *lookup_rules_ops(int family) +{ + struct fib_rules_ops *ops; + + rcu_read_lock(); + list_for_each_entry_rcu(ops, &rules_ops, list) { + if (ops->family == family) { + if (!try_module_get(ops->owner)) + ops = NULL; + rcu_read_unlock(); + return ops; + } + } + rcu_read_unlock(); + + return NULL; +} + +static void rules_ops_put(struct fib_rules_ops *ops) +{ + if (ops) + module_put(ops->owner); +} + +int fib_rules_register(struct fib_rules_ops *ops) +{ + int err = -EEXIST; + struct fib_rules_ops *o; + + if (ops->rule_size < sizeof(struct fib_rule)) + return -EINVAL; + + if (ops->match == NULL || ops->configure == NULL || + ops->compare == NULL || ops->fill == NULL || + ops->action == NULL) + return -EINVAL; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) + if (ops->family == o->family) + goto errout; + + list_add_tail_rcu(&ops->list, &rules_ops); + err = 0; +errout: + spin_unlock(&rules_mod_lock); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_register); + +static void cleanup_ops(struct fib_rules_ops *ops) +{ + struct fib_rule *rule, *tmp; + + list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_del_rcu(&rule->list); + fib_rule_put(rule); + } +} + +int fib_rules_unregister(struct fib_rules_ops *ops) +{ + int err = 0; + struct fib_rules_ops *o; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) { + if (o == ops) { + list_del_rcu(&o->list); + cleanup_ops(ops); + goto out; + } + } + + err = -ENOENT; +out: + spin_unlock(&rules_mod_lock); + + synchronize_rcu(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_unregister); + +int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, + int flags, struct fib_lookup_arg *arg) +{ + struct fib_rule *rule; + int err; + + rcu_read_lock(); + + list_for_each_entry_rcu(rule, ops->rules_list, list) { + if (rule->ifindex && (rule->ifindex != fl->iif)) + continue; + + if (!ops->match(rule, fl, flags)) + continue; + + err = ops->action(rule, fl, flags, arg); + if (err != -EAGAIN) { + fib_rule_get(rule); + arg->rule = rule; + goto out; + } + } + + err = -ENETUNREACH; +out: + rcu_read_unlock(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_lookup); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule, *r, *last = NULL; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ) + goto errout; + + rule = kzalloc(ops->rule_size, GFP_KERNEL); + if (rule == NULL) { + err = -ENOMEM; + goto errout; + } + + if (tb[FRA_PRIORITY]) + rule->pref = nla_get_u32(tb[FRA_PRIORITY]); + + if (tb[FRA_IFNAME]) { + struct net_device *dev; + + rule->ifindex = -1; + if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME], + IFNAMSIZ) >= IFNAMSIZ) + goto errout_free; + + dev = __dev_get_by_name(rule->ifname); + if (dev) + rule->ifindex = dev->ifindex; + } + + rule->action = frh->action; + rule->flags = frh->flags; + rule->table = frh->table; + + if (!rule->pref && ops->default_pref) + rule->pref = ops->default_pref(); + + err = ops->configure(rule, skb, nlh, frh, tb); + if (err < 0) + goto errout_free; + + list_for_each_entry(r, ops->rules_list, list) { + if (r->pref > rule->pref) + break; + last = r; + } + + fib_rule_get(rule); + + if (last) + list_add_rcu(&rule->list, &last->list); + else + list_add_rcu(&rule->list, ops->rules_list); + + notify_rule_change(RTM_NEWRULE, rule, ops); + rules_ops_put(ops); + return 0; + +errout_free: + kfree(rule); +errout: + rules_ops_put(ops); + return err; +} + +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + list_for_each_entry(rule, ops->rules_list, list) { + if (frh->action && (frh->action != rule->action)) + continue; + + if (frh->table && (frh->table != rule->table)) + continue; + + if (tb[FRA_PRIORITY] && + (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) + continue; + + if (tb[FRA_IFNAME] && + nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + continue; + + if (!ops->compare(rule, frh, tb)) + continue; + + if (rule->flags & FIB_RULE_PERMANENT) { + err = -EPERM; + goto errout; + } + + list_del_rcu(&rule->list); + synchronize_rcu(); + notify_rule_change(RTM_DELRULE, rule, ops); + fib_rule_put(rule); + rules_ops_put(ops); + return 0; + } + + err = -ENOENT; +errout: + rules_ops_put(ops); + return err; +} + +static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, + u32 pid, u32 seq, int type, int flags, + struct fib_rules_ops *ops) +{ + struct nlmsghdr *nlh; + struct fib_rule_hdr *frh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); + if (nlh == NULL) + return -1; + + frh = nlmsg_data(nlh); + frh->table = rule->table; + frh->res1 = 0; + frh->res2 = 0; + frh->action = rule->action; + frh->flags = rule->flags; + + if (rule->ifname[0]) + NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + + if (rule->pref) + NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + + if (ops->fill(rule, skb, nlh, frh) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); +} + +int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) +{ + int idx = 0; + struct fib_rule *rule; + struct fib_rules_ops *ops; + + ops = lookup_rules_ops(family); + if (ops == NULL) + return -EAFNOSUPPORT; + + rcu_read_lock(); + list_for_each_entry(rule, ops->rules_list, list) { + if (idx < cb->args[0]) + goto skip; + + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops) < 0) + break; +skip: + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + rules_ops_put(ops); + + return skb->len; +} + +EXPORT_SYMBOL_GPL(fib_rules_dump); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops) +{ + int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128); + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + + if (skb == NULL) + netlink_set_err(rtnl, 0, ops->nlgroup, ENOBUFS); + else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, ops->nlgroup, EINVAL); + } else + netlink_broadcast(rtnl, skb, 0, ops->nlgroup, GFP_KERNEL); +} + +static void attach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) { + if (rule->ifindex == -1 && + strcmp(dev->name, rule->ifname) == 0) + rule->ifindex = dev->ifindex; + } +} + +static void detach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) + if (rule->ifindex == dev->ifindex) + rule->ifindex = -1; +} + + +static int fib_rules_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + rcu_read_lock(); + + switch (event) { + case NETDEV_REGISTER: + list_for_each_entry(ops, &rules_ops, list) + attach_rules(ops->rules_list, dev); + break; + + case NETDEV_UNREGISTER: + list_for_each_entry(ops, &rules_ops, list) + detach_rules(ops->rules_list, dev); + break; + } + + rcu_read_unlock(); + + return NOTIFY_DONE; +} + +static struct notifier_block fib_rules_notifier = { + .notifier_call = fib_rules_event, +}; + +static int __init fib_rules_init(void) +{ + return register_netdevice_notifier(&fib_rules_notifier); +} + +subsys_initcall(fib_rules_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 30cc1ba6ed5c..aa7cff2257b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #ifdef CONFIG_NET_WIRELESS_RTNETLINK #include @@ -103,7 +104,7 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), + [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -120,7 +121,7 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, - [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, + [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, @@ -757,6 +758,10 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, +#ifdef CONFIG_FIB_RULES + [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, + [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, +#endif [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, -- cgit v1.2.3-71-gd317 From 101367c2f8c464ea96643192673aa18d88e6336d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:39:02 -0700 Subject: [IPV6]: Policy Routing Rules Adds support for policy routing rules including a new local table for routes with a local destination. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 + include/net/ip6_fib.h | 9 +- include/net/ip6_route.h | 5 + net/ipv6/Kconfig | 1 + net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 1 + net/ipv6/fib6_rules.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_fib.c | 21 ++-- net/ipv6/route.c | 50 +++++++++ 9 files changed, 329 insertions(+), 12 deletions(-) create mode 100644 net/ipv6/fib6_rules.c (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index facd9ee37b76..bf353538ae93 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -889,6 +889,8 @@ enum rtnetlink_groups { RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 818411519c89..7b47e8d5a765 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -155,7 +155,6 @@ struct fib6_table { #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN -#define RT6_TABLE_LOCAL RT6_TABLE_MAIN #define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN @@ -163,9 +162,11 @@ struct fib6_table { #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX +#define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN +#define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif #define RT6_F_STRICT 1 @@ -221,5 +222,11 @@ extern void fib6_run_gc(unsigned long dummy); extern void fib6_gc_cleanup(void); extern void fib6_init(void); + +extern void fib6_rules_init(void); +extern void fib6_rules_cleanup(void); +extern int fib6_rules_dump(struct sk_buff *, + struct netlink_callback *); + #endif #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d49c8c90eb68..9bfa3cc6cedb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -41,6 +41,11 @@ struct pol_chain { extern struct rt6_info ip6_null_entry; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +extern struct rt6_info ip6_prohibit_entry; +extern struct rt6_info ip6_blk_hole_entry; +#endif + extern int ip6_rt_gc_interval; extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 159c63d99c81..36a6c2b79889 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -139,6 +139,7 @@ config IPV6_TUNNEL config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on IPV6 && EXPERIMENTAL + select FIB_RULES ---help--- Support multiple routing tables. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 386e0a626948..9eebf6091279 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -13,6 +13,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o +ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 318767fcefdc..ed766eebc022 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3528,6 +3528,7 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, .dumpit = inet6_dump_fib, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, }; static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c new file mode 100644 index 000000000000..c3c8195744ee --- /dev/null +++ b/net/ipv6/fib6_rules.c @@ -0,0 +1,251 @@ +/* + * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules + * + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors + * Thomas Graf + * Ville Nuorvala + */ + +#include +#include + +#include +#include +#include +#include + +struct fib6_rule +{ + struct fib_rule common; + struct rt6key src; + struct rt6key dst; + u8 tclass; +}; + +static struct fib_rules_ops fib6_rules_ops; + +static struct fib6_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_MAIN, + }, +}; + +static struct fib6_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_LOCAL, + .flags = FIB_RULE_PERMANENT, + }, +}; + +static LIST_HEAD(fib6_rules); + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + struct fib_lookup_arg arg = { + .lookup_ptr = lookup, + }; + + fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); + if (arg.rule) + fib_rule_put(arg.rule); + + return (struct dst_entry *) arg.result; +} + +int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct rt6_info *rt = NULL; + struct fib6_table *table; + pol_lookup_t lookup = arg->lookup_ptr; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + rt = &ip6_null_entry; + goto discard_pkt; + default: + case FR_ACT_BLACKHOLE: + rt = &ip6_blk_hole_entry; + goto discard_pkt; + case FR_ACT_PROHIBIT: + rt = &ip6_prohibit_entry; + goto discard_pkt; + } + + table = fib6_get_table(rule->table); + if (table) + rt = lookup(table, flp, flags); + + if (rt != &ip6_null_entry) + goto out; + + dst_release(&rt->u.dst); +discard_pkt: + dst_hold(&rt->u.dst); +out: + arg->result = rt; + return rt == NULL ? -EAGAIN : 0; +} + + +static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib6_rule *r = (struct fib6_rule *) rule; + + if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + return 0; + + if ((flags & RT6_F_HAS_SADDR) && + !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) + return 0; + + return 1; +} + +static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, + [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, +}; + +static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len > 128 || frh->dst_len > 128 || + (frh->tos & ~IPV6_FLOWINFO_MASK)) + goto errout; + + if (rule->action == FR_ACT_TO_TBL) { + if (rule->table == RT6_TABLE_UNSPEC) + goto errout; + + if (fib6_new_table(rule->table) == NULL) { + err = -ENOBUFS; + goto errout; + } + } + + if (tb[FRA_SRC]) + nla_memcpy(&rule6->src.addr, tb[FRA_SRC], + sizeof(struct in6_addr)); + + if (tb[FRA_DST]) + nla_memcpy(&rule6->dst.addr, tb[FRA_DST], + sizeof(struct in6_addr)); + + rule6->src.plen = frh->src_len; + rule6->dst.plen = frh->dst_len; + rule6->tclass = frh->tos; + + err = 0; +errout: + return err; +} + +static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len && (rule6->src.plen != frh->src_len)) + return 0; + + if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) + return 0; + + if (frh->tos && (rule6->tclass != frh->tos)) + return 0; + + if (tb[FRA_SRC] && + nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) + return 0; + + if (tb[FRA_DST] && + nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) + return 0; + + return 1; +} + +static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + frh->family = AF_INET6; + frh->dst_len = rule6->dst.plen; + frh->src_len = rule6->src.plen; + frh->tos = rule6->tclass; + + if (rule6->dst.plen) + NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), + &rule6->dst.addr); + + if (rule6->src.plen) + NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), + &rule6->src.addr); + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return fib_rules_dump(skb, cb, AF_INET6); +} + +static u32 fib6_rule_default_pref(void) +{ + return 0x3FFF; +} + +static struct fib_rules_ops fib6_rules_ops = { + .family = AF_INET6, + .rule_size = sizeof(struct fib6_rule), + .action = fib6_rule_action, + .match = fib6_rule_match, + .configure = fib6_rule_configure, + .compare = fib6_rule_compare, + .fill = fib6_rule_fill, + .default_pref = fib6_rule_default_pref, + .nlgroup = RTNLGRP_IPV6_RULE, + .policy = fib6_rule_policy, + .rules_list = &fib6_rules, + .owner = THIS_MODULE, +}; + +void __init fib6_rules_init(void) +{ + list_add_tail(&local_rule.common.list, &fib6_rules); + list_add_tail(&main_rule.common.list, &fib6_rules); + + fib_rules_register(&fib6_rules_ops); +} + +void fib6_rules_cleanup(void) +{ + fib_rules_unregister(&fib6_rules_ops); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fcd7da830aca..ce226c14bef5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -159,6 +159,15 @@ static struct fib6_table fib6_main_tbl = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES +static struct fib6_table fib6_local_tbl = { + .tb6_id = RT6_TABLE_LOCAL, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + #define FIB_TABLE_HASHSZ 256 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; @@ -228,20 +237,10 @@ struct fib6_table *fib6_get_table(u32 id) return NULL; } -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) -{ - /* - * TODO: Add rule lookup - */ - struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN); - - return (struct dst_entry *) lookup(table, fl, flags); -} - static void __init fib6_tables_init(void) { fib6_link_table(&fib6_main_tbl); + fib6_link_table(&fib6_local_tbl); } #else diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 73efdadb9ab8..438977e2085d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -140,6 +140,50 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + +struct rt6_info ip6_prohibit_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_prohibit_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; + +struct rt6_info ip6_blk_hole_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_blk_hole_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; + +#endif + /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) { @@ -2408,10 +2452,16 @@ void __init ip6_route_init(void) #ifdef CONFIG_XFRM xfrm6_init(); #endif +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_init(); +#endif } void ip6_route_cleanup(void) { +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_cleanup(); +#endif #ifdef CONFIG_PROC_FS proc_net_remove("ipv6_route"); proc_net_remove("rt6_stats"); -- cgit v1.2.3-71-gd317 From 1823730fbc89fadde72a7bb3b7bdf03cc7b8835c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:04:54 -0700 Subject: [IPv4]: Move interface address bits to linux/if_addr.h Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_addr.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 56 ----------------------------------------------- net/core/rtnetlink.c | 1 + net/decnet/dn_dev.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv6/addrconf.c | 1 + net/ipv6/ndisc.c | 1 + 8 files changed, 59 insertions(+), 56 deletions(-) create mode 100644 include/linux/if_addr.h (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h new file mode 100644 index 000000000000..e1590454db59 --- /dev/null +++ b/include/linux/if_addr.h @@ -0,0 +1,53 @@ +#ifndef __LINUX_IF_ADDR_H +#define __LINUX_IF_ADDR_H + +#include + +struct ifaddrmsg +{ + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + */ +enum +{ + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 + +struct ifa_cacheinfo +{ + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bf353538ae93..890c4d4038b6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -384,62 +384,6 @@ struct rta_session }; -/********************************************************* - * Interface address. - ****/ - -struct ifaddrmsg -{ - unsigned char ifa_family; - unsigned char ifa_prefixlen; /* The prefix length */ - unsigned char ifa_flags; /* Flags */ - unsigned char ifa_scope; /* See above */ - int ifa_index; /* Link index */ -}; - -enum -{ - IFA_UNSPEC, - IFA_ADDRESS, - IFA_LOCAL, - IFA_LABEL, - IFA_BROADCAST, - IFA_ANYCAST, - IFA_CACHEINFO, - IFA_MULTICAST, - __IFA_MAX -}; - -#define IFA_MAX (__IFA_MAX - 1) - -/* ifa_flags */ - -#define IFA_F_SECONDARY 0x01 -#define IFA_F_TEMPORARY IFA_F_SECONDARY - -#define IFA_F_DEPRECATED 0x20 -#define IFA_F_TENTATIVE 0x40 -#define IFA_F_PERMANENT 0x80 - -struct ifa_cacheinfo -{ - __u32 ifa_prefered; - __u32 ifa_valid; - __u32 cstamp; /* created timestamp, hundredths of seconds */ - __u32 tstamp; /* updated timestamp, hundredths of seconds */ -}; - - -#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) -#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) - -/* - Important comment: - IFA_ADDRESS is prefix address, rather than local interface address. - It makes no difference for normally configured broadcast interfaces, - but for point-to-point IFA_ADDRESS is DESTINATION address, - local address is supplied in IFA_LOCAL attribute. - */ /************************************************************** * Neighbour discovery. diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index aa7cff2257b1..35712031e2c3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 476455fbdb03..632c5a90b589 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 80bf5b2ea2e6..398e7b9ca66b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fe4a53d4d10d..a83f1aa8034e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ed766eebc022..c2a4db843e51 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 67cfc3813c32..5743e8bffefd 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -62,6 +62,7 @@ #include #endif +#include #include #include #include -- cgit v1.2.3-71-gd317 From 0844565fb8a9418f5a860aa480c1aef70319c9a2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:05:56 -0700 Subject: [NET]: Move netlink interface bits to linux/if.h Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if.h | 129 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 133 +--------------------------------------------- 2 files changed, 130 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 374e20ad8b0d..cd080d765324 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -212,5 +212,134 @@ struct ifconf #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ +/* The struct should be in sync with struct net_device_stats */ +struct rtnl_link_stats +{ + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap +{ + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +enum +{ + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum +{ + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +struct ifla_cacheinfo +{ + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; #endif /* _LINUX_IF_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 890c4d4038b6..84f3eb426da2 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -2,6 +2,7 @@ #define __LINUX_RTNETLINK_H #include +#include /**** * Routing/neighbour discovery messages. @@ -607,138 +608,6 @@ struct prefix_cacheinfo __u32 valid_time; }; -/* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 collisions; - - /* detailed rx_errors: */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; -}; - -/* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap -{ - __u64 mem_start; - __u64 mem_end; - __u64 base_addr; - __u16 irq; - __u8 dma; - __u8 port; -}; - -enum -{ - IFLA_UNSPEC, - IFLA_ADDRESS, - IFLA_BROADCAST, - IFLA_IFNAME, - IFLA_MTU, - IFLA_LINK, - IFLA_QDISC, - IFLA_STATS, - IFLA_COST, -#define IFLA_COST IFLA_COST - IFLA_PRIORITY, -#define IFLA_PRIORITY IFLA_PRIORITY - IFLA_MASTER, -#define IFLA_MASTER IFLA_MASTER - IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ -#define IFLA_WIRELESS IFLA_WIRELESS - IFLA_PROTINFO, /* Protocol specific information for a link */ -#define IFLA_PROTINFO IFLA_PROTINFO - IFLA_TXQLEN, -#define IFLA_TXQLEN IFLA_TXQLEN - IFLA_MAP, -#define IFLA_MAP IFLA_MAP - IFLA_WEIGHT, -#define IFLA_WEIGHT IFLA_WEIGHT - IFLA_OPERSTATE, - IFLA_LINKMODE, - __IFLA_MAX -}; - - -#define IFLA_MAX (__IFLA_MAX - 1) - -#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) -#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) - -/* ifi_flags. - - IFF_* flags. - - The only change is: - IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are - more not changeable by user. They describe link media - characteristics and set by device driver. - - Comments: - - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid - - If neither of these three flags are set; - the interface is NBMA. - - - IFF_MULTICAST does not mean anything special: - multicasts can be used on all not-NBMA links. - IFF_MULTICAST means that this media uses special encapsulation - for multicast frames. Apparently, all IFF_POINTOPOINT and - IFF_BROADCAST devices are able to use multicasts too. - */ - -/* IFLA_LINK. - For usual devices it is equal ifi_index. - If it is a "virtual interface" (f.e. tunnel), ifi_link - can point to real physical interface (f.e. for bandwidth calculations), - or maybe 0, what means, that real media is unknown (usual - for IPIP tunnels, when route to endpoint is allowed to change) - */ - -/* Subtype attributes for IFLA_PROTINFO */ -enum -{ - IFLA_INET6_UNSPEC, - IFLA_INET6_FLAGS, /* link flags */ - IFLA_INET6_CONF, /* sysctl parameters */ - IFLA_INET6_STATS, /* statistics */ - IFLA_INET6_MCAST, /* MC things. What of them? */ - IFLA_INET6_CACHEINFO, /* time values and max reasm size */ - __IFLA_INET6_MAX -}; - -#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) - -struct ifla_cacheinfo -{ - __u32 max_reasm_len; - __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ - __u32 reachable_time; - __u32 retrans_time; -}; /***************************************************************** * Traffic control messages. -- cgit v1.2.3-71-gd317 From 84fa7933a33f806bbbaae6775e87459b1ec584c0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 29 Aug 2006 16:44:56 -0700 Subject: [NET]: Replace CHECKSUM_HW by CHECKSUM_PARTIAL/CHECKSUM_COMPLETE Replace CHECKSUM_HW by CHECKSUM_PARTIAL (for outgoing packets, whose checksum still needs to be completed) and CHECKSUM_COMPLETE (for incoming packets, device supplied full checksum). Patch originally from Herbert Xu, updated by myself for 2.6.18-rc3. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/atm/he.c | 2 +- drivers/net/3c59x.c | 2 +- drivers/net/8139cp.c | 6 +++--- drivers/net/acenic.c | 8 ++++---- drivers/net/bnx2.c | 2 +- drivers/net/cassini.c | 4 ++-- drivers/net/chelsio/sge.c | 10 +++++----- drivers/net/dl2k.c | 2 +- drivers/net/e1000/e1000_main.c | 8 ++++---- drivers/net/forcedeth.c | 3 ++- drivers/net/gianfar.c | 2 +- drivers/net/hamachi.c | 2 +- drivers/net/ibm_emac/ibm_emac_core.c | 2 +- drivers/net/ioc3-eth.c | 2 +- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/mv643xx_eth.c | 2 +- drivers/net/myri10ge/myri10ge.c | 8 ++++---- drivers/net/ns83820.c | 2 +- drivers/net/r8169.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/sk98lin/skge.c | 6 +++--- drivers/net/skge.c | 4 ++-- drivers/net/sky2.c | 6 +++--- drivers/net/starfire.c | 6 +++--- drivers/net/sungem.c | 4 ++-- drivers/net/sunhme.c | 6 +++--- drivers/net/tg3.c | 6 +++--- drivers/net/typhoon.c | 2 +- drivers/net/via-rhine.c | 2 +- drivers/net/via-velocity.c | 2 +- include/linux/netdevice.h | 4 ++-- include/linux/skbuff.h | 17 +++++++++-------- net/core/datagram.c | 4 ++-- net/core/dev.c | 12 ++++++------ net/core/netpoll.c | 2 +- net/core/skbuff.c | 14 +++++++------- net/ipv4/icmp.c | 2 +- net/ipv4/igmp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_output.c | 6 +++--- net/ipv4/ipvs/ip_vs_proto_tcp.c | 8 ++++---- net/ipv4/ipvs/ip_vs_proto_udp.c | 8 ++++---- net/ipv4/netfilter.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 3 +-- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 3 +-- net/ipv4/netfilter/ip_nat_standalone.c | 5 +++-- net/ipv4/netfilter/ip_queue.c | 6 +++--- net/ipv4/netfilter/ipt_ECN.c | 9 +++++---- net/ipv4/netfilter/ipt_TCPMSS.c | 5 +++-- net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_output.c | 18 ++++++++---------- net/ipv4/udp.c | 6 +++--- net/ipv4/xfrm4_output.c | 4 ++-- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 6 +++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- net/ipv6/raw.c | 2 +- net/ipv6/reassembly.c | 6 +++--- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 3 +-- net/netfilter/nf_conntrack_proto_udp.c | 3 +-- net/netfilter/nfnetlink_queue.c | 6 +++--- net/packet/af_packet.c | 2 +- net/sched/sch_netem.c | 4 ++-- net/sunrpc/socklib.c | 2 +- 72 files changed, 168 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/drivers/atm/he.c b/drivers/atm/he.c index ffcb9fd31c38..41e052fecd7f 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1912,7 +1912,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) skb->tail = skb->data + skb->len; #ifdef USE_CHECKSUM_HW if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = TCP_CKSUM(skb->data, he_vcc->pdu_len); } diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 80e8ca013e44..29dede2eaa85 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -2077,7 +2077,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) vp->tx_ring[entry].next = 0; #if DO_ZEROCOPY - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); else vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum); diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 1428bb7715af..a48b211c489d 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -813,7 +813,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) flags |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_HW) { + else if (skb->ip_summed == CHECKSUM_PARTIAL) { const struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) flags |= IPCS | TCPCS; @@ -867,7 +867,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) ctrl |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_HW) { + else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (ip->protocol == IPPROTO_TCP) ctrl |= IPCS | TCPCS; else if (ip->protocol == IPPROTO_UDP) @@ -898,7 +898,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) txd->addr = cpu_to_le64(first_mapping); wmb(); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (ip->protocol == IPPROTO_TCP) txd->opts1 = cpu_to_le32(first_eor | first_len | FirstFrag | DescOwn | diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 1c01e9b3d07c..826548644d7b 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2040,7 +2040,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) */ if (bd_flags & BD_FLG_TCP_UDP_SUM) { skb->csum = htons(csum); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } else { skb->ip_summed = CHECKSUM_NONE; } @@ -2511,7 +2511,7 @@ restart: mapping = ace_map_tx_skb(ap, skb, skb, idx); flagsize = (skb->len << 16) | (BD_FLG_END); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; #if ACENIC_DO_VLAN if (vlan_tx_tag_present(skb)) { @@ -2534,7 +2534,7 @@ restart: mapping = ace_map_tx_skb(ap, skb, NULL, idx); flagsize = (skb_headlen(skb) << 16); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; #if ACENIC_DO_VLAN if (vlan_tx_tag_present(skb)) { @@ -2560,7 +2560,7 @@ restart: PCI_DMA_TODEVICE); flagsize = (frag->size << 16); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 652eb05a6c2d..7857b4630124 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -4423,7 +4423,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) ring_prod = TX_RING_IDX(prod); vlan_tag_flags = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM; } diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index a31544ccb3c4..558fdb8ad2dc 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2167,7 +2167,7 @@ end_copy_pkt: cas_page_unmap(addr); } skb->csum = ntohs(i ^ 0xffff); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->protocol = eth_type_trans(skb, cp->dev); return len; } @@ -2821,7 +2821,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, } ctrl = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 61b3754f50ff..ddd0bdb498f4 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1470,9 +1470,9 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (!(adapter->flags & UDP_CSUM_CAPABLE) && - skb->ip_summed == CHECKSUM_HW && + skb->ip_summed == CHECKSUM_PARTIAL && skb->nh.iph->protocol == IPPROTO_UDP) - if (unlikely(skb_checksum_help(skb, 0))) { + if (unlikely(skb_checksum_help(skb))) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1495,11 +1495,11 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl)); cpl->opcode = CPL_TX_PKT; cpl->ip_csum_dis = 1; /* SW calculates IP csum */ - cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1; + cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1; /* the length field isn't used so don't bother setting it */ - st->tx_cso += (skb->ip_summed == CHECKSUM_HW); - sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW); + st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL); + sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL); sge->stats.tx_reg_pkts++; } cpl->iff = dev->if_port; diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 402961e68c89..b74e67654764 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -611,7 +611,7 @@ start_xmit (struct sk_buff *skb, struct net_device *dev) txdesc = &np->tx_ring[entry]; #if 0 - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { txdesc->status |= cpu_to_le64 (TCPChecksumEnable | UDPChecksumEnable | IPChecksumEnable); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 98ef9f85482f..2ab9f96f5dab 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2600,7 +2600,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, unsigned int i; uint8_t css; - if (likely(skb->ip_summed == CHECKSUM_HW)) { + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { css = skb->h.raw - skb->data; i = tx_ring->next_to_use; @@ -2927,11 +2927,11 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } /* reserve a descriptor for the offload context */ - if ((mss) || (skb->ip_summed == CHECKSUM_HW)) + if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) count++; count++; #else - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) count++; #endif @@ -3608,7 +3608,7 @@ e1000_rx_checksum(struct e1000_adapter *adapter, */ csum = ntohl(csum ^ 0xFFFF); skb->csum = csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } adapter->hw_csum_good++; } diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 11b8f1b43dd5..32cacf115f75 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -1503,7 +1503,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); else #endif - tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); + tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ? + NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0; /* vlan tag */ if (np->vlangrp && vlan_tx_tag_present(skb)) { diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index ebbbd6ca6204..ba960913c034 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -947,7 +947,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Set up checksumming */ if (likely((dev->features & NETIF_F_IP_CSUM) - && (CHECKSUM_HW == skb->ip_summed))) { + && (CHECKSUM_PARTIAL == skb->ip_summed))) { fcb = gfar_add_fcb(skb, txbdp); status |= TXBD_TOE; gfar_tx_checksum(skb, fcb); diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 409c6aab0411..763373ae9666 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -1648,7 +1648,7 @@ static int hamachi_rx(struct net_device *dev) * could do the pseudo myself and return * CHECKSUM_UNNECESSARY */ - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } } } diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c index 82468e2dc799..57e214d85e9a 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.c +++ b/drivers/net/ibm_emac/ibm_emac_core.c @@ -1036,7 +1036,7 @@ static inline u16 emac_tx_csum(struct ocp_enet_private *dev, struct sk_buff *skb) { #if defined(CONFIG_IBM_EMAC_TAH) - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { ++dev->stats.tx_packets_csum; return EMAC_TX_CTRL_TAH_CSUM; } diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c index 68d8af7df08e..65f897ddb920 100644 --- a/drivers/net/ioc3-eth.c +++ b/drivers/net/ioc3-eth.c @@ -1387,7 +1387,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) * MAC header which should not be summed and the TCP/UDP pseudo headers * manually. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int proto = ntohs(skb->nh.iph->protocol); unsigned int csoff; struct iphdr *ih = skb->nh.iph; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 7bbd447289b5..9405b44f3214 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1232,7 +1232,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb) unsigned int i; uint8_t css, cso; - if(likely(skb->ip_summed == CHECKSUM_HW)) { + if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) { css = skb->h.raw - skb->data; cso = (skb->h.raw + skb->csum) - skb->data; diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index eeab1df5bef3..38df58fdb358 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1147,7 +1147,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, desc->byte_cnt = length; desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { BUG_ON(skb->protocol != ETH_P_IP); cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 9bdd43ab3573..9f16681d0e7e 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -930,7 +930,7 @@ static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, u16 hw_csum) (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { skb->csum = hw_csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } } @@ -973,7 +973,7 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, if ((skb->protocol == ntohs(ETH_P_IP)) || (skb->protocol == ntohs(ETH_P_IPV6))) { skb->csum = ntohs((u16) csum); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } else myri10ge_vlan_ip_csum(skb, ntohs((u16) csum)); } @@ -1897,13 +1897,13 @@ again: pseudo_hdr_offset = 0; odd_flag = 0; flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); - if (likely(skb->ip_summed == CHECKSUM_HW)) { + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { cksum_offset = (skb->h.raw - skb->data); pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data; /* If the headers are excessively large, then we must * fall back to a software checksum */ if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) { - if (skb_checksum_help(skb, 0)) + if (skb_checksum_help(skb)) goto drop; cksum_offset = 0; pseudo_hdr_offset = 0; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 0e76859c90a2..5143f5dbb2e5 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1153,7 +1153,7 @@ again: if (!nr_frags) frag = NULL; extsts = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { extsts |= EXTSTS_IPPKT; if (IPPROTO_TCP == skb->nh.iph->protocol) extsts |= EXTSTS_TCPPKT; diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 4c2f575faad7..d9b960aa9b0d 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2169,7 +2169,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) if (mss) return LargeSend | ((mss & MSSMask) << MSSShift); } - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { const struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index e72e0e099060..5b3713f622d7 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -3893,7 +3893,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_1 |= TXD_TCP_LSO_MSS(s2io_tcp_mss(skb)); } #endif - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { txdp->Control_2 |= (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN | TXD_TX_CKO_UDP_EN); diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index ee62845d3ac9..eb3b35180c2f 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -1559,7 +1559,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32); pTxd->pMBuf = pMessage; - if (pMessage->ip_summed == CHECKSUM_HW) { + if (pMessage->ip_summed == CHECKSUM_PARTIAL) { u16 hdrlen = pMessage->h.raw - pMessage->data; u16 offset = hdrlen + pMessage->csum; @@ -1678,7 +1678,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ /* ** Does the HW need to evaluate checksum for TCP or UDP packets? */ - if (pMessage->ip_summed == CHECKSUM_HW) { + if (pMessage->ip_summed == CHECKSUM_PARTIAL) { u16 hdrlen = pMessage->h.raw - pMessage->data; u16 offset = hdrlen + pMessage->csum; @@ -2158,7 +2158,7 @@ rx_start: #ifdef USE_SK_RX_CHECKSUM pMsg->csum = pRxd->TcpSums & 0xffff; - pMsg->ip_summed = CHECKSUM_HW; + pMsg->ip_summed = CHECKSUM_COMPLETE; #else pMsg->ip_summed = CHECKSUM_NONE; #endif diff --git a/drivers/net/skge.c b/drivers/net/skge.c index ad878dfddef4..b3d6fa3d6df4 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2338,7 +2338,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) td->dma_lo = map; td->dma_hi = map >> 32; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int offset = skb->h.raw - skb->data; /* This seems backwards, but it is what the sk98lin @@ -2642,7 +2642,7 @@ static inline struct sk_buff *skge_rx_get(struct skge_port *skge, skb->dev = skge->netdev; if (skge->rx_csum) { skb->csum = csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } skb->protocol = eth_type_trans(skb, skge->netdev); diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 933e87f1cc68..8e92566b587e 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1163,7 +1163,7 @@ static unsigned tx_le_req(const struct sk_buff *skb) if (skb_is_gso(skb)) ++count; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) ++count; return count; @@ -1272,7 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) #endif /* Handle TCP checksum offload */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u16 hdr = skb->h.raw - skb->data; u16 offset = hdr + skb->csum; @@ -2000,7 +2000,7 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do) #endif case OP_RXCHKS: skb = sky2->rx_ring[sky2->rx_next].skb; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = le16_to_cpu(status); break; diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index c0a62b00ffc8..2607aa51d8e0 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -1230,7 +1230,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) } #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE) - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK)) return NETDEV_TX_OK; } @@ -1252,7 +1252,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) status |= TxDescIntr; np->reap_tx = 0; } - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { status |= TxCalTCP; np->stats.tx_compressed++; } @@ -1499,7 +1499,7 @@ static int __netdev_rx(struct net_device *dev, int *quota) * Until then, the printk stays. :-) -Ion */ else if (le16_to_cpu(desc->status2) & 0x0040) { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = le16_to_cpu(desc->csum); printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2)); } diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index d7b1d1882cab..b388651b7836 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -855,7 +855,7 @@ static int gem_rx(struct gem *gp, int work_to_do) } skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->protocol = eth_type_trans(skb, gp->dev); netif_receive_skb(skb); @@ -1026,7 +1026,7 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags; ctrl = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index c6f5bc3c042f..17981da22730 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -1207,7 +1207,7 @@ static void happy_meal_transceiver_check(struct happy_meal *hp, void __iomem *tr * flags, thus: * * skb->csum = rxd->rx_flags & 0xffff; - * skb->ip_summed = CHECKSUM_HW; + * skb->ip_summed = CHECKSUM_COMPLETE; * * before sending off the skb to the protocols, and we are good as gold. */ @@ -2074,7 +2074,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) /* This card is _fucking_ hot... */ skb->csum = ntohs(csum ^ 0xffff); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; RXD(("len=%d csum=%4x]", len, csum)); skb->protocol = eth_type_trans(skb, dev); @@ -2268,7 +2268,7 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 tx_flags; tx_flags = TXFLAG_OWN; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u32 csum_start_off, csum_stuff_off; csum_start_off = (u32) (skb->h.raw - skb->data); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index eafabb253f08..6f5d3a38c582 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3851,11 +3851,11 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.th->check = 0; } - else if (skb->ip_summed == CHECKSUM_HW) + else if (skb->ip_summed == CHECKSUM_PARTIAL) base_flags |= TXD_FLAG_TCPUDP_CSUM; #else mss = 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) base_flags |= TXD_FLAG_TCPUDP_CSUM; #endif #if TG3_VLAN_TAG_USED @@ -3981,7 +3981,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) entry = tp->tx_prod; base_flags = 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) base_flags |= TXD_FLAG_TCPUDP_CSUM; #if TG3_TSO_SUPPORT != 0 mss = 0; diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 4103c37172f9..c6e601dc6bbc 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -830,7 +830,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) first_txd->addrHi = (u64)((unsigned long) skb) >> 32; first_txd->processFlags = 0; - if(skb->ip_summed == CHECKSUM_HW) { + if(skb->ip_summed == CHECKSUM_PARTIAL) { /* The 3XP will figure out if this is UDP/TCP */ first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM; first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM; diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index ae971080e2e4..66547159bfd9 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -1230,7 +1230,7 @@ static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev) rp->tx_skbuff[entry] = skb; if ((rp->quirks & rqRhineI) && - (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_HW)) { + (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) { /* Must use alignment buffer. */ if (skb->len > PKT_BUF_SZ) { /* packet too long, drop it */ diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index aa9cd92f46b2..f1e0c746a388 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -2002,7 +2002,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) * Handle hardware checksum */ if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM) - && (skb->ip_summed == CHECKSUM_HW)) { + && (skb->ip_summed == CHECKSUM_PARTIAL)) { struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) td_ptr->tdesc1.TCR |= TCR0_TCPCK; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 50a4719512ed..4f2c2b6beb5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -976,7 +976,7 @@ extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); -extern int skb_checksum_help(struct sk_buff *skb, int inward); +extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); @@ -1012,7 +1012,7 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_HW)); + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } /* On bonding slaves other than the currently active slave, suppress diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 755e9cddac47..85577a4ffa61 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,8 +34,9 @@ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 +#define CHECKSUM_PARTIAL 1 #define CHECKSUM_UNNECESSARY 2 +#define CHECKSUM_COMPLETE 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -56,17 +57,17 @@ * Apparently with secret goal to sell you new device, when you * will add new protocol to your host. F.e. IPv6. 8) * - * HW: the most generic way. Device supplied checksum of _all_ + * COMPLETE: the most generic way. Device supplied checksum of _all_ * the packet as seen by netif_rx in skb->csum. * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, + * is able to produce some skb->csum, it MUST use COMPLETE, * not UNNECESSARY. * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * - * HW: device is required to csum packet as seen by hard_start_xmit + * PARTIAL: device is required to csum packet as seen by hard_start_xmit * from skb->h.raw to the end and to record the checksum * at skb->h.raw+skb->csum. * @@ -1261,14 +1262,14 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to - * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE - * so that it can be recomputed from scratch. + * update the CHECKSUM_COMPLETE checksum, or set ip_summed to + * CHECKSUM_NONE so that it can be recomputed from scratch. */ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); } @@ -1287,7 +1288,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __pskb_trim(skb, len); } diff --git a/net/core/datagram.c b/net/core/datagram.c index aecddcc30401..f558c61aecc7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb) sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, goto fault; if ((unsigned short)csum_fold(csum)) goto csum_error; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); iov->iov_len -= chunk; iov->iov_base += chunk; diff --git a/net/core/dev.c b/net/core/dev.c index d4a1ec3bded5..fc82f6f6e1c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1166,12 +1166,12 @@ EXPORT_SYMBOL(netif_device_attach); * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. */ -int skb_checksum_help(struct sk_buff *skb, int inward) +int skb_checksum_help(struct sk_buff *skb) { unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (inward) + if (skb->ip_summed == CHECKSUM_COMPLETE) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { @@ -1223,7 +1223,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { err = ptype->gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -1444,11 +1444,11 @@ int dev_queue_xmit(struct sk_buff *skb) /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_PARTIAL && (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) - if (skb_checksum_help(skb, 0)) + if (skb_checksum_help(skb)) goto out_kfree_skb; gso: diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 471da451cd48..ead5920c26d6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_COMPLETE && !(u16)csum_fold(csum_add(psum, skb->csum))) return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c54f3664bce5..8a476f1956e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1397,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) unsigned int csum; long csstart; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb->h.raw - skb->data; else csstart = skb_headlen(skb); @@ -1411,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart, 0); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum; *((unsigned short *)(to + csstuff)) = csum_fold(csum); @@ -1898,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, * @len: length of data pulled * * This function performs an skb_pull on the packet and updates - * update the CHECKSUM_HW checksum. It should be used on receive - * path processing instead of skb_pull unless you know that the - * checksum difference is zero (e.g., a valid IP header) or you - * are setting ip_summed to CHECKSUM_NONE. + * update the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_pull unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { @@ -1994,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) frag = skb_shinfo(nskb)->frags; k = 0; - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum = skb->csum; memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6ad797c14163..6d223e5c6741 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -930,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8e8117c19e4d..7003e763d970 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb) goto drop; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b84b53a47526..8d7f107c2eef 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip_frag_mem); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f9b3a31997b..e66f6ff2e198 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -576,7 +576,7 @@ static int ipgre_rcv(struct sk_buff *skb) if (flags&GRE_CSUM) { switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: csum = (u16)csum_fold(skb->csum); if (!csum) break; @@ -584,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } offset += 4; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 308bdeac3455..1b9b6742ef77 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -680,7 +680,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk { struct iovec *iov = from; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { @@ -736,7 +736,7 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -844,7 +844,7 @@ int ip_append_data(struct sock *sk, length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_ALL_CSUM && !exthdrlen) - csummode = CHECKSUM_HW; + csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index bc28b1160a3a..820e8318d10d 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - tcphoff, skb->nh.iph->protocol, skb->csum)) { @@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 89d9175d8f28..90c8166c0ec1 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - udphoff, @@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6a9e34b794bc..f88347de21a9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) break; if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index fb920e76ec10..9de81ff645d5 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -865,8 +865,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because it is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 9b2c16b4d2ff..e58e52f14553 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 17de077a7901..f4f00c816d87 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -111,8 +111,9 @@ ip_nat_fn(unsigned int hooknum, & htons(IP_MF|IP_OFFSET))); /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_HW) - if (skb_checksum_help(*pskb, (out == NULL))) + if ((*pskb)->ip_summed == CHECKSUM_PARTIAL || + (*pskb)->ip_summed == CHECKSUM_COMPLETE) + if (skb_checksum_help(*pskb)) return NF_DROP; ct = ip_conntrack_get(*pskb, &ctinfo); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 198ac36db861..276a964ee6cf 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4adf5c9d34f5..4ec43f98fe49 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -49,7 +49,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) /* Return 0 if there was an error. */ static inline int -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) +set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; u_int16_t diffs[2]; @@ -70,8 +70,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, inward)) + if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || + (*pskb)->ip_summed == CHECKSUM_COMPLETE) && + skb_checksum_help(*pskb)) return 0; diffs[0] = ((u_int16_t *)tcph)[6]; @@ -106,7 +107,7 @@ target(struct sk_buff **pskb, if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && (*pskb)->nh.iph->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo, (out == NULL))) + if (!set_ect_tcp(pskb, einfo)) return NF_DROP; return IPT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index ef2fe5b3f0d8..c998dc0fcd15 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -62,8 +62,9 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, out == NULL)) + if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || + (*pskb)->ip_summed == CHECKSUM_COMPLETE) && + skb_checksum_help(*pskb)) return NF_DROP; iph = (*pskb)->nh.iph; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 934396bb1376..b0124e69ab38 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -568,7 +568,7 @@ new_segment: skb->truesize += copy; sk->sk_wmem_queued += copy; sk->sk_forward_alloc -= copy; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; skb_shinfo(skb)->gso_segs = 0; @@ -723,7 +723,7 @@ new_segment: * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, tp, skb); copy = size_goal; @@ -2205,7 +2205,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) th->fin = th->psh = 0; th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2219,7 +2219,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 43f6740244f8..b2aa512a30e9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -973,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static int tcp_v4_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4f3ffe1b3b4..9252a50c4b49 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), nsize, 0); @@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss skb->csum = csum_block_sub(skb->csum, buff->csum, len); } else { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); } @@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->truesize -= len; sk->sk_wmem_queued -= len; @@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; - buff->ip_summed = skb->ip_summed = CHECKSUM_HW; + buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. */ @@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - if (skb->ip_summed == CHECKSUM_HW) - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = skb->ip_summed; len = 0; while (len < probe_size) { @@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk) ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_partial(skb->data, skb->len, 0); } else { __pskb_trim_head(skb, copy); @@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - if (next_skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = next_skb->ip_summed; - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a4d005eccc7f..87152510980c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -429,7 +429,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) /* * Only one fragment on the socket. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { skb->csum = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, IPPROTO_UDP, 0); @@ -448,7 +448,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) * fragments on the socket so that all csums of sk_buffs * should be together. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int offset = (unsigned char *)uh - skb->data; skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -1088,7 +1088,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_HW) { + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d16f863cf687..4a96a9e3ef3b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -48,8 +48,8 @@ static int xfrm4_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 86dac106873b..05afa6b1912b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -294,7 +294,7 @@ looped_back: hdr = (struct ipv6_rt_hdr *) skb2->h.raw; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; i = n - --hdr->segments_left; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index dbfce089e916..103055107674 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -606,7 +606,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) /* Perform checksum. */ switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) break; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4fb47a252913..65514f21c186 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -866,7 +866,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 395a417ba955..580b1aba6722 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 968a14be0d05..c01c126224e2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 00d5583807f7..7a4e4c2e3197 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, return -1; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr + 1) - skb->nh.raw, @@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &nf_ct_frag6_mem); @@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->nh.ipv6h->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) + if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); fq->fragments = NULL; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d5040e172292..d4af1cb5e19f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -334,7 +334,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { skb_postpull_rcsum(skb, skb->nh.raw, skb->h.raw - skb->nh.raw); if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4e299c69e1c6..a8623d2b0879 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); @@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip6_frag_mem); @@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, *skb_in = head; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) + if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 302786a11cd6..7f1b660493b7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -545,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -570,7 +570,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -1033,7 +1033,7 @@ out: static int tcp_v6_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 82c7c9cde2a8..780b89f6dfcc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -475,7 +475,7 @@ static int udpv6_rcv(struct sk_buff **pskb) uh = skb->h.uh; } - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_COMPLETE && !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c8c8b44a0f58..6d111743e508 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -41,8 +41,8 @@ static int xfrm6_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af8adcba23a7..308d2abd7ee5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -823,8 +823,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ae07ebe3ab37..d36e03139e8b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 49ef41e34c48..eddfbe4441a2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entskb, - outdev == NULL))) { + if ((entskb->ip_summed == CHECKSUM_PARTIAL || + entskb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entskb))) { spin_unlock_bh(&queue->lock); return NULL; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4172a5235916..300215bdbf46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -586,7 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb->nh.raw - skb->data); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a08ec4c7c55d..45939bafbdf8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) - || (skb->ip_summed == CHECKSUM_HW - && skb_checksum_help(skb, 0))) { + || (skb->ip_summed == CHECKSUM_PARTIAL + && skb_checksum_help(skb))) { sch->qstats.drops++; return NET_XMIT_DROP; } diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index eb330d4f66d6..6f17527b9e69 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if ((unsigned short)csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: -- cgit v1.2.3-71-gd317 From 4cf411de49c65140b3c259748629b561c0d3340f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Aug 2006 00:58:33 -0700 Subject: [NETFILTER]: Get rid of HW checksum invalidation Update hardware checksums incrementally to avoid breaking GSO. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 6 +++ include/linux/netfilter_ipv4/ip_nat.h | 4 -- include/linux/netfilter_ipv4/ip_nat_core.h | 8 ++-- net/ipv4/netfilter/ip_nat_core.c | 52 ++++++++++---------------- net/ipv4/netfilter/ip_nat_helper.c | 59 +++++++++++++++++++++--------- net/ipv4/netfilter/ip_nat_proto_gre.c | 5 ++- net/ipv4/netfilter/ip_nat_proto_icmp.c | 8 ++-- net/ipv4/netfilter/ip_nat_proto_tcp.c | 7 ++-- net/ipv4/netfilter/ip_nat_proto_udp.c | 15 +++++--- net/ipv4/netfilter/ip_nat_standalone.c | 10 +---- net/ipv4/netfilter/ipt_ECN.c | 19 +++------- net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/netfilter/ipt_TCPMSS.c | 39 ++++++++------------ net/netfilter/core.c | 22 +++++++++++ 14 files changed, 138 insertions(+), 117 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 10168e26a846..b7e67d1d4382 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -282,6 +282,12 @@ extern void nf_invalidate_cache(int pf); Returns true or false. */ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, + u_int32_t csum); +extern u_int16_t nf_proto_csum_update(struct sk_buff *skb, + u_int32_t oldval, u_int32_t newval, + u_int16_t csum, int pseudohdr); + struct nf_afinfo { unsigned short family; unsigned int (*checksum)(struct sk_buff *skb, unsigned int hook, diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e9f5ed1d9f68..98f8407e4cb5 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -72,10 +72,6 @@ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack); -/* Calculate relative checksum. */ -extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, - u_int32_t newval, - u_int16_t oldcheck); #else /* !__KERNEL__: iptables wants this to compile. */ #define ip_nat_multi_range ip_nat_multi_range_compat #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 30db23f06b03..60566f9fd7b3 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct ip_conntrack *ct, unsigned int hooknum, struct sk_buff **pskb); -extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb); #endif /* _IP_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1741d555ad0d..4c540d03d48e 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -101,18 +101,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) write_unlock_bh(&ip_nat_lock); } -/* We do checksum mangling, so if they were wrong before they're still - * wrong. Also works for incomplete packets (eg. ICMP dest - * unreachables.) */ -u_int16_t -ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} -EXPORT_SYMBOL(ip_nat_cheat_check); - /* Is this tuple already taken? (not by us) */ int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, @@ -378,12 +366,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, - iph->check); + iph->check = nf_csum_update(~iph->saddr, target->src.ip, + iph->check); iph->saddr = target->src.ip; } else { - iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, - iph->check); + iph->check = nf_csum_update(~iph->daddr, target->dst.ip, + iph->check); iph->daddr = target->dst.ip; } return 1; @@ -423,10 +411,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct, EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { struct { struct icmphdr icmp; @@ -434,7 +422,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; + enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; @@ -443,12 +433,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { - hdrlen = (*pskb)->nh.iph->ihl * 4; - if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0))) - return 0; - } + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + return 0; /* Must be RELATED */ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || @@ -487,12 +473,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, !manip)) return 0; - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); + } /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). */ diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index cbcaa45370ae..021c3daae3ed 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -165,7 +165,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct tcphdr *tcph; - int datalen; + int oldlen, datalen; if (!skb_make_writable(pskb, (*pskb)->len)) return 0; @@ -180,13 +180,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcph = (void *)iph + iph->ihl*4; + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); datalen = (*pskb)->len - iph->ihl*4; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, datalen, 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, + iph->saddr, iph->daddr, + csum_partial((char *)tcph, + datalen, 0)); + } else + tcph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ 0xFFFF, + htons(datalen), + tcph->check, 1); if (rep_len != match_len) { set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); @@ -221,6 +230,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct udphdr *udph; + int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ iph = (*pskb)->nh.iph; @@ -238,22 +248,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; udph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - udph->len = htons((*pskb)->len - iph->ihl*4); + datalen = (*pskb)->len - iph->ihl*4; + udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (udph->check) { - int datalen = (*pskb)->len - iph->ihl * 4; + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + return 1; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, csum_partial((char *)udph, datalen, 0)); - } - + if (!udph->check) + udph->check = -1; + } else + udph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ 0xFFFF, + htons(datalen), + udph->check, 1); return 1; } EXPORT_SYMBOL(ip_nat_mangle_udp_packet); @@ -293,11 +313,14 @@ sack_adjust(struct sk_buff *skb, ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = - ip_nat_cheat_check(~sack->start_seq, new_start_seq, - ip_nat_cheat_check(~sack->end_seq, - new_end_seq, - tcph->check)); + tcph->check = nf_proto_csum_update(skb, + ~sack->start_seq, + new_start_seq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(skb, + ~sack->end_seq, + new_end_seq, + tcph->check, 0); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -381,10 +404,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb, newack = ntohl(tcph->ack_seq) - other_way->offset_before; newack = htonl(newack); - tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, - ip_nat_cheat_check(~tcph->ack_seq, - newack, - tcph->check)); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, + tcph->check, 0); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 38acfdf540eb..70a65372225a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -130,9 +130,10 @@ gre_manip_pkt(struct sk_buff **pskb, if (greh->csum) { /* FIXME: Never tested this code... */ *(gre_csum(greh)) = - ip_nat_cheat_check(~*(gre_key(greh)), + nf_proto_csum_update(*pskb, + ~*(gre_key(greh)), tuple->dst.u.gre.key, - *(gre_csum(greh))); + *(gre_csum(greh)), 0); } *(gre_key(greh)) = tuple->dst.u.gre.key; break; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 31a3f4ccb99c..ec50cc295317 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb, return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); - - hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, - tuple->src.u.icmp.id, - hdr->checksum); + hdr->checksum = nf_proto_csum_update(*pskb, + hdr->un.echo.id ^ 0xFFFF, + tuple->src.u.icmp.id, + hdr->checksum, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a3d14079eba6..72a6307bd2db 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport, + hdr->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index ec6053fdc867..5da196ae758c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check) /* 0 is a special case meaning no checksum */ - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); + + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, + hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, + *portptr ^ 0xFFFF, newport, + hdr->check, 0); + if (!hdr->check) + hdr->check = -1; + } *portptr = newport; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f4f00c816d87..f3b778355432 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -110,12 +110,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_PARTIAL || - (*pskb)->ip_summed == CHECKSUM_COMPLETE) - if (skb_checksum_help(*pskb)) - return NF_DROP; - ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to @@ -146,8 +140,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(ct, ctinfo, + hooknum, pskb)) return NF_DROP; else return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4ec43f98fe49..35916c74fe4e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -52,7 +52,7 @@ static inline int set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; - u_int16_t diffs[2]; + u_int16_t oldval; /* Not enought header? */ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, @@ -70,23 +70,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || - (*pskb)->ip_summed == CHECKSUM_COMPLETE) && - skb_checksum_help(*pskb)) - return 0; - - diffs[0] = ((u_int16_t *)tcph)[6]; + oldval = ((u_int16_t *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - diffs[1] = ((u_int16_t *)tcph)[6]; - diffs[0] = diffs[0] ^ 0xFFFF; - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) - tcph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - tcph->check^0xFFFF)); + tcph->check = nf_proto_csum_update((*pskb), + oldval ^ 0xFFFF, + ((u_int16_t *)tcph)[6], + tcph->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 7f905bf2bde5..95c6662b663c 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -185,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->urg_ptr = 0; /* Adjust TCP checksum */ + nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index c998dc0fcd15..0fce85e05507 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -27,14 +27,6 @@ MODULE_DESCRIPTION("iptables TCP MSS modification module"); #define DEBUGP(format, args...) #endif -static u_int16_t -cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} - static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { @@ -62,11 +54,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || - (*pskb)->ip_summed == CHECKSUM_COMPLETE) && - skb_checksum_help(*pskb)) - return NF_DROP; - iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; @@ -120,9 +107,10 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); - tcph->check = cheat_check(htons(oldmss)^0xFFFF, - htons(newmss), - tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + htons(oldmss)^0xFFFF, + htons(newmss), + tcph->check, 0); DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" "->%u.%u.%u.%u:%hu changed TCP MSS option" @@ -162,8 +150,10 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, - htons(tcplen + TCPOLEN_MSS), tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + htons(tcplen) ^ 0xFFFF, + htons(tcplen + TCPOLEN_MSS), + tcph->check, 1); tcplen += TCPOLEN_MSS; opt[0] = TCPOPT_MSS; @@ -171,16 +161,19 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); + tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt), + tcph->check, 0); oldval = ((u_int16_t *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - tcph->check = cheat_check(oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + oldval ^ 0xFFFF, + ((u_int16_t *)tcph)[6], + tcph->check, 0); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = cheat_check(iph->tot_len ^ 0xFFFF, - newtotlen, iph->check); + iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF, + newtotlen, iph->check); iph->tot_len = newtotlen; DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5d29d5e23624..27f639f3ac2a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -222,6 +222,28 @@ copy_skb: } EXPORT_SYMBOL(skb_make_writable); +u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum) +{ + u_int32_t diff[] = { oldval, newval }; + + return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum)); +} +EXPORT_SYMBOL(nf_csum_update); + +u_int16_t nf_proto_csum_update(struct sk_buff *skb, + u_int32_t oldval, u_int32_t newval, + u_int16_t csum, int pseudohdr) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + csum = nf_csum_update(oldval, newval, csum); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = nf_csum_update(oldval, newval, skb->csum); + } else if (pseudohdr) + csum = ~nf_csum_update(oldval, newval, ~csum); + + return csum; +} +EXPORT_SYMBOL(nf_proto_csum_update); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence -- cgit v1.2.3-71-gd317 From 9067c722cf6930adf1df2d169de9094dd90b0c33 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:57:44 -0700 Subject: [NEIGH]: Move netlink neighbour bits to linux/neighbour.h Moves netlink neighbour bits to linux/neighbour.h. Also moves bits to be exported to userspace from net/neighbour.h to linux/neighbour.h and removes __KERNEL__ guards, userspace is not supposed to be using it. rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/neighbour.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 63 --------------------------------------------- include/net/neighbour.h | 39 ++-------------------------- net/core/rtnetlink.c | 2 -- 4 files changed, 67 insertions(+), 102 deletions(-) create mode 100644 include/linux/neighbour.h (limited to 'include/linux') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h new file mode 100644 index 000000000000..8e8293d86fbb --- /dev/null +++ b/include/linux/neighbour.h @@ -0,0 +1,65 @@ +#ifndef __LINUX_NEIGHBOUR_H +#define __LINUX_NEIGHBOUR_H + +#include + +struct ndmsg +{ + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change + and make no address resolution or NUD. + NUD_PERMANENT is also cannot be deleted by garbage collectors. + */ + +struct nda_cacheinfo +{ + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 84f3eb426da2..9750f0214c22 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -386,69 +386,6 @@ struct rta_session -/************************************************************** - * Neighbour discovery. - ****/ - -struct ndmsg -{ - unsigned char ndm_family; - unsigned char ndm_pad1; - unsigned short ndm_pad2; - int ndm_ifindex; /* Link index */ - __u16 ndm_state; - __u8 ndm_flags; - __u8 ndm_type; -}; - -enum -{ - NDA_UNSPEC, - NDA_DST, - NDA_LLADDR, - NDA_CACHEINFO, - NDA_PROBES, - __NDA_MAX -}; - -#define NDA_MAX (__NDA_MAX - 1) - -#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) -#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - - -struct nda_cacheinfo -{ - __u32 ndm_confirmed; - __u32 ndm_used; - __u32 ndm_updated; - __u32 ndm_refcnt; -}; - - /***************************************************************** * Neighbour tables specific messages. * diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4901ee446879..74c4b6ff8a5c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -1,6 +1,8 @@ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H +#include + /* * Generic neighbour manipulation * @@ -14,40 +16,6 @@ * - Add neighbour cache statistics like rtstat */ -/* The following flags & states are exported to user space, - so that they should be moved to include/linux/ directory. - */ - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT is also cannot be deleted by garbage collectors. - */ - -#ifdef __KERNEL__ - #include #include #include @@ -374,6 +342,3 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif -#endif - - diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 93ba04fb8444..78ccbd4c4e37 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -104,7 +104,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -121,7 +120,6 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, -- cgit v1.2.3-71-gd317 From b63bbc5006a0a62fabc81c4f77e95f16ff16f340 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 18:00:57 -0700 Subject: [NEIGH]: Move netlink neighbour table bits to linux/neighbour.h rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour tables layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/neighbour.h | 94 ++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 101 ---------------------------------------------- net/core/rtnetlink.c | 2 - 3 files changed, 94 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index 8e8293d86fbb..bd3bbf668cdb 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -62,4 +62,98 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9750f0214c22..784a1a29490e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -384,107 +384,6 @@ struct rta_session } u; }; - - -/***************************************************************** - * Neighbour tables specific messages. - * - * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the - * NLM_F_DUMP flag set. Every neighbour table configuration is - * spread over multiple messages to avoid running into message - * size limits on systems with many interfaces. The first message - * in the sequence transports all not device specific data such as - * statistics, configuration, and the default parameter set. - * This message is followed by 0..n messages carrying device - * specific parameter sets. - * Although the ordering should be sufficient, NDTA_NAME can be - * used to identify sequences. The initial message can be identified - * by checking for NDTA_CONFIG. The device specific messages do - * not contain this TLV but have NDTPA_IFINDEX set to the - * corresponding interface index. - * - * To change neighbour table attributes, send RTM_SETNEIGHTBL - * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], - * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked - * otherwise. Device specific parameter sets can be changed by - * setting NDTPA_IFINDEX to the interface index of the corresponding - * device. - ****/ - -struct ndt_stats -{ - __u64 ndts_allocs; - __u64 ndts_destroys; - __u64 ndts_hash_grows; - __u64 ndts_res_failed; - __u64 ndts_lookups; - __u64 ndts_hits; - __u64 ndts_rcv_probes_mcast; - __u64 ndts_rcv_probes_ucast; - __u64 ndts_periodic_gc_runs; - __u64 ndts_forced_gc_runs; -}; - -enum { - NDTPA_UNSPEC, - NDTPA_IFINDEX, /* u32, unchangeable */ - NDTPA_REFCNT, /* u32, read-only */ - NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ - NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ - NDTPA_RETRANS_TIME, /* u64, msecs */ - NDTPA_GC_STALETIME, /* u64, msecs */ - NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ - NDTPA_QUEUE_LEN, /* u32 */ - NDTPA_APP_PROBES, /* u32 */ - NDTPA_UCAST_PROBES, /* u32 */ - NDTPA_MCAST_PROBES, /* u32 */ - NDTPA_ANYCAST_DELAY, /* u64, msecs */ - NDTPA_PROXY_DELAY, /* u64, msecs */ - NDTPA_PROXY_QLEN, /* u32 */ - NDTPA_LOCKTIME, /* u64, msecs */ - __NDTPA_MAX -}; -#define NDTPA_MAX (__NDTPA_MAX - 1) - -struct ndtmsg -{ - __u8 ndtm_family; - __u8 ndtm_pad1; - __u16 ndtm_pad2; -}; - -struct ndt_config -{ - __u16 ndtc_key_len; - __u16 ndtc_entry_size; - __u32 ndtc_entries; - __u32 ndtc_last_flush; /* delta to now in msecs */ - __u32 ndtc_last_rand; /* delta to now in msecs */ - __u32 ndtc_hash_rnd; - __u32 ndtc_hash_mask; - __u32 ndtc_hash_chain_gc; - __u32 ndtc_proxy_qlen; -}; - -enum { - NDTA_UNSPEC, - NDTA_NAME, /* char *, unchangeable */ - NDTA_THRESH1, /* u32 */ - NDTA_THRESH2, /* u32 */ - NDTA_THRESH3, /* u32 */ - NDTA_CONFIG, /* struct ndt_config, read-only */ - NDTA_PARMS, /* nested TLV NDTPA_* */ - NDTA_STATS, /* struct ndt_stats, read-only */ - NDTA_GC_INTERVAL, /* u64, msecs */ - __NDTA_MAX -}; -#define NDTA_MAX (__NDTA_MAX - 1) - -#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \ - NLMSG_ALIGN(sizeof(struct ndtmsg)))) -#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) - - /**** * General form of address family dependent message. ****/ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 78ccbd4c4e37..a1b783a6afc6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -112,7 +112,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)), }; static const int rta_max[RTM_NR_FAMILIES] = @@ -125,7 +124,6 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, - [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX, }; void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) -- cgit v1.2.3-71-gd317 From ac5a488ef252ed673cb067843e411f8cc43f7ab9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Aug 2006 20:57:31 -0700 Subject: [NET]: Round out in-kernel sockets API This patch implements wrapper functions that provide a convenient way to access the sockets API for in-kernel users like sunrpc, cifs & ocfs2 etc and any future users. Signed-off-by: Sridhar Samudrala Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/net.h | 19 +++++++++ net/socket.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index b20c53c74413..19da2c08d7b6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -208,6 +208,25 @@ extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); +extern int kernel_bind(struct socket *sock, struct sockaddr *addr, + int addrlen); +extern int kernel_listen(struct socket *sock, int backlog); +extern int kernel_accept(struct socket *sock, struct socket **newsock, + int flags); +extern int kernel_connect(struct socket *sock, struct sockaddr *addr, + int addrlen, int flags); +extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen); +extern int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen); +extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); +extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); + #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name #define SOCKOPS_WRAP(name, fam) diff --git a/net/socket.c b/net/socket.c index 6756e57e1ff0..2eaebf934a1a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2170,6 +2170,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, } #endif +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +{ + return sock->ops->bind(sock, addr, addrlen); +} + +int kernel_listen(struct socket *sock, int backlog) +{ + return sock->ops->listen(sock, backlog); +} + +int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +{ + struct sock *sk = sock->sk; + int err; + + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, + newsock); + if (err < 0) + goto done; + + err = sock->ops->accept(sock, *newsock, flags); + if (err < 0) { + sock_release(*newsock); + goto done; + } + + (*newsock)->ops = sock->ops; + +done: + return err; +} + +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags) +{ + return sock->ops->connect(sock, addr, addrlen, flags); +} + +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 0); +} + +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 1); +} + +int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->getsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) +{ + if (sock->ops->sendpage) + return sock->ops->sendpage(sock, page, offset, size, flags); + + return sock_no_sendpage(sock, page, offset, size, flags); +} + +int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, cmd, arg); + set_fs(oldfs); + + return err; +} + /* ABI emulation layers need these two */ EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); @@ -2186,3 +2289,13 @@ EXPORT_SYMBOL(sock_wake_async); EXPORT_SYMBOL(sockfd_lookup); EXPORT_SYMBOL(kernel_sendmsg); EXPORT_SYMBOL(kernel_recvmsg); +EXPORT_SYMBOL(kernel_bind); +EXPORT_SYMBOL(kernel_listen); +EXPORT_SYMBOL(kernel_accept); +EXPORT_SYMBOL(kernel_connect); +EXPORT_SYMBOL(kernel_getsockname); +EXPORT_SYMBOL(kernel_getpeername); +EXPORT_SYMBOL(kernel_getsockopt); +EXPORT_SYMBOL(kernel_setsockopt); +EXPORT_SYMBOL(kernel_sendpage); +EXPORT_SYMBOL(kernel_sock_ioctl); -- cgit v1.2.3-71-gd317 From a8731cbf61c8768ea129780b70dc7dfc6795aad4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 9 Aug 2006 15:56:46 -0700 Subject: [DECNET]: Covert rules to use generic code This patch converts the DECnet rules code to use the generic rules system created by Thomas Graf . Signed-off-by: Steven Whitehouse Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +- include/net/dn_fib.h | 8 +- net/decnet/Kconfig | 1 + net/decnet/af_decnet.c | 1 + net/decnet/dn_dev.c | 3 +- net/decnet/dn_fib.c | 1 + net/decnet/dn_route.c | 3 +- net/decnet/dn_rules.c | 494 +++++++++++++++++----------------------------- net/decnet/dn_table.c | 1 + 9 files changed, 196 insertions(+), 319 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 784a1a29490e..0aaffa2ae666 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -534,7 +534,8 @@ enum rtnetlink_groups { RTNLGRP_NOP2, RTNLGRP_DECnet_ROUTE, #define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE - RTNLGRP_NOP3, + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index a15dcf0d5c1e..32bc8ce5c5ce 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -22,7 +22,7 @@ struct dn_kern_rta }; struct dn_fib_res { - struct dn_fib_rule *r; + struct fib_rule *r; struct dn_fib_info *fi; unsigned char prefixlen; unsigned char nh_sel; @@ -147,10 +147,8 @@ extern void dn_fib_table_cleanup(void); */ extern void dn_fib_rules_init(void); extern void dn_fib_rules_cleanup(void); -extern void dn_fib_rule_put(struct dn_fib_rule *); -extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags); extern unsigned dnet_addr_type(__le16 addr); -extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res); +extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res); /* * rtnetlink interface @@ -176,7 +174,7 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) if (res->fi) dn_fib_info_put(res->fi); if (res->r) - dn_fib_rule_put(res->r); + fib_rule_put(res->r); } extern struct dn_fib_table *dn_fib_tables[]; diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 92f2ec46fd22..36e72cb145b0 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -27,6 +27,7 @@ config DECNET config DECNET_ROUTER bool "DECnet: router support (EXPERIMENTAL)" depends on DECNET && EXPERIMENTAL + select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 router. This is an experimental, but functional option. If you diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5486247735f6..70e027375682 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -130,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 632c5a90b589..88ea7a13bb24 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -1418,8 +1419,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, .dumpit = dn_fib_dump, }, - [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index fa20e2efcfc1..846df3954a63 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 743e9fcf7c5a..5e6f4616ca10 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags); + src_map = fl.fld_src; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6986be754ef2..096f1273e714 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -11,259 +11,198 @@ * * * Changes: + * Steve Whitehouse + * Updated for Thomas Graf's generic rules * */ -#include #include -#include -#include #include -#include #include #include -#include #include -#include #include -#include #include #include -#include -#include #include #include #include +#include #include #include #include #include +static struct fib_rules_ops dn_fib_rules_ops; + struct dn_fib_rule { - struct hlist_node r_hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - __le16 r_src; - __le16 r_srcmask; - __le16 r_dst; - __le16 r_dstmask; - __le16 r_srcmap; - u8 r_flags; + struct fib_rule common; + unsigned char dst_len; + unsigned char src_len; + __le16 src; + __le16 srcmask; + __le16 dst; + __le16 dstmask; + __le16 srcmap; + u8 flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; #endif - int r_ifindex; - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; static struct dn_fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7fff, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7fff, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct hlist_head dn_fib_rules; +static LIST_HEAD(dn_fib_rules); + -int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) && -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - - err = -EPERM; - if (r == &default_rule) - break; - - hlist_del_rcu(&r->r_hlist); - r->r_dead = 1; - dn_fib_rule_put(r); - err = 0; - break; - } - } + struct fib_lookup_arg arg = { + .result = res, + }; + int err; + + err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); + res->r = arg.rule; return err; } -static inline void dn_fib_rule_put_rcu(struct rcu_head *head) +int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, + struct fib_lookup_arg *arg) { - struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu); - kfree(r); -} + int err = -EAGAIN; + struct dn_fib_table *tbl; -void dn_fib_rule_put(struct dn_fib_rule *r) -{ - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, dn_fib_rule_put_rcu); - else - printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n"); + switch(rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } + + tbl = dn_fib_get_table(rule->table, 0); + if (tbl == NULL) + goto errout; + + err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; } +static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U16 }, + [FRA_DST] = { .type = NLA_U16 }, + [FRA_FWMARK] = { .type = NLA_U32 }, +}; -int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - if (rtm->rtm_type == RTN_NAT) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct dn_fib_table *tb; - if (rtm->rtm_type == RTN_UNICAST) { - if ((tb = dn_fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = tb->n; - } - } + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + u16 daddr = fl->fld_dst; + u16 saddr = fl->fld_src; + + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; - new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); + if (r->fwmark && (r->fwmark != fl->fld_fwmark)) + return 0; #endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = dev_get_by_name(new_r->r_ifname); - if (dev) { - new_r->r_ifindex = dev->ifindex; - dev_put(dev); - } - } - r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist); - if (!new_r->r_preference) { - if (r && r->r_hlist.next != NULL) { - r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; + return 1; +} + +static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) + goto errout; + + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct dn_fib_table *table; + + table = dn_fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } + + rule->table = table->n; } } - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); + if (tb[FRA_SRC]) + r->src = nla_get_u16(tb[FRA_SRC]); - if (last) - hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist); - else - hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist); - return 0; -} + if (tb[FRA_DST]) + r->dst = nla_get_u16(tb[FRA_DST]); +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + r->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif + + r->src_len = frh->src_len; + r->srcmask = dnet_make_mask(r->src_len); + r->dst_len = frh->dst_len; + r->dstmask = dnet_make_mask(r->dst_len); + err = 0; +errout: + return err; +} -int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct dn_fib_rule *r, *policy; - struct dn_fib_table *tb; - __le16 saddr = flp->fld_src; - __le16 daddr = flp->fld_dst; - struct hlist_node *node; - int err; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len && (r->src_len != frh->src_len)) + return 0; - rcu_read_lock(); + if (frh->dst_len && (r->dst_len != frh->dst_len)) + return 0; - hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || #ifdef CONFIG_DECNET_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) || + if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; #endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - - switch(r->r_action) { - case RTN_UNICAST: - case RTN_NAT: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL) - continue; - err = tb->lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } + if (tb[FRA_SRC] && (r->src != nla_get_u32(tb[FRA_SRC]))) + return 0; + + if (tb[FRA_DST] && (r->dst != nla_get_u32(tb[FRA_DST]))) + return 0; - rcu_read_unlock(); - return -ESRCH; + return 1; } unsigned dnet_addr_type(__le16 addr) @@ -284,142 +223,77 @@ unsigned dnet_addr_type(__le16 addr) return ret; } -__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags) +static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) { - struct dn_fib_rule *r = res->r; - - if (r->r_action == RTN_NAT) { - int addrtype = dnet_addr_type(r->r_srcmap); - - if (addrtype == RTN_NAT) { - saddr = (saddr&~r->r_srcmask)|r->r_srcmap; - *flags |= RTCF_SNAT; - } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { - saddr = r->r_srcmap; - *flags |= RTCF_MASQ; - } - } - return saddr; -} + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; -static void dn_fib_rules_detach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; + frh->family = AF_DECnet; + frh->dst_len = r->dst_len; + frh->src_len = r->src_len; + frh->tos = 0; - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; - } -} +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (r->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); +#endif + if (r->dst_len) + NLA_PUT_U16(skb, FRA_DST, r->dst); + if (r->src_len) + NLA_PUT_U16(skb, FRA_SRC, r->src); -static void dn_fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; + return 0; - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; - } +nla_put_failure: + return -ENOBUFS; } -static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +static u32 dn_fib_rule_default_pref(void) { - struct net_device *dev = ptr; - - switch(event) { - case NETDEV_UNREGISTER: - dn_fib_rules_detach(dev); - dn_fib_sync_down(0, dev, 1); - case NETDEV_REGISTER: - dn_fib_rules_attach(dev); - dn_fib_sync_up(dev); + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&dn_fib_rules)) { + pos = dn_fib_rules.next; + if (pos->next != &dn_fib_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } - return NOTIFY_DONE; -} - - -static struct notifier_block dn_fib_rules_notifier = { - .notifier_call = dn_fib_rules_event, -}; - -static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, - struct netlink_callback *cb, unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_DECnet; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); -#endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 2, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 2, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return 0; } int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - int s_idx = cb->args[0]; - struct dn_fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (idx < s_idx) - goto next; - if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) - break; -next: - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; - - return skb->len; + return fib_rules_dump(skb, cb, AF_DECnet); } +static struct fib_rules_ops dn_fib_rules_ops = { + .family = AF_DECnet, + .rule_size = sizeof(struct dn_fib_rule), + .action = dn_fib_rule_action, + .match = dn_fib_rule_match, + .configure = dn_fib_rule_configure, + .compare = dn_fib_rule_compare, + .fill = dn_fib_rule_fill, + .default_pref = dn_fib_rule_default_pref, + .nlgroup = RTNLGRP_DECnet_RULE, + .policy = dn_fib_rule_policy, + .rules_list = &dn_fib_rules, + .owner = THIS_MODULE, +}; + void __init dn_fib_rules_init(void) { - INIT_HLIST_HEAD(&dn_fib_rules); - hlist_add_head(&default_rule.r_hlist, &dn_fib_rules); - register_netdevice_notifier(&dn_fib_rules_notifier); + list_add_tail(&default_rule.common.list, &dn_fib_rules); + fib_rules_register(&dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - unregister_netdevice_notifier(&dn_fib_rules_notifier); + fib_rules_unregister(&dn_fib_rules_ops); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index e926c952e363..2e01b67398c8 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-71-gd317 From 757dbb494be3309fe41ce4c62f8057d8b41d8897 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Aug 2006 20:50:00 -0700 Subject: [NET]: drop unused elements from net_proto_family Three values in net_proto_family are defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/net.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 19da2c08d7b6..1bd76327ee2b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -169,11 +169,6 @@ struct proto_ops { struct net_proto_family { int family; int (*create)(struct socket *sock, int protocol); - /* These are counters for the number of different methods of - each we support */ - short authentication; - short encryption; - short encrypt_net; struct module *owner; }; -- cgit v1.2.3-71-gd317 From f0fd27d42e39b91f85e1840ec49b072fd6c545b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Aug 2006 21:03:17 -0700 Subject: [NET]: sock_register interface changes The sock_register() doesn't change the family, so the protocols can define it read-only. No caller ever checks return value from sock_unregister() Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++-- net/socket.c | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 1bd76327ee2b..c257f716e00f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -176,8 +176,8 @@ struct iovec; struct kvec; extern int sock_wake_async(struct socket *sk, int how, int band); -extern int sock_register(struct net_proto_family *fam); -extern int sock_unregister(int family); +extern int sock_register(const struct net_proto_family *fam); +extern void sock_unregister(int family); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, diff --git a/net/socket.c b/net/socket.c index b5a3fcb9ed6d..4147fe4bf41d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -147,7 +147,7 @@ static struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO]; +static const struct net_proto_family *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -2080,7 +2080,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) * socket interface. The value ops->family coresponds to the * socket system call protocol family. */ -int sock_register(struct net_proto_family *ops) +int sock_register(const struct net_proto_family *ops) { int err; @@ -2116,10 +2116,9 @@ int sock_register(struct net_proto_family *ops) * a module then it needs to provide its own protection in * the ops->create routine. */ -int sock_unregister(int family) +void sock_unregister(int family) { - if (family < 0 || family >= NPROTO) - return -EINVAL; + BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); net_families[family] = NULL; @@ -2128,7 +2127,6 @@ int sock_unregister(int family) synchronize_rcu(); printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); - return 0; } static int __init sock_init(void) -- cgit v1.2.3-71-gd317 From 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:09:48 -0700 Subject: [NET]: Introduce RTA_TABLE/FRA_TABLE attributes Introduce RTA_TABLE route attribute and FRA_TABLE routing rule attribute to hold 32 bit routing table IDs. Usespace compatibility is provided by continuing to accept and send the rtm_table field, but because of its limited size it can only carry the low 8 bits of the table ID. This implies that if larger IDs are used, _all_ userspace programs using them need to use RTA_TABLE. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 4 ++++ include/linux/rtnetlink.h | 8 ++++++++ include/net/fib_rules.h | 7 +++++++ net/core/fib_rules.c | 5 +++-- net/decnet/dn_fib.c | 7 ++++--- net/decnet/dn_route.c | 1 + net/decnet/dn_table.c | 1 + net/ipv4/fib_frontend.c | 7 ++++--- net/ipv4/fib_rules.c | 1 + net/ipv4/fib_semantics.c | 1 + net/ipv4/route.c | 1 + net/ipv6/fib6_rules.c | 1 + net/ipv6/route.c | 13 +++++++++---- 13 files changed, 45 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 5e503f0ca6e4..19a82b6c1c1f 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -36,6 +36,10 @@ enum FRA_UNUSED5, FRA_FWMARK, /* netfilter mark (IPv4) */ FRA_FLOW, /* flow/class id */ + FRA_UNUSED6, + FRA_UNUSED7, + FRA_UNUSED8, + FRA_TABLE, /* Extended table id */ __FRA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0aaffa2ae666..ea422a539a03 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -264,6 +264,7 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, + RTA_TABLE, __RTA_MAX }; @@ -717,6 +718,13 @@ extern void __rtnl_unlock(void); } \ } while(0) +static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +{ + return RTA_GET_U32(rta[RTA_TABLE-1]); +rtattr_failure: + return table; +} + #endif /* __KERNEL__ */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 61375d9e53f8..8e2f473d3e82 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -74,6 +74,13 @@ static inline void fib_rule_put(struct fib_rule *rule) call_rcu(&rule->rcu, fib_rule_put_rcu); } +static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) +{ + if (nla[FRA_TABLE]) + return nla_get_u32(nla[FRA_TABLE]); + return frh->table; +} + extern int fib_rules_register(struct fib_rules_ops *); extern int fib_rules_unregister(struct fib_rules_ops *); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 6cdad24038e2..873b04d5df81 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -187,7 +187,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->action = frh->action; rule->flags = frh->flags; - rule->table = frh->table; + rule->table = frh_get_table(frh, tb); if (!rule->pref && ops->default_pref) rule->pref = ops->default_pref(); @@ -245,7 +245,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh->table != rule->table)) + if (frh->table && (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -291,6 +291,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->table = rule->table; + NLA_PUT_U32(skb, FRA_TABLE, rule->table); frh->res1 = 0; frh->res2 = 0; frh->action = rule->action; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 7b3bf5c3d720..fb596373daa8 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -491,7 +491,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) rta[i-1] = (struct rtattr *)RTA_DATA(attr); } } @@ -508,7 +509,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 0); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); if (tb) return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); @@ -524,7 +525,7 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 1); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); if (tb) return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e6f4616ca10..4c963213fba5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1486,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 0; r->rtm_tos = 0; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; r->rtm_scope = RT_SCOPE_UNIVERSE; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 1601ee5406a8..eca7c1e10c80 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -278,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = 0; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; rtm->rtm_type = type; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 06f4b23f6f57..2696ede52de2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -294,7 +294,8 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) *rta = (struct rtattr*)RTA_DATA(attr); } } @@ -310,7 +311,7 @@ int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet_check_attr(r, rta)) return -EINVAL; - tb = fib_get_table(r->rtm_table); + tb = fib_get_table(rtm_get_table(rta, r->rtm_table)); if (tb) return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); return -ESRCH; @@ -325,7 +326,7 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet_check_attr(r, rta)) return -EINVAL; - tb = fib_new_table(r->rtm_table); + tb = fib_new_table(rtm_get_table(rta, r->rtm_table)); if (tb) return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); return -ENOBUFS; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 58fb91b00fdf..0330b9cc4b58 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -184,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_DST] = { .type = NLA_U32 }, [FRA_FWMARK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7a112b5a185..ab753df20a39 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -953,6 +953,7 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b873cbcdd0b8..12128b82c9dc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2652,6 +2652,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 22a2fdb09831..2c4fbc855e6c 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -129,6 +129,7 @@ static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e08d84063c1f..843c5509fced 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1859,7 +1859,8 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); + return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), + rtm_get_table(arg, r->rtm_table)); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -1869,7 +1870,8 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); + return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), + rtm_get_table(arg, r->rtm_table)); } struct rt6_rtnl_dump_arg @@ -1887,6 +1889,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rta_cacheinfo ci; + u32 table; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -1902,9 +1905,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; if (rt->rt6i_table) - rtm->rtm_table = rt->rt6i_table->tb6_id; + table = rt->rt6i_table->tb6_id; else - rtm->rtm_table = RT6_TABLE_UNSPEC; + table = RT6_TABLE_UNSPEC; + rtm->rtm_table = table; + RTA_PUT_U32(skb, RTA_TABLE, table); if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) -- cgit v1.2.3-71-gd317 From b801f54917b7c6e8540f877ee562cd0725e62ebd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:12:34 -0700 Subject: [NET]: Increate RT_TABLE_MAX to 2^32 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ea422a539a03..7e4aa48680a7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -239,10 +239,8 @@ enum rt_class_t RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, - __RT_TABLE_MAX + RT_TABLE_MAX=0xFFFFFFFF }; -#define RT_TABLE_MAX (__RT_TABLE_MAX - 1) - /* Routing message attributes */ -- cgit v1.2.3-71-gd317 From 81aa646cc4df3779bcbf9d18cc2c0813ee9b3262 Mon Sep 17 00:00:00 2001 From: Martin Bligh Date: Mon, 14 Aug 2006 23:57:10 -0700 Subject: [IPV4]: add the UdpSndbufErrors and UdpRcvbufErrors MIBs Signed-off-by: Martin Bligh Signed-off-by: Andrew Morton --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/udp.c | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 4db25d5c7cd1..30156556f78d 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -155,6 +155,8 @@ enum UDP_MIB_NOPORTS, /* NoPorts */ UDP_MIB_INERRORS, /* InErrors */ UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */ + UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ + UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ __UDP_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d61e2a9d394d..9c6cbe3d9fb8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), + SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), + SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 87152510980c..514c1e9ae810 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -662,6 +662,16 @@ out: UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + } return err; do_confirm: @@ -981,6 +991,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); + int rc; /* * Charge it to the socket, dropping if the queue is full. @@ -1027,7 +1038,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (sock_queue_rcv_skb(sk,skb)<0) { + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; -- cgit v1.2.3-71-gd317 From 2942e90050569525628a9f34e0daaa9b661b49cc Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:30:25 -0700 Subject: [RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + net/core/rtnetlink.c | 10 +++++++--- net/decnet/dn_route.c | 4 +--- net/ipv4/ipmr.c | 7 ++++--- net/ipv4/route.c | 7 +++---- net/ipv6/addrconf.c | 4 +--- net/ipv6/route.c | 4 +--- net/sched/act_api.c | 7 ++----- 8 files changed, 20 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7e4aa48680a7..0e4f478e2cb5 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -584,6 +584,7 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); +extern int rtnl_unicast(struct sk_buff *skb, u32 pid); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a1b783a6afc6..e02fa6a33f42 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -166,6 +166,11 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } +int rtnl_unicast(struct sk_buff *skb, u32 pid) +{ + return nlmsg_unicast(rtnl, skb, pid); +} + int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct rtattr *mx = (struct rtattr*)skb->tail; @@ -574,9 +579,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) goto errout; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; + err = rtnl_unicast(skb, NETLINK_CB(skb).pid); errout: kfree(iw_buf); dev_put(dev); @@ -825,3 +828,4 @@ EXPORT_SYMBOL(rtnl); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); +EXPORT_SYMBOL(rtnl_unicast); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 4c963213fba5..c5daf3557c1f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1611,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - - return err; + return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out_free: kfree_skb(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 85893eef6b16..98f0aa0d4216 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e = NLMSG_DATA(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (skb->nh.iph->version == 0) { - int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { @@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } - err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12128b82c9dc..b8f6cadc5b3a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2809,10 +2809,9 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; -out: return err; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +out: + return err; out_free: kfree_skb(skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ba1e811ba50..4f991a2234d0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3268,9 +3268,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out: in6_ifa_put(ifa); return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9ce28277f47f..024c8e26c2ec 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2044,9 +2044,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out: return err; out_free: diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a2587b52e531..6990747d6d5a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -459,7 +459,6 @@ static int act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; - int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -468,10 +467,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) kfree_skb(skb); return -EINVAL; } - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - if (err > 0) - err = 0; - return err; + + return rtnl_unicast(skb, pid); } static struct tc_action * -- cgit v1.2.3-71-gd317 From 97676b6b5538b3e059d33b8338e7d5cc41c5f1f1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:31:41 -0700 Subject: [RTNETLINK]: Add rtnetlink notification interface Adds rtnl_notify() to send rtnetlink notification messages and rtnl_set_sk_err() to report notification errors as socket errors in order to indicate the need of a resync due to loss of events. nlmsg_report() is added to properly document the meaning of NLM_F_ECHO. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +++ include/net/netlink.h | 17 +++++++++++++++++ net/core/rtnetlink.c | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e4f478e2cb5..ecbe0349060c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -585,6 +585,9 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags); +extern void rtnl_set_sk_err(u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); diff --git a/include/net/netlink.h b/include/net/netlink.h index b154b81d9a7a..bf593eb59e1b 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -65,6 +65,9 @@ * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes * + * Misc: + * nlmsg_report() report back to application? + * * ------------------------------------------------------------------------ * Attributes Interface * ------------------------------------------------------------------------ @@ -194,6 +197,9 @@ extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, struct nlmsghdr *, int *)); extern void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); +extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, int report, + gfp_t flags); extern int nla_validate(struct nlattr *head, int len, int maxtype, struct nla_policy *policy); @@ -375,6 +381,17 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, nlmsg_attrlen(nlh, hdrlen), maxtype, policy); } +/** + * nlmsg_report - need to report back to application? + * @nlh: netlink message header + * + * Returns 1 if a report back to the application is requested. + */ +static inline int nlmsg_report(struct nlmsghdr *nlh) +{ + return !!(nlh->nlmsg_flags & NLM_F_ECHO); +} + /** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e02fa6a33f42..2b1af17e6389 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -171,6 +171,22 @@ int rtnl_unicast(struct sk_buff *skb, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } +int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) +{ + int report = 0; + + if (nlh) + report = nlmsg_report(nlh); + + return nlmsg_notify(rtnl, skb, pid, group, report, flags); +} + +void rtnl_set_sk_err(u32 group, int error) +{ + netlink_set_err(rtnl, 0, group, error); +} + int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct rtattr *mx = (struct rtattr*)skb->tail; @@ -829,3 +845,5 @@ EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); +EXPORT_SYMBOL(rtnl_notify); +EXPORT_SYMBOL(rtnl_set_sk_err); -- cgit v1.2.3-71-gd317 From 56fc85ac961e2c20dcb5ef07e2628b3f93de2e49 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:37:29 -0700 Subject: [RTNETLINK]: Unexport rtnl socket Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 -- net/core/rtnetlink.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ecbe0349060c..9c92dc8b9a08 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -574,8 +574,6 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) -extern struct sock *rtnl; - struct rtnetlink_link { int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5300b5dd0fd..dfc58269240a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -58,6 +58,7 @@ #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ static DEFINE_MUTEX(rtnl_mutex); +static struct sock *rtnl; void rtnl_lock(void) { @@ -95,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } -struct sock *rtnl; - struct rtnetlink_link * rtnetlink_links[NPROTO]; static const int rtm_min[RTM_NR_FAMILIES] = @@ -842,7 +841,6 @@ EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); EXPORT_SYMBOL(rtnetlink_links); EXPORT_SYMBOL(rtnetlink_put_metrics); -EXPORT_SYMBOL(rtnl); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); -- cgit v1.2.3-71-gd317 From ac0b04627269ff16c3c7ab854a65fe6780c6e3e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 00:15:33 -0700 Subject: [SCTP]: Extend /proc/net/sctp/snmp to provide more statistics. This patch adds more statistics info under /proc/net/sctp/snmp that should be useful for debugging. The additional events that are counted now include timer expirations, retransmits, packet and data chunk discards. The Data chunk discards include all the cases where a data chunk is discarded including high tsn, bad stream, dup tsn and the most useful one(out of receive buffer/rwnd). Also moved the SCTP MIB data structures from the generic include directories to include/sctp/sctp.h. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/snmp.h | 33 --------------------------------- include/net/sctp/sctp.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/net/snmp.h | 6 ------ net/sctp/input.c | 8 ++++++-- net/sctp/inqueue.c | 4 ++-- net/sctp/outqueue.c | 6 +++++- net/sctp/proc.c | 17 ++++++++++++++++- net/sctp/sm_statefuns.c | 15 +++++++++++++++ 8 files changed, 88 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 30156556f78d..854aa6b543f1 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -160,39 +160,6 @@ enum __UDP_MIB_MAX }; -/* sctp mib definitions */ -/* - * draft-ietf-sigtran-sctp-mib-07.txt - */ -enum -{ - SCTP_MIB_NUM = 0, - SCTP_MIB_CURRESTAB, /* CurrEstab */ - SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ - SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ - SCTP_MIB_ABORTEDS, /* Aborteds */ - SCTP_MIB_SHUTDOWNS, /* Shutdowns */ - SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ - SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ - SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ - SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ - SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ - SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ - SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ - SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ - SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ - SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ - SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ - SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ - SCTP_MIB_RTOALGORITHM, /* RtoAlgorithm */ - SCTP_MIB_RTOMIN, /* RtoMin */ - SCTP_MIB_RTOMAX, /* RtoMax */ - SCTP_MIB_RTOINITIAL, /* RtoInitial */ - SCTP_MIB_VALCOOKIELIFE, /* ValCookieLife */ - SCTP_MIB_MAXINITRETR, /* MaxInitRetr */ - __SCTP_MIB_MAX -}; - /* linux mib definitions */ enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1c1abce5f6b6..e274fd479990 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -216,6 +216,50 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); #endif /* !TEST_FRAME */ +/* sctp mib definitions */ +enum +{ + SCTP_MIB_NUM = 0, + SCTP_MIB_CURRESTAB, /* CurrEstab */ + SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ + SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ + SCTP_MIB_ABORTEDS, /* Aborteds */ + SCTP_MIB_SHUTDOWNS, /* Shutdowns */ + SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ + SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ + SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ + SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ + SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ + SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ + SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ + SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ + SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ + SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ + SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ + SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ + SCTP_MIB_T1_INIT_EXPIREDS, + SCTP_MIB_T1_COOKIE_EXPIREDS, + SCTP_MIB_T2_SHUTDOWN_EXPIREDS, + SCTP_MIB_T3_RTX_EXPIREDS, + SCTP_MIB_T4_RTO_EXPIREDS, + SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS, + SCTP_MIB_DELAY_SACK_EXPIREDS, + SCTP_MIB_AUTOCLOSE_EXPIREDS, + SCTP_MIB_T3_RETRANSMITS, + SCTP_MIB_PMTUD_RETRANSMITS, + SCTP_MIB_FAST_RETRANSMITS, + SCTP_MIB_IN_PKT_SOFTIRQ, + SCTP_MIB_IN_PKT_BACKLOG, + SCTP_MIB_IN_PKT_DISCARDS, + SCTP_MIB_IN_DATA_CHUNK_DISCARDS, + __SCTP_MIB_MAX +}; + +#define SCTP_MIB_MAX __SCTP_MIB_MAX +struct sctp_mib { + unsigned long mibs[SCTP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + /* Print debugging messages. */ #if SCTP_DEBUG diff --git a/include/net/snmp.h b/include/net/snmp.h index a36bed8ea210..464970e39ec0 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -100,12 +100,6 @@ struct udp_mib { unsigned long mibs[UDP_MIB_MAX]; } __SNMP_MIB_ALIGN__; -/* SCTP */ -#define SCTP_MIB_MAX __SCTP_MIB_MAX -struct sctp_mib { - unsigned long mibs[SCTP_MIB_MAX]; -} __SNMP_MIB_ALIGN__; - /* Linux */ #define LINUX_MIB_MAX __LINUX_MIB_MAX struct linux_mib { diff --git a/net/sctp/input.c b/net/sctp/input.c index 42b66e74bbb5..8a34d95602ce 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -255,10 +255,13 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk)) { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); sctp_add_backlog(sk, skb); - else + } else { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); + } sctp_bh_unlock_sock(sk); @@ -271,6 +274,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index cf0c767d43ae..cf6deed7e849 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue) /* Put a new packet in an SCTP inqueue. * We assume that packet->sctp_hdr is set and in host byte order. */ -void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) +void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) { /* Directly call the packet handling routine. */ @@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ - list_add_tail(&packet->list, &q->in_chunk_list); + list_add_tail(&chunk->list, &q->in_chunk_list); q->immediate.func(q->immediate.data); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 30b710c54e64..37074a39ecbb 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -467,6 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, switch(reason) { case SCTP_RTXR_T3_RTX: + SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -475,12 +476,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_assoc_update_retran_path(transport->asoc); break; case SCTP_RTXR_FAST_RTX: + SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); fast_retransmit = 1; break; case SCTP_RTXR_PMTUD: - default: + SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); break; + default: + BUG(); } sctp_retransmit_mark(q, transport, fast_retransmit); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5b3b0e0ae7e5..a356d8d310a9 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), + SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), + SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), + SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), + SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), + SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), + SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), + SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), + SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), + SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), + SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), + SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), + SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), + SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), + SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), + SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), SNMP_MIB_SENTINEL }; @@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, - (sk->sk_rcvbuf - assoc->rwnd), assoc->sndbuf_used, + (sk->sk_rcvbuf - assoc->rwnd), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5b5ae7958322..32f57f42af9e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2663,9 +2663,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3652,6 +3654,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4548,6 +4551,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; + SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4616,6 +4621,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -4650,6 +4656,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -4709,6 +4716,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -4753,6 +4761,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4814,6 +4824,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; + SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in * RFC2960 [5] section 8.1 and 8.2. @@ -4880,6 +4892,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -4910,6 +4923,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; + SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper * layer, the endpoint enters SHUTDOWN-PENDING state and -- cgit v1.2.3-71-gd317 From 9ba1627617d396135a4d679542a3623d5819e628 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 22 Aug 2006 00:29:37 -0700 Subject: [NETFILTER]: x_tables: replace IPv4 dscp match by address family independent version This replaces IPv4 dscp match by address family independent version. This also - utilizes dsfield.h to get the DS field in IPv4/IPv6 header, and - checks for the DSCP value from user space. - fixes Kconfig help text. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_dscp.h | 23 +++++++ include/linux/netfilter_ipv4/ipt_dscp.h | 14 ++-- net/ipv4/netfilter/Kconfig | 11 ---- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_dscp.c | 54 --------------- net/netfilter/Kconfig | 11 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_dscp.c | 113 ++++++++++++++++++++++++++++++++ 8 files changed, 154 insertions(+), 74 deletions(-) create mode 100644 include/linux/netfilter/xt_dscp.h delete mode 100644 net/ipv4/netfilter/ipt_dscp.c create mode 100644 net/netfilter/xt_dscp.c (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_dscp.h b/include/linux/netfilter/xt_dscp.h new file mode 100644 index 000000000000..1da61e6acaf7 --- /dev/null +++ b/include/linux/netfilter/xt_dscp.h @@ -0,0 +1,23 @@ +/* x_tables module for matching the IPv4/IPv6 DSCP field + * + * (C) 2002 Harald Welte + * This software is distributed under GNU GPL v2, 1991 + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp +*/ +#ifndef _XT_DSCP_H +#define _XT_DSCP_H + +#define XT_DSCP_MASK 0xfc /* 11111100 */ +#define XT_DSCP_SHIFT 2 +#define XT_DSCP_MAX 0x3f /* 00111111 */ + +/* match info */ +struct xt_dscp_info { + u_int8_t dscp; + u_int8_t invert; +}; + +#endif /* _XT_DSCP_H */ diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h index 2fa6dfe92894..4b82ca912b0e 100644 --- a/include/linux/netfilter_ipv4/ipt_dscp.h +++ b/include/linux/netfilter_ipv4/ipt_dscp.h @@ -10,14 +10,12 @@ #ifndef _IPT_DSCP_H #define _IPT_DSCP_H -#define IPT_DSCP_MASK 0xfc /* 11111100 */ -#define IPT_DSCP_SHIFT 2 -#define IPT_DSCP_MAX 0x3f /* 00111111 */ +#include -/* match info */ -struct ipt_dscp_info { - u_int8_t dscp; - u_int8_t invert; -}; +#define IPT_DSCP_MASK XT_DSCP_MASK +#define IPT_DSCP_SHIFT XT_DSCP_SHIFT +#define IPT_DSCP_MAX XT_DSCP_MAX + +#define ipt_dscp_info xt_dscp_info #endif /* _IPT_DSCP_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ef0b5aac5838..d88d71d1ce0d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_DSCP - tristate "DSCP match support" - depends on IP_NF_IPTABLES - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_AH tristate "AH match support" depends on IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3ded4a3af59c..b946b0f3ea9d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c deleted file mode 100644 index 47177591aeb6..000000000000 --- a/net/ipv4/netfilter/ipt_dscp.c +++ /dev/null @@ -1,54 +0,0 @@ -/* IP tables module for matching the value of the IPv4 DSCP field - * - * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp - * - * (C) 2002 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("iptables DSCP matching module"); -MODULE_LICENSE("GPL"); - -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_dscp_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - - u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; -} - -static struct ipt_match dscp_match = { - .name = "dscp", - .match = match, - .matchsize = sizeof(struct ipt_dscp_info), - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_match(&dscp_match); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_match(&dscp_match); - -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a9894ddfd72a..f781405f5d65 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -263,6 +263,17 @@ config NETFILTER_XT_MATCH_DCCP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_DSCP + tristate '"DSCP" match support' + depends on NETFILTER_XTABLES + help + This option adds a `DSCP' match, which allows you to match against + the IPv4/IPv6 header DSCP field (differentiated services codepoint). + + The DSCP field can have any value between 0x0 and 0x3f inclusive. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_ESP tristate '"ESP" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6fa4b7580458..0b8a70c1df46 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c new file mode 100644 index 000000000000..82e250d1f007 --- /dev/null +++ b/net/netfilter/xt_dscp.c @@ -0,0 +1,113 @@ +/* IP tables module for matching the value of the IPv4/IPv6 DSCP field + * + * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp + * + * (C) 2002 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("x_tables DSCP matching module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_dscp"); +MODULE_ALIAS("ip6t_dscp"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static int match6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static int checkentry(const char *tablename, + const void *info, + const struct xt_match *match, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; + + if (dscp > XT_DSCP_MAX) { + printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); + return 0; + } + + return 1; +} + +static struct xt_match dscp_match = { + .name = "dscp", + .match = match, + .checkentry = checkentry, + .matchsize = sizeof(struct xt_dscp_info), + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match dscp6_match = { + .name = "dscp", + .match = match6, + .checkentry = checkentry, + .matchsize = sizeof(struct xt_dscp_info), + .family = AF_INET6, + .me = THIS_MODULE, +}; + +static int __init xt_dscp_match_init(void) +{ + int ret; + ret = xt_register_match(&dscp_match); + if (ret) + return ret; + + ret = xt_register_match(&dscp6_match); + if (ret) + xt_unregister_match(&dscp_match); + + return ret; +} + +static void __exit xt_dscp_match_fini(void) +{ + xt_unregister_match(&dscp_match); + xt_unregister_match(&dscp6_match); +} + +module_init(xt_dscp_match_init); +module_exit(xt_dscp_match_fini); -- cgit v1.2.3-71-gd317 From a468701db58a8b3e08e3f55fa6ac66db42014922 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 22 Aug 2006 00:30:26 -0700 Subject: [NETFILTER]: x_tables: replace IPv4 DSCP target by address family independent version This replaces IPv4 DSCP target by address family independent version. This also - utilizes dsfield.h to get/mangle DS field in IPv4/IPv6 header - fixes Kconfig help text. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_DSCP.h | 20 +++++ include/linux/netfilter_ipv4/ipt_DSCP.h | 6 +- net/ipv4/netfilter/Kconfig | 11 --- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_DSCP.c | 96 ----------------------- net/netfilter/Kconfig | 12 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_DSCP.c | 130 ++++++++++++++++++++++++++++++++ 8 files changed, 165 insertions(+), 112 deletions(-) create mode 100644 include/linux/netfilter/xt_DSCP.h delete mode 100644 net/ipv4/netfilter/ipt_DSCP.c create mode 100644 net/netfilter/xt_DSCP.c (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_DSCP.h b/include/linux/netfilter/xt_DSCP.h new file mode 100644 index 000000000000..3c7c963997bd --- /dev/null +++ b/include/linux/netfilter/xt_DSCP.h @@ -0,0 +1,20 @@ +/* x_tables module for setting the IPv4/IPv6 DSCP field + * + * (C) 2002 Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * This software is distributed under GNU GPL v2, 1991 + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp +*/ +#ifndef _XT_DSCP_TARGET_H +#define _XT_DSCP_TARGET_H +#include + +/* target info */ +struct xt_DSCP_info { + u_int8_t dscp; +}; + +#endif /* _XT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h index b30f510b5bef..3491e524d5ea 100644 --- a/include/linux/netfilter_ipv4/ipt_DSCP.h +++ b/include/linux/netfilter_ipv4/ipt_DSCP.h @@ -11,10 +11,8 @@ #ifndef _IPT_DSCP_TARGET_H #define _IPT_DSCP_TARGET_H #include +#include -/* target info */ -struct ipt_DSCP_info { - u_int8_t dscp; -}; +#define ipt_DSCP_info xt_DSCP_info #endif /* _IPT_DSCP_TARGET_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d88d71d1ce0d..a55b8ff70ded 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -557,17 +557,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_DSCP - tristate "DSCP target support" - depends on IP_NF_MANGLE - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_TTL tristate 'TTL target support' depends on IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index b946b0f3ea9d..09aaed1a8063 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -67,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c deleted file mode 100644 index c8e971288dfe..000000000000 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ /dev/null @@ -1,96 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field, Version 1.8 - * - * (C) 2002 by Harald Welte - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp -*/ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("iptables DSCP modification module"); -MODULE_LICENSE("GPL"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo, - void *userinfo) -{ - const struct ipt_DSCP_info *dinfo = targinfo; - u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - - if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) - | sh_dscp; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^ 0xFFFF)); - } - return IPT_CONTINUE; -} - -static int -checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; - - if ((dscp > IPT_DSCP_MAX)) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; - } - return 1; -} - -static struct ipt_target ipt_dscp_reg = { - .name = "DSCP", - .target = target, - .targetsize = sizeof(struct ipt_DSCP_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_target(&ipt_dscp_reg); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_target(&ipt_dscp_reg); -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f781405f5d65..0a28d2c5c44f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK . The module will be called ipt_CONNMARK.o. If unsure, say `N'. +config NETFILTER_XT_TARGET_DSCP + tristate '"DSCP" target support' + depends on NETFILTER_XTABLES + depends on IP_NF_MANGLE || IP6_NF_MANGLE + help + This option adds a `DSCP' target, which allows you to manipulate + the IPv4/IPv6 header DSCP field (differentiated services codepoint). + + The DSCP field can have any value between 0x0 and 0x3f inclusive. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0b8a70c1df46..a74be492fd0a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c new file mode 100644 index 000000000000..79df8165cd79 --- /dev/null +++ b/net/netfilter/xt_DSCP.c @@ -0,0 +1,130 @@ +/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 + * + * (C) 2002 by Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp +*/ + +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("x_tables DSCP modification module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_DSCP"); +MODULE_ALIAS("ip6t_DSCP"); + +static unsigned int target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, + void *userinfo) +{ + const struct xt_DSCP_info *dinfo = targinfo; + u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; + + if (dscp != dinfo->dscp) { + if (!skb_make_writable(pskb, sizeof(struct iphdr))) + return NF_DROP; + + ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + + } + return XT_CONTINUE; +} + +static unsigned int target6(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, + void *userinfo) +{ + const struct xt_DSCP_info *dinfo = targinfo; + u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; + + if (dscp != dinfo->dscp) { + if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) + return NF_DROP; + + ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + } + return XT_CONTINUE; +} + +static int checkentry(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; + + if ((dscp > XT_DSCP_MAX)) { + printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); + return 0; + } + return 1; +} + +static struct xt_target xt_dscp_reg = { + .name = "DSCP", + .target = target, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_target xt_dscp6_reg = { + .name = "DSCP", + .target = target6, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, +}; + +static int __init xt_dscp_target_init(void) +{ + int ret; + ret = xt_register_target(&xt_dscp_reg); + if (ret) + return ret; + + ret = xt_register_target(&xt_dscp6_reg); + if (ret) + xt_unregister_target(&xt_dscp_reg); + + return ret; +} + +static void __exit xt_dscp_target_fini(void) +{ + xt_unregister_target(&xt_dscp_reg); + xt_unregister_target(&xt_dscp6_reg); +} + +module_init(xt_dscp_target_init); +module_exit(xt_dscp_target_fini); -- cgit v1.2.3-71-gd317 From 2521c12cf1a29f6c380b13ca32a38175f6beed08 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2006 00:31:24 -0700 Subject: [NETFILTER]: conntrack: introduce connection mark event This patch introduces the mark event. ctnetlink can use this to know if the mark needs to be dumped. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ net/netfilter/xt_CONNMARK.c | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index d2e4bd7a7a14..9e0dae07861e 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -125,6 +125,10 @@ enum ip_conntrack_events /* Counter highest bit has been set */ IPCT_COUNTER_FILLING_BIT = 11, IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), + + /* Mark is set */ + IPCT_MARK_BIT = 12, + IPCT_MARK = (1 << IPCT_MARK_BIT), }; enum ip_conntrack_expect_events { diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 60c375d36f01..784482b74e58 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -52,13 +52,25 @@ target(struct sk_buff **pskb, switch(markinfo->mode) { case XT_CONNMARK_SET: newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; - if (newmark != *ctmark) + if (newmark != *ctmark) { *ctmark = newmark; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_event_cache(IPCT_MARK, *pskb); +#else + nf_conntrack_event_cache(IPCT_MARK, *pskb); +#endif + } break; case XT_CONNMARK_SAVE: newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); - if (*ctmark != newmark) + if (*ctmark != newmark) { *ctmark = newmark; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_event_cache(IPCT_MARK, *pskb); +#else + nf_conntrack_event_cache(IPCT_MARK, *pskb); +#endif + } break; case XT_CONNMARK_RESTORE: nfmark = (*pskb)->nfmark; -- cgit v1.2.3-71-gd317 From 52d9c42ef2563d2c420eb23b96bf5a4cae9e167b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:33:45 -0700 Subject: [NETFILTER]: x_tables: add helpers for mass match/target registration Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 5 ++++ net/netfilter/x_tables.c | 60 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 48cc32d83f77..9a9912430e3a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -290,8 +290,13 @@ struct xt_table_info extern int xt_register_target(struct xt_target *target); extern void xt_unregister_target(struct xt_target *target); +extern int xt_register_targets(struct xt_target *target, unsigned int n); +extern void xt_unregister_targets(struct xt_target *target, unsigned int n); + extern int xt_register_match(struct xt_match *target); extern void xt_unregister_match(struct xt_match *target); +extern int xt_register_matches(struct xt_match *match, unsigned int n); +extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 174e8f970095..8037ba63d587 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -86,6 +86,36 @@ xt_unregister_target(struct xt_target *target) } EXPORT_SYMBOL(xt_unregister_target); +int +xt_register_targets(struct xt_target *target, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = xt_register_target(&target[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + xt_unregister_targets(target, i); + return err; +} +EXPORT_SYMBOL(xt_register_targets); + +void +xt_unregister_targets(struct xt_target *target, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + xt_unregister_target(&target[i]); +} +EXPORT_SYMBOL(xt_unregister_targets); + int xt_register_match(struct xt_match *match) { @@ -113,6 +143,36 @@ xt_unregister_match(struct xt_match *match) } EXPORT_SYMBOL(xt_unregister_match); +int +xt_register_matches(struct xt_match *match, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = xt_register_match(&match[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + xt_unregister_matches(match, i); + return err; +} +EXPORT_SYMBOL(xt_register_matches); + +void +xt_unregister_matches(struct xt_match *match, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + xt_unregister_match(&match[i]); +} +EXPORT_SYMBOL(xt_unregister_matches); + /* * These are weird, but module loading must not be done with mutex -- cgit v1.2.3-71-gd317 From fe1cb10873b44cf89082465823ee6d4d4ac63ad7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:35:47 -0700 Subject: [NETFILTER]: x_tables: remove unused argument to target functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 3 +-- include/linux/netfilter_arp/arp_tables.h | 3 +-- include/linux/netfilter_ipv4/ip_tables.h | 3 +-- include/linux/netfilter_ipv6/ip6_tables.h | 3 +-- net/ipv4/netfilter/arp_tables.c | 9 +++------ net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 2 +- net/ipv4/netfilter/ip_nat_rule.c | 8 +++----- net/ipv4/netfilter/ip_tables.c | 9 +++------ net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 +-- net/ipv4/netfilter/ipt_ECN.c | 3 +-- net/ipv4/netfilter/ipt_LOG.c | 3 +-- net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +-- net/ipv4/netfilter/ipt_NETMAP.c | 3 +-- net/ipv4/netfilter/ipt_REDIRECT.c | 3 +-- net/ipv4/netfilter/ipt_REJECT.c | 3 +-- net/ipv4/netfilter/ipt_SAME.c | 3 +-- net/ipv4/netfilter/ipt_TCPMSS.c | 3 +-- net/ipv4/netfilter/ipt_TOS.c | 3 +-- net/ipv4/netfilter/ipt_TTL.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 4 ++-- net/ipv4/netfilter/iptable_mangle.c | 4 ++-- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 9 +++------ net/ipv6/netfilter/ip6t_HL.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 3 +-- net/ipv6/netfilter/ip6t_REJECT.c | 3 +-- net/ipv6/netfilter/ip6table_filter.c | 4 ++-- net/ipv6/netfilter/ip6table_mangle.c | 4 ++-- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/netfilter/xt_CLASSIFY.c | 3 +-- net/netfilter/xt_CONNMARK.c | 3 +-- net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_DSCP.c | 6 ++---- net/netfilter/xt_MARK.c | 6 ++---- net/netfilter/xt_NFQUEUE.c | 3 +-- net/netfilter/xt_NOTRACK.c | 3 +-- net/netfilter/xt_SECMARK.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/sched/act_ipt.c | 2 +- 41 files changed, 55 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9a9912430e3a..9cef0e91542b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -211,8 +211,7 @@ struct xt_target const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userdata); + const void *targinfo); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 62cc27daca4e..149e87c9ab13 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -248,8 +248,7 @@ extern unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata); + struct arpt_table *table); #define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1)) #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index c0dac16e1902..a536bbdef145 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -312,8 +312,7 @@ extern unsigned int ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata); + struct ipt_table *table); #define IPT_ALIGN(s) XT_ALIGN(s) diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index d0d5d1ee4be3..d7a8e9c0dad0 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -300,8 +300,7 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ip6t_table *table, - void *userdata); + struct ip6t_table *table); /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d1d7a6e72a5..c6bd270bf46a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -208,8 +208,7 @@ static unsigned int arpt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("arp_tables: error: '%s'\n", (char *)targinfo); @@ -226,8 +225,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata) + struct arpt_table *table) { static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; @@ -302,8 +300,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); /* Target might have changed stuff. */ arp = (*pskb)->nh.arph; diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a58325c1ceb9..05fb2421bb26 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -11,7 +11,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { const struct arpt_mangle *mangle = targinfo; struct arphdr *arp; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d7c472faa53b..7edea2a1696c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return arpt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aba926c1cb0..1aa0e4f462a5 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -104,8 +104,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -147,8 +146,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -255,7 +253,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 048514f15f2f..8ce5b6f76447 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip_tables: error: `%s'\n", (char *)targinfo); @@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata) + struct ipt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d994c5f5744c..a08383cf9e7a 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -302,8 +302,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 7e30e6d2b5da..1c3da4a48e5f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -85,8 +85,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_ECN_info *einfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b98f7b08b084..a8d356c6191f 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_log_info *loginfo = targinfo; struct nf_loginfo li; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index ebd94f2abf0d..9659793c66c0 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -64,8 +64,7 @@ masquerade_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 736c4b5a86a7..fd5e74a19fb5 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -55,8 +55,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f290463232de..839fe99f71d4 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -58,8 +58,7 @@ redirect_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 95c6662b663c..1dfd8e56be8b 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -228,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_reject_info *reject = targinfo; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 7169b09b5a67..cf801749490f 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -133,8 +133,7 @@ same_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 0fce85e05507..6d668dcfc22a 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -41,8 +41,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; struct tcphdr *tcph; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 52e9d705d48e..043df0137084 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -26,8 +26,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; struct iphdr *iph = (*pskb)->nh.iph; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 2afb2a8aa8c5..164007107b5e 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -23,7 +23,7 @@ static unsigned int ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct iphdr *iph; const struct ipt_TTL_info *info = targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d46fd677fa11..4c5f0a117862 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7f417484bfbf..e2e7dd8d7903 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -90,7 +90,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4e7998beda63..79336cb42527 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook, daddr = (*pskb)->nh.iph->daddr; tos = (*pskb)->nh.iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE && ((*pskb)->nh.iph->saddr != saddr diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7912cce1e1b8..bcbeb4aeacd9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -95,7 +95,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c9d6b23cd3f7..38cd7ffda9a0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -220,8 +220,7 @@ ip6t_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip6_tables: error: `%s'\n", (char *)targinfo); @@ -258,8 +257,7 @@ ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct xt_table *table, - void *userdata) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; @@ -349,8 +347,7 @@ ip6t_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ip6t_entry *)table_base)->comefrom diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index b8eff8ee69b1..c85d124f9a3d 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -22,7 +22,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 73c6300109d6..acb91733e1fd 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ip6t_log_info *loginfo = targinfo; struct nf_loginfo li; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 7929ff402166..343acd3cbf5e 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -180,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ip6t_reject_info *reject = targinfo; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 60976c0c58e8..2fc07c74decf 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 03a13eab1dae..32db04fd8310 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -174,7 +174,7 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 61a7c58e99f8..b4154da575c0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 1f92edd05933..50de965bb104 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -29,8 +29,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_classify_target_info *clinfo = targinfo; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index e577356b5c71..c2125f6ee128 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -38,8 +38,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_connmark_target_info *markinfo = targinfo; u_int32_t diff; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 48f7fc3c85cd..4b9cc65bb82b 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb) static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index a1cd9723644f..9d23c9580d80 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -32,8 +32,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; @@ -54,8 +53,7 @@ static unsigned int target6(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 0a6127219467..95a171c87994 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_mark_target_info *markinfo = targinfo; @@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_mark_target_info_v1 *markinfo = targinfo; int mark = 0; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 7b982283abdb..db9b896e57c8 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -29,8 +29,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_NFQ_info *tinfo = targinfo; diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index cab881d4424c..6d00dcaed238 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -16,8 +16,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { /* Previously seen (loopback)? Ignore. */ if ((*pskb)->nfct != NULL) diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 4300988786c9..8a04dcf2611e 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -31,7 +31,7 @@ static u8 mode; static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { u32 secmark = 0; const struct xt_secmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 2d49948d3c38..d725e8b84503 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -143,7 +143,7 @@ static int check(const char *tablename, return 1; } -static struct xt_match xt_connbytes_match = { +static struct xt_match xt_connbytes_match[] = { { .name = "connbytes", .family = AF_INET, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 224c078a398e..45a3143b8629 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -222,7 +222,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, ipt->tcfi_hook, ipt->tcfi_t->u.kernel.target, - ipt->tcfi_t->data, NULL); + ipt->tcfi_t->data); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; -- cgit v1.2.3-71-gd317 From efa741656e9ebf5fd6e0432b0d1b3c7f156392d3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:36:37 -0700 Subject: [NETFILTER]: x_tables: remove unused size argument to check/destroy functions The size is verified by x_tables and isn't needed by the modules anymore. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 8 ++------ net/ipv4/netfilter/arp_tables.c | 5 +---- net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/ip_nat_rule.c | 2 -- net/ipv4/netfilter/ip_tables.c | 14 +++----------- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 +--- net/ipv4/netfilter/ipt_ECN.c | 1 - net/ipv4/netfilter/ipt_LOG.c | 1 - net/ipv4/netfilter/ipt_MASQUERADE.c | 1 - net/ipv4/netfilter/ipt_NETMAP.c | 1 - net/ipv4/netfilter/ipt_REDIRECT.c | 1 - net/ipv4/netfilter/ipt_REJECT.c | 1 - net/ipv4/netfilter/ipt_SAME.c | 4 +--- net/ipv4/netfilter/ipt_TCPMSS.c | 1 - net/ipv4/netfilter/ipt_TOS.c | 1 - net/ipv4/netfilter/ipt_TTL.c | 1 - net/ipv4/netfilter/ipt_ULOG.c | 1 - net/ipv4/netfilter/ipt_ah.c | 1 - net/ipv4/netfilter/ipt_ecn.c | 3 +-- net/ipv4/netfilter/ipt_hashlimit.c | 4 +--- net/ipv4/netfilter/ipt_owner.c | 1 - net/ipv4/netfilter/ipt_recent.c | 5 ++--- net/ipv6/netfilter/ip6_tables.c | 10 ++-------- net/ipv6/netfilter/ip6t_HL.c | 1 - net/ipv6/netfilter/ip6t_LOG.c | 1 - net/ipv6/netfilter/ip6t_REJECT.c | 1 - net/ipv6/netfilter/ip6t_ah.c | 1 - net/ipv6/netfilter/ip6t_dst.c | 1 - net/ipv6/netfilter/ip6t_frag.c | 1 - net/ipv6/netfilter/ip6t_hbh.c | 1 - net/ipv6/netfilter/ip6t_ipv6header.c | 1 - net/ipv6/netfilter/ip6t_owner.c | 1 - net/ipv6/netfilter/ip6t_rt.c | 1 - net/netfilter/xt_CONNMARK.c | 1 - net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_DSCP.c | 1 - net/netfilter/xt_MARK.c | 2 -- net/netfilter/xt_SECMARK.c | 2 +- net/netfilter/xt_connbytes.c | 1 - net/netfilter/xt_connmark.c | 3 +-- net/netfilter/xt_conntrack.c | 3 +-- net/netfilter/xt_dccp.c | 1 - net/netfilter/xt_dscp.c | 1 - net/netfilter/xt_esp.c | 1 - net/netfilter/xt_helper.c | 3 +-- net/netfilter/xt_limit.c | 1 - net/netfilter/xt_mark.c | 1 - net/netfilter/xt_multiport.c | 4 ---- net/netfilter/xt_physdev.c | 1 - net/netfilter/xt_policy.c | 3 +-- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_sctp.c | 1 - net/netfilter/xt_state.c | 3 +-- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 4 +--- net/netfilter/xt_tcpudp.c | 2 -- net/sched/act_ipt.c | 4 +--- 57 files changed, 26 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9cef0e91542b..9d97102a9347 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -174,12 +174,10 @@ struct xt_match const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize); + void (*destroy)(const struct xt_match *match, void *matchinfo); /* Called when userspace align differs from kernel space one */ int (*compat)(void *match, void **dstptr, int *size, int convert); @@ -221,12 +219,10 @@ struct xt_target const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_target *target, void *targinfo, - unsigned int targinfosize); + void (*destroy)(const struct xt_target *target, void *targinfo); /* Called when userspace align differs from kernel space one */ int (*compat)(void *target, void **dstptr, int *size, int convert); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c6bd270bf46a..4f10b06413a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -491,8 +491,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -559,8 +557,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) t = arpt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 05fb2421bb26..d12b1df252a1 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -67,7 +67,7 @@ target(struct sk_buff **pskb, static int checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int targinfosize, unsigned int hook_mask) + void *targinfo, unsigned int hook_mask) { const struct arpt_mangle *mangle = targinfo; diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aa0e4f462a5..e59f5a8ecb6b 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -172,7 +172,6 @@ static int ipt_snat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -189,7 +188,6 @@ static int ipt_dnat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 8ce5b6f76447..a0f36806998c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -464,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -518,7 +517,6 @@ check_match(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -579,8 +577,6 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -652,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -1599,7 +1594,6 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, dm->data, - dm->u.match_size - sizeof(*dm), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -1658,8 +1652,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, goto out; } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, de, target, - t->data, t->u.target_size - sizeof(*t), - de->comefrom)) { + t->data, de->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); goto out; @@ -2182,7 +2175,6 @@ icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_icmp *icmpinfo = matchinfo; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a08383cf9e7a..41589665fc5d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -372,7 +372,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -449,8 +448,7 @@ checkentry(const char *tablename, } /* drop reference count of cluster config when rule is deleted */ -static void destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +static void destroy(const struct xt_target *target, void *targinfo) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 1c3da4a48e5f..23f9c7ebe7eb 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -106,7 +106,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index a8d356c6191f..7dc820df8bc5 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -439,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9659793c66c0..bc65168a3437 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -42,7 +42,6 @@ masquerade_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index fd5e74a19fb5..beb2914225ff 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -33,7 +33,6 @@ check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 839fe99f71d4..f03d43671c6d 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -36,7 +36,6 @@ redirect_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1dfd8e56be8b..b81821edd893 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -276,7 +276,6 @@ static int check(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index cf801749490f..efbcb1198832 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -52,7 +52,6 @@ same_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { unsigned int count, countess, rangeip, index = 0; @@ -116,8 +115,7 @@ same_check(const char *tablename, } static void -same_destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +same_destroy(const struct xt_target *target, void *targinfo) { struct ipt_same_info *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 6d668dcfc22a..ac8a35eeea3f 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -207,7 +207,6 @@ ipt_tcpmss_checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 043df0137084..471a4c438b0a 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -49,7 +49,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 164007107b5e..214d9d9c428f 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -67,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_TTL_info *info = targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 4c5f0a117862..2b104ea54f48 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hookmask) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 2927135873d7..1798f86bc534 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -74,7 +74,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ipt_ah *ahinfo = matchinfo; diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index b28250414933..dafbdec0efc0 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 3bd2368e1fc9..b5b74b07370c 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct ipt_hashlimit_info *r = matchinfo; @@ -529,8 +528,7 @@ hashlimit_checkentry(const char *tablename, } static void -hashlimit_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +hashlimit_destroy(const struct xt_match *match, void *matchinfo) { struct ipt_hashlimit_info *r = matchinfo; diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 5ac6ac023b5e..78c336f12a9e 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -56,7 +56,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_owner_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 682c0946201e..32ae8d7ac506 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -238,7 +238,7 @@ out: static int ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -294,8 +294,7 @@ out: } static void -ipt_recent_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +ipt_recent_destroy(const struct xt_match *match, void *matchinfo) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 38cd7ffda9a0..d1c315364ee7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -504,8 +504,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -558,7 +557,6 @@ check_match(struct ip6t_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -619,8 +617,6 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -692,8 +688,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, NULL); t = ip6t_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -1349,7 +1344,6 @@ icmp6_checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_icmp *icmpinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index c85d124f9a3d..e54ea92d107b 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -66,7 +66,6 @@ static int ip6t_hl_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip6t_HL_info *info = targinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index acb91733e1fd..0cf537d30185 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -451,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 343acd3cbf5e..311eae82feb3 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -223,7 +223,6 @@ static int check(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 2f7bb20c758b..ec1b1608156c 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -102,7 +102,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_ah *ahinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 9422413d0571..223c335467cc 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -182,7 +182,6 @@ checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 06768c84bd31..78d9c8b9e28a 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -119,7 +119,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_frag *fraginfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 374f1be85c0d..72defc816563 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -182,7 +182,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 9375eeb1369f..3093c398002f 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ipv6header_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 5d047990cd44..4eb9bbc4ebc3 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -57,7 +57,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_owner_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index fbb0184a41d8..bcb2e168a5bc 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -197,7 +197,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_rt *rtinfo = matchinfo; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index c2125f6ee128..0e4249ddc17b 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -89,7 +89,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_connmark_target_info *matchinfo = targinfo; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 4b9cc65bb82b..4b0e14bb1726 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_connsecmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 9d23c9580d80..a7cc75aeb38d 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -72,7 +72,6 @@ static int checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 95a171c87994..782f8d8c3edf 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -74,7 +74,6 @@ checkentry_v0(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info *markinfo = targinfo; @@ -91,7 +90,6 @@ checkentry_v1(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info_v1 *markinfo = targinfo; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 8a04dcf2611e..451b67c4bb53 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -85,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_secmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d725e8b84503..dcc497ea8183 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -125,7 +125,6 @@ static int check(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_connbytes_info *sinfo = matchinfo; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index a97b2d455b79..c9104d05a19c 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -55,7 +55,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_connmark_info *cm = matchinfo; @@ -75,7 +74,7 @@ checkentry(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 1540885174ee..39c57e9f7563 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -208,7 +208,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -222,7 +221,7 @@ checkentry(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 5ca6f5288f46..3e6cf430e518 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -131,7 +131,6 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_dccp_info *info = matchinfo; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index d84075c30159..26c7f4ad102a 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,6 @@ static int checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 7b19bc9ea205..7c95f149d942 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -79,7 +79,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct xt_esp *espinfo = matchinfo; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index db453a7a154e..5d7818b73e3a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -139,7 +139,6 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_helper_info *info = matchinfo; @@ -156,7 +155,7 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index e8d5e7ac695a..b9c9ff3a06ea 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_rateinfo *r = matchinfo; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 39f9b079f5d4..e8059cd17275 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -39,7 +39,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_mark_info *minfo = matchinfo; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index e74f9bb98b3c..d3aefd380930 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -176,7 +176,6 @@ checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -191,7 +190,6 @@ checkentry_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -206,7 +204,6 @@ checkentry6(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; @@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index af3d70f96ecd..fd8f954cded5 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -106,7 +106,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_physdev_info *info = matchinfo; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index f5639c451112..e9d81378d653 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -135,8 +135,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { struct xt_policy_info *info = matchinfo; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index cc44f87cb8e6..b75fa2c70e66 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -41,7 +41,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 5628621170e6..7956acaaa24b 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -163,7 +163,6 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_sctp_info *info = matchinfo; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5f9492e3b2b1..d9010b16a1f9 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -48,7 +48,6 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -62,7 +61,7 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 5181630a87fc..091a9f89f5d5 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -55,7 +55,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 1a1c1d17d85e..4453252400aa 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -46,7 +46,6 @@ static int checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_string_info *conf = matchinfo; @@ -69,8 +68,7 @@ static int checkentry(const char *tablename, return 1; } -static void destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +static void destroy(const struct xt_match *match, void *matchinfo) { textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 54aab051af86..e76a68e0bc66 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *tcpinfo = matchinfo; @@ -190,7 +189,6 @@ udp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *udpinfo = matchinfo; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 45a3143b8629..d8c9310da6e5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -69,7 +69,6 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(table, NULL, t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t), hook)) { module_put(t->u.kernel.target->me); ret = -EINVAL; @@ -81,8 +80,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int static void ipt_destroy_target(struct ipt_entry_target *t) { if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); } -- cgit v1.2.3-71-gd317 From 53e26658282373b84ba85a0c9807cb762f7738a6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:20 -0700 Subject: [NETFILTER]: nfnetlink: remove unnecessary packed attributes Remove unnecessary packed attributes in nfnetlink structures. Unfortunately in a few cases they have to stay to avoid changing structure sizes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 4 ++-- include/linux/netfilter/nfnetlink_log.h | 6 +++--- include/linux/netfilter/nfnetlink_queue.h | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 9f5b12cf489b..6d8e3e5a80e9 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -43,7 +43,7 @@ struct nfattr u_int16_t nfa_len; u_int16_t nfa_type; /* we use 15 bits for the type, and the highest * bit to indicate whether the payload is nested */ -} __attribute__ ((packed)); +}; /* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from * rtnetlink.h, it's time to put this in a generic file */ @@ -79,7 +79,7 @@ struct nfgenmsg { u_int8_t nfgen_family; /* AF_xxx */ u_int8_t version; /* nfnetlink version */ u_int16_t res_id; /* resource id */ -} __attribute__ ((packed)); +}; #define NFNETLINK_V0 0 diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a7497c7436df..87b92f8b988f 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -19,18 +19,18 @@ struct nfulnl_msg_packet_hdr { u_int16_t hw_protocol; /* hw protocol (network order) */ u_int8_t hook; /* netfilter hook */ u_int8_t _pad; -} __attribute__ ((packed)); +}; struct nfulnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -} __attribute__ ((packed)); +}; struct nfulnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -} __attribute__ ((packed)); +}; #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 9e774373244c..36af0360b56d 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -22,12 +22,12 @@ struct nfqnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -} __attribute__ ((packed)); +}; struct nfqnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -} __attribute__ ((packed)); +}; enum nfqnl_attr_type { NFQA_UNSPEC, @@ -49,7 +49,7 @@ enum nfqnl_attr_type { struct nfqnl_msg_verdict_hdr { u_int32_t verdict; u_int32_t id; -} __attribute__ ((packed)); +}; enum nfqnl_msg_config_cmds { @@ -64,7 +64,7 @@ struct nfqnl_msg_config_cmd { u_int8_t command; /* nfqnl_msg_config_cmds */ u_int8_t _pad; u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ -} __attribute__ ((packed)); +}; enum nfqnl_config_mode { NFQNL_COPY_NONE, -- cgit v1.2.3-71-gd317 From 91270cf81765152f6e77953440beb4d3b34a71b5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:38 -0700 Subject: [NETFILTER]: x_tables: add data member to struct xt_match Shared match functions can use this to make runtime decisions basen on the used match. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9d97102a9347..03d1027fb0e8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -185,6 +185,9 @@ struct xt_match /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; + /* Free to use by each match */ + unsigned long data; + char *table; unsigned int matchsize; unsigned int hooks; -- cgit v1.2.3-71-gd317 From 8e1ef0a95b87e8b4292b2ba733e8cb854ea2d2fe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 29 Aug 2006 17:15:09 -0700 Subject: [IPV6]: Cache source address as well in ipv6_pinfo{}. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +++ include/net/ip6_route.h | 9 ++++++--- net/dccp/ipv6.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 7 ++++++- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 3 +++ net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 7 ++++++- 9 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 297853c841b4..02d14a3ff2af 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -242,6 +242,9 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_addr *daddr_cache; +#ifdef CONFIG_IPV6_SUBTREES + struct in6_addr *saddr_cache; +#endif __u32 flow_label; __u32 frag_size; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 249ce4545ef0..0d40f84df21b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -144,21 +144,24 @@ extern rwlock_t rt6_lock; * Store a destination cache entry in a socket */ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; sk_setup_caps(sk, dst); np->daddr_cache = daddr; +#ifdef CONFIG_IPV6_SUBTREES + np->saddr_cache = saddr; +#endif np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, struct in6_addr *saddr) { write_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr); + __ip6_dst_store(sk, dst, daddr, saddr); write_unlock(&sk->sk_dst_lock); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 231bc7c7e749..f9c5e12d7038 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -231,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -872,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - __ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2ff600cfe3a4..57ee5ddea96f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -659,7 +659,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); } return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c73508e090a6..8561b9da6db6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -193,7 +193,12 @@ ipv4_connected: ip6_dst_store(sk, dst, ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); sk->sk_state = TCP_ESTABLISHED; out: diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7a51a258615d..827f41d1478b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -186,7 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0a18cb6b1cbb..2a376b7d91b4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -762,6 +762,9 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * 2. oif also should be the same. */ if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || +#ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || +#endif (fl->oif && fl->oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7f1b660493b7..2b18918f3011 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -272,7 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -954,7 +954,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ newsk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eb9e1b39c8f8..b9cc55ccb000 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -847,7 +847,12 @@ do_append_data: if (connected) { ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); } else { dst_release(dst); } -- cgit v1.2.3-71-gd317 From 7e49e6de30efa716614e280d97963c570f3acf29 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:05:15 -0700 Subject: [XFRM]: Add XFRM_MODE_xxx for future use. Transformation mode is used as either IPsec transport or tunnel. It is required to add two more items, route optimization and inbound trigger for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 6 ++++-- include/net/xfrm.h | 2 +- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 6 +++--- net/ipv4/ipcomp.c | 8 ++++---- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_output.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv4/xfrm4_state.c | 2 +- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 4 ++-- net/ipv6/ipcomp6.c | 6 +++--- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- net/ipv6/xfrm6_state.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_policy.c | 11 ++++++----- net/xfrm/xfrm_user.c | 4 ++-- 21 files changed, 42 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 46a15c7a1a13..5154064b6d95 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -120,7 +120,9 @@ enum #define XFRM_MODE_TRANSPORT 0 #define XFRM_MODE_TUNNEL 1 -#define XFRM_MODE_MAX 2 +#define XFRM_MODE_ROUTEOPTIMIZATION 2 +#define XFRM_MODE_IN_TRIGGER 3 +#define XFRM_MODE_MAX 4 /* Netlink configuration messages. */ enum { @@ -247,7 +249,7 @@ struct xfrm_usersa_info { __u32 seq; __u32 reqid; __u16 family; - __u8 mode; /* 0=transport,1=tunnel */ + __u8 mode; /* XFRM_MODE_xxx */ __u8 replay_window; __u8 flags; #define XFRM_STATE_NOECN 1 diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 00bf86e6e82b..762795624b10 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,7 +298,7 @@ struct xfrm_tmpl __u32 reqid; -/* Mode: transport/tunnel */ +/* Mode: transport, tunnel etc. */ __u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 008e69d2e423..99542977e47e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b428489f6ccd..e87377e1d6b6 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -248,7 +248,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (!x->props.mode) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -267,7 +267,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -383,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 5bb9c9f03fb6..17342430a843 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) ip_send_check(iph); return 0; } @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a6..040e8475f295 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 4a96a9e3ef3b..5fd115f0c547 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -54,7 +54,7 @@ static int xfrm4_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8f50eae47d03..a5bed741de2c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -96,7 +96,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966e..97b0c7589711 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,7 +42,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode && x->props.saddr.a4 == 0) { + if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { struct rtable *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index f8ceaa127c83..f110af5b1319 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 00ffa7bc6c9f..60954fc7eb36 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -398,7 +398,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2ebfd281e721..2b8e52e1d0ab 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -237,7 +237,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -358,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a81e9e9d93bd..19eba8d9f851 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); mutex_lock(&ipcomp6_resource_mutex); @@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0405d74ff910..ee2f6b3908b6 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { /* XXX */ + if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6d111743e508..26f18869f77b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -47,7 +47,7 @@ static int xfrm6_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -80,7 +80,7 @@ static int xfrm6_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 73cd250aecbb..81355bb50328 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -114,7 +114,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = (struct in6_addr*)&xfrm[i]->id.daddr; local = (struct in6_addr*)&xfrm[i]->props.saddr; tunnel = 1; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b33296b3f6de..a1a1f5476442 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,7 +42,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { + if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { struct rt6_info *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c8f9369c2a87..59685ee8f700 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static int xfrm6_tunnel_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/key/af_key.c b/net/key/af_key.c index 797c744a8438..19e047b0e678 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1765,7 +1765,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) } /* addresses present only in tunnel mode */ - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -1997,7 +1997,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i int req_size; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode) + if (t->mode == XFRM_MODE_TUNNEL) req_size += 2*socklen; else size -= 2*socklen; @@ -2013,7 +2013,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 32c963c90573..a0d58971391d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -779,7 +779,7 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; } @@ -1005,7 +1005,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && (tmpl->aalgos & (1<props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } static inline int @@ -1015,14 +1016,14 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) break; } return start; @@ -1047,7 +1048,7 @@ EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) return 1; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f70e158874d2..0d580ac19771 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -174,8 +174,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->mode) { - case 0: - case 1: + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_TUNNEL: break; default: -- cgit v1.2.3-71-gd317 From eb2971b68a7d17a7d0fa2c7fc6fbc4bfe41cd694 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:56:04 -0700 Subject: [XFRM] STATE: Search by address using source address list. This is a support to search transformation states by its addresses by using source address list for Mobile IPv6 usage. To use it from user-space, it is also added a message type for source address as a xfrm state option. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_state.c | 9 ++++++++ net/ipv6/xfrm6_state.c | 21 ++++++++++++++++++ net/xfrm/xfrm_state.c | 37 +++++++++++++++++++++++++++---- net/xfrm/xfrm_user.c | 59 +++++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 119 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 5154064b6d95..66343d3d4b91 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -234,6 +234,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_VAL, XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, + XFRMA_SRCADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 88145e3348d0..d9c40e713184 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -244,6 +244,7 @@ struct xfrm_state_afinfo { struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); + struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); @@ -937,6 +938,7 @@ extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index c56b258fad73..616be131b4e3 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -80,6 +80,14 @@ __xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } +/* placeholder until ipv4's code is written */ +static struct xfrm_state * +__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + return NULL; +} + static struct xfrm_state * __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, @@ -137,6 +145,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, + .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, .find_acq = __xfrm4_find_acq, }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 2fb07850449f..9c95b9d3e110 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,6 +63,26 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } +static struct xfrm_state * +__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + struct xfrm_state *x = NULL; + unsigned h; + + h = __xfrm6_src_hash(saddr); + list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { + if (x->props.family == AF_INET6 && + ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && + ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && + proto == x->id.proto) { + xfrm_state_hold(x); + return x; + } + } + return NULL; +} + static struct xfrm_state * __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) { @@ -140,6 +160,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, + .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .find_acq = __xfrm6_find_acq, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2a9992894e69..11f480b12952 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -487,6 +487,16 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, + int use_spi) +{ + if (use_spi) + return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + else + return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); +} + static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) @@ -495,6 +505,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1; int family; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; afinfo = xfrm_state_get_afinfo(family); @@ -503,7 +514,7 @@ int xfrm_state_add(struct xfrm_state *x) spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -511,7 +522,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (x->km.seq) { + if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -519,7 +530,7 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (!x1) + if (use_spi && !x1) x1 = afinfo->find_acq( x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -548,13 +559,14 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); afinfo = xfrm_state_get_afinfo(x->props.family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); err = -ESRCH; if (!x1) @@ -674,6 +686,23 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, } EXPORT_SYMBOL(xfrm_state_lookup); +struct xfrm_state * +xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); + struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 41f3d51ffc33..b5f8ab71aa54 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -87,6 +87,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } +static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, + xfrm_address_t **addrp) +{ + struct rtattr *rt = xfrma[type - 1]; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) + return -EINVAL; + + if (addrp) + *addrp = RTA_DATA(rt); + + return 0; +} static inline int verify_sec_ctx_len(struct rtattr **xfrma) { @@ -418,16 +434,48 @@ out: return err; } +static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = NULL; + int err; + + if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { + err = -ESRCH; + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + } else { + xfrm_address_t *saddr = NULL; + + err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); + if (err) + goto out; + + if (!saddr) { + err = -EINVAL; + goto out; + } + + x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, + p->family); + } + + out: + if (!x && errp) + *errp = err; + return x; +} + static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct xfrm_state *x; - int err; + int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) - return -ESRCH; + return err; if ((err = security_xfrm_state_delete(x)) != 0) goto out; @@ -578,10 +626,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err; + int err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - err = -ESRCH; + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) goto out_noput; -- cgit v1.2.3-71-gd317 From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:08:21 -0700 Subject: [XFRM] STATE: Common receive function for route optimization extension headers. XFRM_STATE_WILDRECV flag is introduced; the last resort state is set it and receives packet which is not route optimized but uses such extension headers i.e. Mobile IPv6 signaling (binding update and acknowledgement). A node enabled Mobile IPv6 adds the state. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 + net/ipv6/ipv6_syms.c | 1 + net/ipv6/xfrm6_input.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_state.c | 1 + 5 files changed, 113 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 66343d3d4b91..a7c9e4cfb15b 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -256,6 +256,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 #define XFRM_STATE_NOPMTUDISC 4 +#define XFRM_STATE_WILDRECV 8 }; struct xfrm_usersa_id { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index eed48f832ce1..0d735a5aba61 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -955,6 +955,8 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi); extern int xfrm6_rcv(struct sk_buff **pskb); +extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index e1a741612888..7b7b90d9c3d0 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_input_addr); EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index ee2f6b3908b6..a40a05789013 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb) { return xfrm6_rcv_spi(*pskb, 0); } + +int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto) +{ + struct xfrm_state *x = NULL; + int wildcard = 0; + struct in6_addr any; + xfrm_address_t *xany; + struct xfrm_state *xfrm_vec_one = NULL; + int nh = 0; + int i = 0; + + ipv6_addr_set(&any, 0, 0, 0, 0); + xany = (xfrm_address_t *)&any; + + for (i = 0; i < 3; i++) { + xfrm_address_t *dst, *src; + switch (i) { + case 0: + dst = daddr; + src = saddr; + break; + case 1: + /* lookup state with wild-card source address */ + wildcard = 1; + dst = daddr; + src = xany; + break; + case 2: + default: + /* lookup state with wild-card addresses */ + wildcard = 1; /* XXX */ + dst = xany; + src = xany; + break; + } + + x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + if (!x) + continue; + + spin_lock(&x->lock); + + if (wildcard) { + if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + } + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + if (xfrm_state_check_expire(x)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + nh = x->type->input(x, skb); + if (nh <= 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + xfrm_vec_one = x; + break; + } + + if (!xfrm_vec_one) + goto drop; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + sp = secpath_dup(skb->sp); + if (!sp) + goto drop; + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len > XFRM_MAX_DEPTH) + goto drop; + + skb->sp->xvec[skb->sp->len] = xfrm_vec_one; + skb->sp->len ++; + + return 1; +drop: + if (xfrm_vec_one) + xfrm_state_put(xfrm_vec_one); + return -1; +} diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 11f480b12952..f05371556cce 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -352,6 +352,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && -- cgit v1.2.3-71-gd317 From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 18:18:55 -0700 Subject: [XFRM] STATE: Introduce care-of address. Care-of address is carried by state as a transformation option like IPsec encryption/authentication algorithm. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/xfrm/xfrm_state.c | 6 ++++++ net/xfrm/xfrm_user.c | 28 +++++++++++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index a7c9e4cfb15b..b53f799189af 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -235,6 +235,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ + XFRMA_COADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa93cc1f6299..872a2a4022b2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -134,6 +134,9 @@ struct xfrm_state /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; + /* Data for care-of address */ + xfrm_address_t *coaddr; + /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f05371556cce..3da89c01ea71 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,6 +78,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -603,6 +604,11 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b5f8ab71aa54..939808de9e20 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -187,11 +187,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(xfrma))) goto out; + if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + goto out; err = -EINVAL; switch (p->mode) { case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: + case XFRM_MODE_ROUTEOPTIMIZATION: break; default: @@ -276,6 +279,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) return security_xfrm_state_alloc(x, uctx); } +static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + xfrm_address_t *p, *uaddrp; + + if (!rta) + return 0; + + uaddrp = RTA_DATA(rta); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, uaddrp, sizeof(*p)); + *addrpp = p; + return 0; +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -365,7 +386,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - + if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + goto error; err = xfrm_init_state(x); if (err) goto error; @@ -569,6 +591,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) uctx->ctx_len = x->security->ctx_len; memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); } + + if (x->coaddr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; -- cgit v1.2.3-71-gd317 From 9afaca057980c02771f4657c455cc7592fcd7373 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:20:16 -0700 Subject: [XFRM] IPV6: Update outbound state timestamp for each sending. With this patch transformation state is updated last used time for each sending. Xtime is used for it like other state lifetime expiration. Mobile IPv6 enabled nodes will want to know traffic status of each binding (e.g. judgement to request binding refresh by correspondent node, or to keep home/care-of nonce alive by mobile node). The last used timestamp is an important hint about it. Based on MIPL2 kernel patch. This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/ipv6/xfrm6_output.c | 2 ++ net/xfrm/xfrm_user.c | 3 +++ 4 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index b53f799189af..1d8c1f22c12d 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -236,6 +236,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ + XFRMA_LASTUSED, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 872a2a4022b2..248874ecf8df 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -167,6 +167,9 @@ struct xfrm_state struct xfrm_lifetime_cur curlft; struct timer_list timer; + /* Last used time */ + u64 lastused; + /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b4628fbf8ff5..db58104e710b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -75,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; + if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) + x->lastused = (u64)xtime.tv_sec; spin_unlock_bh(&x->lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 939808de9e20..f643063a1cbd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -595,6 +595,9 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) if (x->coaddr) RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (x->lastused) + RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; -- cgit v1.2.3-71-gd317 From 642ec62eee5bdc158e01029220c8a23c685778fb Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 19:15:07 -0700 Subject: [IPV6] MIP6: Add routing header type 2 definition. Add routing header type 2 definition for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 02d14a3ff2af..d995662e94c4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -29,6 +29,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ #define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ +#define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* * routing header @@ -73,6 +74,18 @@ struct rt0_hdr { #define rt0_type rt_hdr.type }; +/* + * routing header type 2 + */ + +struct rt2_hdr { + struct ipv6_rt_hdr rt_hdr; + __u32 reserved; + struct in6_addr addr; + +#define rt2_type rt_hdr.type +}; + struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ -- cgit v1.2.3-71-gd317 From 842426e719f86cd5709617208efae93ff1a1e2d8 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 19:21:34 -0700 Subject: [IPV6] MIP6: Add home address option definition. Add home address option definition for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/linux/ipv6.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 304aaedea305..086ec2ac8c5f 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -142,6 +142,7 @@ struct in6_flowlabel_req #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_JUMBO 194 +#define IPV6_TLV_HAO 201 /* home address option */ /* * IPV6 socket options diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d995662e94c4..5bf4406e26d4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -86,6 +86,16 @@ struct rt2_hdr { #define rt2_type rt_hdr.type }; +/* + * home address option in destination options header + */ + +struct ipv6_destopt_hao { + __u8 type; + __u8 length; + struct in6_addr addr; +} __attribute__ ((__packed__)); + struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ -- cgit v1.2.3-71-gd317 From a831f5bbc89a9978795504be9e1ff412043f8f77 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:24:48 -0700 Subject: [IPV6] MIP6: Add inbound interface of home address option. Add inbound function of home address option by registering it to TLV table for destination options header. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++ net/ipv6/exthdrs.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5bf4406e26d4..db3b2ba0f4f8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -226,6 +226,9 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; +#ifdef CONFIG_IPV6_MIP6 + __u16 dsthao; +#endif __u16 lastopt; __u32 nhoff; __u16 flags; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 1cdd0f0b5d34..6a6466bb5f26 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -196,8 +196,80 @@ bad: Destination options header. *****************************/ +#ifdef CONFIG_IPV6_MIP6 +static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) +{ + struct sk_buff *skb = *skbp; + struct ipv6_destopt_hao *hao; + struct inet6_skb_parm *opt = IP6CB(skb); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw; + struct in6_addr tmp_addr; + int ret; + + if (opt->dsthao) { + LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + goto discard; + } + opt->dsthao = opt->dst1; + opt->dst1 = 0; + + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff); + + if (hao->length != 16) { + LIMIT_NETDEBUG( + KERN_DEBUG "hao invalid option length = %d\n", hao->length); + goto discard; + } + + if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { + LIMIT_NETDEBUG( + KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + goto discard; + } + + ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, + (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); + if (unlikely(ret < 0)) + goto discard; + + if (skb_cloned(skb)) { + struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); + if (skb2 == NULL) + goto discard; + + kfree_skb(skb); + + /* update all variable using below by copied skbuff */ + *skbp = skb = skb2; + hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff); + ipv6h = (struct ipv6hdr *)skb2->nh.raw; + } + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + + ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); + ipv6_addr_copy(&ipv6h->saddr, &hao->addr); + ipv6_addr_copy(&hao->addr, &tmp_addr); + + if (skb->tstamp.off_sec == 0) + __net_timestamp(skb); + + return 1; + + discard: + kfree_skb(skb); + return 0; +} +#endif + static struct tlvtype_proc tlvprocdestopt_lst[] = { - /* No destination options are defined now */ +#ifdef CONFIG_IPV6_MIP6 + { + .type = IPV6_TLV_HAO, + .func = ipv6_dest_hao, + }, +#endif {-1, NULL} }; @@ -205,6 +277,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); +#ifdef CONFIG_IPV6_MIP6 + __u16 dstbuf; +#endif if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { @@ -215,11 +290,18 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; +#ifdef CONFIG_IPV6_MIP6 + dstbuf = opt->dst1; +#endif if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); +#ifdef CONFIG_IPV6_MIP6 + opt->nhoff = dstbuf; +#else opt->nhoff = opt->dst1; +#endif return 1; } -- cgit v1.2.3-71-gd317 From 8dd7368dd97def967bbb3aec67b882e8dfd1a528 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Aug 2006 19:25:55 -0700 Subject: [IPV6]: Put dsthao after flags in order to pack inet6_skb_parm better. Signed-off-by: David S. Miller --- include/linux/ipv6.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index db3b2ba0f4f8..1d6d3ccc9413 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -226,12 +226,12 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; -#ifdef CONFIG_IPV6_MIP6 - __u16 dsthao; -#endif __u16 lastopt; __u32 nhoff; __u16 flags; +#ifdef CONFIG_IPV6_MIP6 + __u16 dsthao; +#endif #define IP6SKB_XFRM_TRANSFORMED 1 }; -- cgit v1.2.3-71-gd317 From 2b741653b6c824fe7520ee92b6795f11c5f24b24 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:34:26 -0700 Subject: [IPV6] MIP6: Add Mobility header definition. Add Mobility header definition for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Antti Tuominen Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/net/flow.h | 9 +++++++++ include/net/ipv6.h | 1 + include/net/mip6.h | 23 +++++++++++++++++++++++ 4 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 086ec2ac8c5f..d776829b443f 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -134,6 +134,7 @@ struct in6_flowlabel_req #define IPPROTO_ICMPV6 58 /* ICMPv6 */ #define IPPROTO_NONE 59 /* IPv6 no next header */ #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ +#define IPPROTO_MH 135 /* IPv6 mobility header */ /* * IPv6 TLV options. diff --git a/include/net/flow.h b/include/net/flow.h index 21d988b2058a..e0522914316e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -72,12 +72,21 @@ struct flowi { } dnports; __u32 spi; + +#ifdef CONFIG_IPV6_MIP6 + struct { + __u8 type; + } mht; +#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi +#ifdef CONFIG_IPV6_MIP6 +#define fl_mh_type uli_u.mht.type +#endif __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8e6ec6063f8c..72bf47b2a4e0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -40,6 +40,7 @@ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ #define NEXTHDR_NONE 59 /* No next header */ #define NEXTHDR_DEST 60 /* Destination options header. */ +#define NEXTHDR_MOBILITY 135 /* Mobility header. */ #define NEXTHDR_MAX 255 diff --git a/include/net/mip6.h b/include/net/mip6.h index 42b65bace122..fd43178faace 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -28,6 +28,29 @@ #define MIP6_OPT_PAD_1 0 #define MIP6_OPT_PAD_N 1 +/* + * Mobility Header + */ +struct ip6_mh { + __u8 ip6mh_proto; + __u8 ip6mh_hdrlen; + __u8 ip6mh_type; + __u8 ip6mh_reserved; + __u16 ip6mh_cksum; + /* Followed by type specific messages */ + __u8 data[0]; +} __attribute__ ((__packed__)); + +#define IP6_MH_TYPE_BRR 0 /* Binding Refresh Request */ +#define IP6_MH_TYPE_HOTI 1 /* HOTI Message */ +#define IP6_MH_TYPE_COTI 2 /* COTI Message */ +#define IP6_MH_TYPE_HOT 3 /* HOT Message */ +#define IP6_MH_TYPE_COT 4 /* COT Message */ +#define IP6_MH_TYPE_BU 5 /* Binding Update */ +#define IP6_MH_TYPE_BACK 6 /* Binding ACK */ +#define IP6_MH_TYPE_BERROR 7 /* Binding Error */ +#define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR + extern int mip6_init(void); extern void mip6_fini(void); -- cgit v1.2.3-71-gd317 From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:44:06 -0700 Subject: [XFRM]: Introduce XFRM_MSG_REPORT. XFRM_MSG_REPORT is a message as notification of state protocol and selector from kernel to user-space. Mobile IPv6 will use it when inbound reject is occurred at route optimization to make user-space know a binding error requirement. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 12 ++++++++++++ include/net/xfrm.h | 2 ++ net/xfrm/xfrm_state.c | 19 +++++++++++++++++++ net/xfrm/xfrm_user.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 1d8c1f22c12d..4009f4445fa9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -166,6 +166,10 @@ enum { #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE XFRM_MSG_GETAE, #define XFRM_MSG_GETAE XFRM_MSG_GETAE + + XFRM_MSG_REPORT, +#define XFRM_MSG_REPORT XFRM_MSG_REPORT + __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -325,12 +329,18 @@ struct xfrm_usersa_flush { __u8 proto; }; +struct xfrm_user_report { + __u8 proto; + struct xfrm_selector sel; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#define XFRMGRP_REPORT 0x10 #endif enum xfrm_nlgroups { @@ -346,6 +356,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY XFRMNLGRP_AEVENTS, #define XFRMNLGRP_AEVENTS XFRMNLGRP_AEVENTS + XFRMNLGRP_REPORT, +#define XFRMNLGRP_REPORT XFRMNLGRP_REPORT __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9ebbdc1dd471..0b223eed4c9b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -381,6 +381,7 @@ struct xfrm_mgr struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); + int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1043,6 +1044,7 @@ extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3da89c01ea71..a26ef6952c30 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1055,6 +1055,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(proto, sel, addr); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_report); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 770bd2410749..7303b820bea4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1491,6 +1491,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), }; #undef XMSGSIZE @@ -2058,12 +2059,57 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static int build_report(struct sk_buff *skb, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct xfrm_user_report *ur; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); + ur = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + ur->proto = proto; + memcpy(&ur->sel, sel, sizeof(ur->sel)); + + if (addr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, + xfrm_address_t *addr) +{ + struct sk_buff *skb; + size_t len; + + len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_report(skb, proto, sel, addr) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, + .report = xfrm_send_report, }; static int __init xfrm_user_init(void) -- cgit v1.2.3-71-gd317 From 4e81bb8336a0ac50289d4d4c7a55e559b994ee8f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:43:30 -0700 Subject: [XFRM] POLICY: sub policy support. Sub policy is introduced. Main and sub policy are applied the same flow. (Policy that current kernel uses is named as main.) It is required another transformation policy management to keep IPsec and Mobile IPv6 lives separate. Policy which lives shorter time in kernel should be a sub i.e. normally main is for IPsec and sub is for Mobile IPv6. (Such usage as two IPsec policies on different database can be used, too.) Limitation or TODOs: - Sub policy is not supported for per socket one (it is always inserted as main). - Current kernel makes cached outbound with flowi to skip searching database. However this patch makes it disabled only when "two policies are used and the first matched one is bypass case" because neither flowi nor bundle information knows about transformation template size. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 7 ++ include/net/xfrm.h | 45 +++++++-- net/xfrm/xfrm_policy.c | 252 ++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 260 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4009f4445fa9..492fb9818747 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -102,6 +102,13 @@ struct xfrm_stats { __u32 integrity_failed; }; +enum +{ + XFRM_POLICY_TYPE_MAIN = 0, + XFRM_POLICY_TYPE_SUB = 1, + XFRM_POLICY_TYPE_MAX = 2 +}; + enum { XFRM_POLICY_IN = 0, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0b223eed4c9b..4655ca25f808 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,6 +341,7 @@ struct xfrm_policy atomic_t refcnt; struct timer_list timer; + u8 type; u32 priority; u32 index; struct xfrm_selector selector; @@ -389,6 +390,19 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km); extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; +#ifdef CONFIG_XFRM_SUB_POLICY +extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; + +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); +} +#else +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir]); +} +#endif static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -404,6 +418,20 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy) __xfrm_policy_destroy(policy); } +#ifdef CONFIG_XFRM_SUB_POLICY +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + int i; + for (i = npols - 1; i >= 0; --i) + xfrm_pol_put(pols[i]); +} +#else +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + xfrm_pol_put(pols[0]); +} +#endif + #define XFRM_DST_HSIZE 1024 static __inline__ @@ -737,8 +765,8 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - - return (!xfrm_policy_list[dir] && !skb->sp) || + + return (xfrm_policy_lists_empty(dir) && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -758,7 +786,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_list[XFRM_POLICY_OUT] || + return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } @@ -1023,18 +1051,19 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); -void xfrm_policy_flush(void); +struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); +void xfrm_policy_flush(u8 type); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(void); +extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d125a2649037..96de6c76ed57 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -32,6 +32,24 @@ static DEFINE_RWLOCK(xfrm_policy_lock); struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_list); +#ifdef CONFIG_XFRM_SUB_POLICY +struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_list_sub); + +#define XFRM_POLICY_LISTS(type) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ + xfrm_policy_list) +#define XFRM_POLICY_LISTHEAD(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ + xfrm_policy_list[dir]) +#define XFRM_POLICY_LISTHEADP(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \ + &xfrm_policy_list[dir]) +#else +#define XFRM_POLICY_LISTS(type) xfrm_policy_list +#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] +#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] +#endif static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -397,7 +415,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(u8 type, int dir) { u32 idx; struct xfrm_policy *p; @@ -408,7 +426,7 @@ static u32 xfrm_gen_index(int dir) idx_generator += 8; if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { + for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { if (p->index == idx) break; } @@ -425,7 +443,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { + for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { @@ -452,7 +470,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->next = *p; *p = policy; atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -493,13 +511,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); @@ -518,12 +537,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if (pol->index == id) { xfrm_pol_hold(pol); if (delete) @@ -541,15 +560,16 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +void xfrm_policy_flush(u8 type) { struct xfrm_policy *xp; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; + while ((xp = p_list[dir]) != NULL) { + p_list[dir] = xp->next; write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(xp); @@ -562,7 +582,7 @@ void xfrm_policy_flush(void) } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { struct xfrm_policy *xp; @@ -572,7 +592,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), read_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) count++; } @@ -582,7 +602,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { error = func(xp, dir%XFRM_POLICY_MAX, --count, data); if (error) goto out; @@ -597,13 +617,13 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) { struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { struct xfrm_selector *sel = &pol->selector; int match; @@ -620,6 +640,25 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, } } read_unlock_bh(&xfrm_policy_lock); + + return pol; +} + +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + +#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol) + goto end; +#endif + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + +#ifdef CONFIG_XFRM_SUB_POLICY + end: +#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; } @@ -665,8 +704,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + + pol->next = p_list[dir]; + p_list[dir] = pol; xfrm_pol_hold(pol); } @@ -675,7 +716,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, { struct xfrm_policy **polp; - for (polp = &xfrm_policy_list[dir]; + for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); *polp != NULL; polp = &(*polp)->next) { if (*polp == pol) { *polp = pol->next; @@ -704,12 +745,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -738,6 +784,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -764,9 +811,9 @@ int __xfrm_sk_clone_policy(struct sock *sk) /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; @@ -809,6 +856,38 @@ fail: return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + return cnx; + + fail: + for (cnx--; cnx>=0; cnx--) + xfrm_state_put(xfrm[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ @@ -855,6 +934,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols; + int pol_dead; + int xfrm_nr; + int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -866,12 +950,18 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, restart: genid = atomic_read(&flow_cache_genid); policy = NULL; + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; + pol_dead = 0; + xfrm_nr = 0; + if (sk && sk->sk_policy[1]) policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -883,6 +973,9 @@ restart: family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -891,11 +984,13 @@ restart: goto error; case XFRM_POLICY_ALLOW: +#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } +#endif /* Try to find matching bundle. * @@ -911,7 +1006,36 @@ restart: if (dst) break; - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (pols[1]->action == XFRM_POLICY_BLOCK) { + err = -EPERM; + goto error; + } + npols ++; + xfrm_nr += pols[1]->xfrm_nr; + } + } + + /* + * Because neither flowi nor bundle information knows about + * transformation template size. On more than one policy usage + * we can realize whether all of them is bypass or not after + * they are searched. See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ + if (xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pols_put(pols, npols); + return 0; + } + +#endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -924,7 +1048,7 @@ restart: set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -932,7 +1056,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); goto restart; } err = nx; @@ -942,7 +1066,7 @@ restart: } if (nx == 0) { /* Flow passes not transformed. */ - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; } @@ -956,8 +1080,14 @@ restart: goto error; } + for (pi = 0; pi < npols; pi++) { + read_lock_bh(&pols[pi]->lock); + pol_dead |= pols[pi]->dead; + read_unlock_bh(&pols[pi]->lock); + } + write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { + if (unlikely(pol_dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -977,12 +1107,12 @@ restart: } *dst_p = dst; dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; error: dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); *dst_p = NULL; return err; } @@ -1090,6 +1220,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; @@ -1128,22 +1262,50 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = pol; + npols ++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + npols ++; + } + } +#endif + if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) + goto reject; + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) + goto reject_error; + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { if (k < -1 && xerr_idxp) *xerr_idxp = -(2+k); @@ -1154,13 +1316,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } reject: xfrm_secpath_reject(xerr_idx, skb, &fl); - xfrm_pol_put(pol); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1246,6 +1409,23 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) read_lock_bh(&xfrm_policy_lock); for (i=0; i<2*XFRM_POLICY_MAX; i++) { +#ifdef CONFIG_XFRM_SUB_POLICY + for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + +#endif for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { write_lock(&pol->lock); dstp = &pol->bundles; -- cgit v1.2.3-71-gd317 From f7b6983f0feeefcd2a594138adcffe640593d8de Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:49:28 -0700 Subject: [XFRM] POLICY: Support netlink socket interface for sub policy. Sub policy can be used through netlink socket. PF_KEY uses main only and it is TODO to support sub. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 7 +++ include/net/xfrm.h | 1 + net/key/af_key.c | 18 +++++-- net/xfrm/xfrm_user.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 142 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 492fb9818747..14ecd19f4cdc 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -230,6 +230,12 @@ enum xfrm_ae_ftype_t { #define XFRM_AE_MAX (__XFRM_AE_MAX - 1) }; +struct xfrm_userpolicy_type { + __u8 type; + __u16 reserved1; + __u8 reserved2; +}; + /* Netlink message attributes. */ enum xfrm_attr_type_t { XFRMA_UNSPEC, @@ -248,6 +254,7 @@ enum xfrm_attr_type_t { XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ XFRMA_LASTUSED, + XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d341603e4ba8..c75b3287d8f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -203,6 +203,7 @@ struct km_event u32 proto; u32 byid; u32 aevent; + u32 type; } data; u32 seq; diff --git a/net/key/af_key.c b/net/key/af_key.c index 19e047b0e678..83b443ddc72f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1731,7 +1731,8 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) + if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, + (void*)&reqid) != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -2268,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); + xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, + &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2330,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (dir >= XFRM_POLICY_MAX) return -EINVAL; - xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, + xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2378,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; - return xfrm_policy_walk(dump_sp, &data); + return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); } static int key_notify_policy_flush(struct km_event *c) @@ -2405,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; - xfrm_policy_flush(); + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; @@ -2667,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) + return 0; + switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); @@ -2675,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: + if (c->data.type != XFRM_POLICY_TYPE_MAIN) + break; return key_notify_policy_flush(c); default: printk("pfkey: Unknown policy event %d\n", c->event); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7303b820bea4..c59a78d2923a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -786,6 +786,22 @@ static int verify_policy_dir(__u8 dir) return 0; } +static int verify_policy_type(__u8 type) +{ + switch (type) { + case XFRM_POLICY_TYPE_MAIN: +#ifdef CONFIG_XFRM_SUB_POLICY + case XFRM_POLICY_TYPE_SUB: +#endif + break; + + default: + return -EINVAL; + }; + + return 0; +} + static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { switch (p->share) { @@ -879,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) return 0; } +static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct xfrm_userpolicy_type *upt; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + if (rt) { + if (rt->rta_len < sizeof(*upt)) + return -EINVAL; + + upt = RTA_DATA(rt); + type = upt->type; + } + + err = verify_policy_type(type); + if (err) + return err; + + *tp = type; + return 0; +} + static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; @@ -917,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); + err = copy_from_user_policy_type(&xp->type, xfrma); + if (err) + goto error; + if (!(err = copy_from_user_tmpl(xp, xfrma))) err = copy_from_user_sec_ctx(xp, xfrma); - - if (err) { - *errp = err; - kfree(xp); - xp = NULL; - } + if (err) + goto error; return xp; + error: + *errp = err; + kfree(xp); + return NULL; } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) @@ -1037,6 +1080,29 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s return 0; } +#ifdef CONFIG_XFRM_SUB_POLICY +static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + struct xfrm_userpolicy_type upt; + + memset(&upt, 0, sizeof(upt)); + upt.type = xp->type; + + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + + return 0; + +rtattr_failure: + return -1; +} + +#else +static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + return 0; +} +#endif + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -1060,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -1081,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(dump_one_policy, &info); + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); +#ifdef CONFIG_XFRM_SUB_POLICY + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); +#endif cb->args[0] = info.this_idx; return skb->len; @@ -1117,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; @@ -1124,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + err = verify_policy_dir(p->dir); if (err) return err; if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, delete); + xp = xfrm_policy_byid(type, p->dir, p->index, delete); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1146,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } if (xp == NULL) @@ -1329,9 +1405,16 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { -struct km_event c; + struct km_event c; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; - xfrm_policy_flush(); + xfrm_policy_flush(type); + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; @@ -1344,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); struct xfrm_userpolicy_info *p = &up->pol; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, 0); + xp = xfrm_policy_byid(type, p->dir, p->index, 0); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1364,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0); security_xfrm_policy_free(&tmp); } @@ -1818,6 +1906,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, goto nlmsg_failure; if (copy_to_user_state_sec_ctx(x, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -1898,6 +1988,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, } copy_from_user_policy(xp, p); + xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); if (!xp->security) { @@ -1931,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -2002,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -2019,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned char *b; +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_userpolicy_type upt; +#endif int len = NLMSG_LENGTH(0); skb = alloc_skb(len, GFP_ATOMIC); @@ -2028,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); + nlh->nlmsg_flags = 0; + +#ifdef CONFIG_XFRM_SUB_POLICY + memset(&upt, 0, sizeof(upt)); + upt.type = c->data.type; + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); +#endif nlh->nlmsg_len = skb->tail - b; @@ -2035,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: +#ifdef CONFIG_XFRM_SUB_POLICY +rtattr_failure: +#endif kfree_skb(skb); return -1; } -- cgit v1.2.3-71-gd317 From f034b5d4efdfe0fb9e2a1ce1d95fa7914f24de49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:08:07 -0700 Subject: [XFRM]: Dynamic xfrm_state hash table sizing. The grow algorithm is simple, we grow if: 1) we see a hash chain collision at insert, and 2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and 3) the number of xfrm_state objects is > the current hash mask All of this needs some tweaking. Remove __initdata from "hashdist" so we can use it safely at run time. Signed-off-by: David S. Miller --- include/linux/bootmem.h | 2 +- mm/page_alloc.c | 2 +- net/xfrm/xfrm_state.c | 247 ++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 197 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1021f508d82c..e319c649e4fd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename, #else #define HASHDIST_DEFAULT 0 #endif -extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ +extern int hashdist; /* Distribute hashes across NUMA nodes? */ #endif /* _LINUX_BOOTMEM_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54a4f5375bba..3b5358a0561f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2363,7 +2363,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, return 0; } -__initdata int hashdist = HASHDIST_DEFAULT; +int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA static int __init set_hashdist(char *str) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fe3c8c38d5e1..445263c54c94 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #include struct sock *xfrm_nl; @@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); -#define XFRM_DST_HSIZE 1024 - /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; - -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +static struct hlist_head *xfrm_state_bydst __read_mostly; +static struct hlist_head *xfrm_state_bysrc __read_mostly; +static struct hlist_head *xfrm_state_byspi __read_mostly; +static unsigned int xfrm_state_hmask __read_mostly; +static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static unsigned int xfrm_state_num; + +static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); } -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_src_hash(addr); + return __xfrm4_src_hash(addr, hmask); case AF_INET6: - return __xfrm6_src_hash(addr); + return __xfrm6_src_hash(addr, hmask); } return 0; } -static __inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_src_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); case AF_INET6: - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } return 0; } -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_dst_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +static inline +unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); + return __xfrm4_spi_hash(addr, spi, proto, hmask); case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); + return __xfrm6_spi_hash(addr, spi, proto, hmask); } return 0; /*XXX*/ } +static inline unsigned int +xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); +} + +static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + struct hlist_head *nsrctable, + struct hlist_head *nspitable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_state *x; + + hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + unsigned int h; + + h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + hlist_add_head(&x->bydst, ndsttable+h); + + h = __xfrm_src_hash(&x->props.saddr, x->props.family, + nhashmask); + hlist_add_head(&x->bysrc, nsrctable+h); + + h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family, nhashmask); + hlist_add_head(&x->byspi, nspitable+h); + } +} + +static unsigned long xfrm_hash_new_size(void) +{ + return ((xfrm_state_hmask + 1) << 1) * + sizeof(struct hlist_head); +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + unsigned long nsize, osize; + unsigned int nhashmask, ohashmask; + int i; + + mutex_lock(&hash_resize_mutex); + + nsize = xfrm_hash_new_size(); + ndst = xfrm_state_hash_alloc(nsize); + if (!ndst) + goto out_unlock; + nsrc = xfrm_state_hash_alloc(nsize); + if (!nsrc) { + xfrm_state_hash_free(ndst, nsize); + goto out_unlock; + } + nspi = xfrm_state_hash_alloc(nsize); + if (!nspi) { + xfrm_state_hash_free(ndst, nsize); + xfrm_state_hash_free(nsrc, nsize); + goto out_unlock; + } + + spin_lock_bh(&xfrm_state_lock); + + nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + for (i = xfrm_state_hmask; i >= 0; i--) + xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + nhashmask); + + odst = xfrm_state_bydst; + osrc = xfrm_state_bysrc; + ospi = xfrm_state_byspi; + ohashmask = xfrm_state_hmask; + + xfrm_state_bydst = ndst; + xfrm_state_bysrc = nsrc; + xfrm_state_byspi = nspi; + xfrm_state_hmask = nhashmask; + + spin_unlock_bh(&xfrm_state_lock); + + osize = (ohashmask + 1) * sizeof(struct hlist_head); + xfrm_state_hash_free(odst, osize); + xfrm_state_hash_free(osrc, osize); + xfrm_state_hash_free(ospi, osize); + +out_unlock: + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->byspi); __xfrm_state_put(x); } + xfrm_state_num--; spin_unlock(&xfrm_state_lock); if (del_timer(&x->timer)) __xfrm_state_put(x); @@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i < xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: @@ -611,7 +743,7 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); @@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x) xfrm_state_hold(x); wake_up(&km_waitq); + + xfrm_state_num++; + + if (x->bydst.next != NULL && + (xfrm_state_hmask + 1) < xfrm_state_hashmax && + xfrm_state_num > xfrm_state_hmask) + schedule_work(&xfrm_hash_work); } void xfrm_state_insert(struct xfrm_state *x) @@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) { - u32 h; + unsigned int h; struct xfrm_state *x0; if (x->id.spi) @@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), int err = 0; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; @@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), goto out; } - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; @@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { - int i; + unsigned int sz; + + sz = sizeof(struct hlist_head) * 8; + + xfrm_state_bydst = xfrm_state_hash_alloc(sz); + xfrm_state_bysrc = xfrm_state_hash_alloc(sz); + xfrm_state_byspi = xfrm_state_hash_alloc(sz); + if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) + panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); + xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); - for (i=0; i Date: Mon, 21 Aug 2006 19:22:01 +0900 Subject: [IPV6] ROUTE: Routing by FWMARK. Based on patch by Jean Lorchat . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/fib_rules.h | 2 +- include/net/flow.h | 2 ++ net/ipv6/Kconfig | 7 +++++++ net/ipv6/fib6_rules.c | 23 +++++++++++++++++++++++ net/ipv6/route.c | 1 + 5 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 19a82b6c1c1f..2987549d6044 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,7 +34,7 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, diff --git a/include/net/flow.h b/include/net/flow.h index e0522914316e..3ca210ec1379 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -26,6 +26,7 @@ struct flowi { struct { struct in6_addr daddr; struct in6_addr saddr; + __u32 fwmark; __u32 flowlabel; } ip6_u; @@ -42,6 +43,7 @@ struct flowi { #define fld_scope nl_u.dn_u.scope #define fl6_dst nl_u.ip6_u.daddr #define fl6_src nl_u.ip6_u.saddr +#define fl6_fwmark nl_u.ip6_u.fwmark #define fl6_flowlabel nl_u.ip6_u.flowlabel #define fl4_dst nl_u.ip4_u.daddr #define fl4_src nl_u.ip4_u.saddr diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 21e0cc808f44..a2d211da2aba 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -173,3 +173,10 @@ config IPV6_MULTIPLE_TABLES ---help--- Support multiple routing tables. +config IPV6_ROUTE_FWMARK + bool "IPv6: use netfilter MARK value as routing key" + depends on IPV6_MULTIPLE_TABLES && NETFILTER + ---help--- + If you say Y here, you will be able to specify different routes for + packets with different mark values (see iptables(8), MARK target). + diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 91f6233d8efd..aebd9e2b85a8 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -26,6 +26,9 @@ struct fib6_rule struct fib_rule common; struct rt6key src; struct rt6key dst; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + u8 fwmark; +#endif u8 tclass; }; @@ -124,6 +127,11 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) return 0; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (r->fwmark && (r->fwmark != fl->fl6_fwmark)) + return 0; +#endif + return 1; } @@ -164,6 +172,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, nla_memcpy(&rule6->dst.addr, tb[FRA_DST], sizeof(struct in6_addr)); +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif + rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; @@ -195,6 +208,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; +#endif + return 1; } @@ -216,6 +234,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), &rule6->src.addr); +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (rule6->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); +#endif + return 0; nla_put_failure: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 20691285aee5..649350bd9299 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -703,6 +703,7 @@ void ip6_route_input(struct sk_buff *skb) .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, + .fwmark = skb->nfmark, .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, }, }, -- cgit v1.2.3-71-gd317 From 1aaec67f9335a17856dfacdd3e5cc6f4c18faeec Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 25 Jun 2006 23:54:55 +0900 Subject: [NET]: Add common helper functions to convert IPv6/IPv4 address string to network address structure. These helpers can be used in netfilter, cifs etc. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/inet.h | 2 + net/core/utils.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet.h b/include/linux/inet.h index 6c5587af118d..b7c6da7d6d32 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -46,5 +46,7 @@ #include extern __be32 in_aton(const char *str); +extern int in4_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); +extern int in6_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); #endif #endif /* _LINUX_INET_H */ diff --git a/net/core/utils.c b/net/core/utils.c index e31c90e05594..5a06e8a72c17 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -4,6 +4,7 @@ * Authors: * net_random Alan Cox * net_ratelimit Andy Kleen + * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov * @@ -191,3 +192,217 @@ __be32 in_aton(const char *str) } EXPORT_SYMBOL(in_aton); + +#define IN6PTON_XDIGIT 0x00010000 +#define IN6PTON_DIGIT 0x00020000 +#define IN6PTON_COLON_MASK 0x00700000 +#define IN6PTON_COLON_1 0x00100000 /* single : requested */ +#define IN6PTON_COLON_2 0x00200000 /* second : requested */ +#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ +#define IN6PTON_DOT 0x00800000 /* . */ +#define IN6PTON_DELIM 0x10000000 +#define IN6PTON_NULL 0x20000000 /* first/tail */ +#define IN6PTON_UNKNOWN 0x40000000 + +static inline int digit2bin(char c, char delim) +{ + if (c == delim || c == '\0') + return IN6PTON_DELIM; + if (c == '.') + return IN6PTON_DOT; + if (c >= '0' && c <= '9') + return (IN6PTON_DIGIT | (c - '0')); + return IN6PTON_UNKNOWN; +} + +static inline int xdigit2bin(char c, char delim) +{ + if (c == delim || c == '\0') + return IN6PTON_DELIM; + if (c == ':') + return IN6PTON_COLON_MASK; + if (c == '.') + return IN6PTON_DOT; + if (c >= '0' && c <= '9') + return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); + if (c >= 'a' && c <= 'f') + return (IN6PTON_XDIGIT | (c - 'a' + 10)); + if (c >= 'A' && c <= 'F') + return (IN6PTON_XDIGIT | (c - 'A' + 10)); + return IN6PTON_UNKNOWN; +} + +int in4_pton(const char *src, int srclen, + u8 *dst, + char delim, const char **end) +{ + const char *s; + u8 *d; + u8 dbuf[4]; + int ret = 0; + int i; + int w = 0; + + if (srclen < 0) + srclen = strlen(src); + s = src; + d = dbuf; + i = 0; + while(1) { + int c; + c = xdigit2bin(srclen > 0 ? *s : '\0', delim); + if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) { + goto out; + } + if (c & (IN6PTON_DOT | IN6PTON_DELIM)) { + if (w == 0) + goto out; + *d++ = w & 0xff; + w = 0; + i++; + if (c & IN6PTON_DELIM) { + if (i != 4) + goto out; + break; + } + goto cont; + } + w = (w * 10) + c; + if ((w & 0xffff) > 255) { + goto out; + } +cont: + if (i >= 4) + goto out; + s++; + srclen--; + } + ret = 1; + memcpy(dst, dbuf, sizeof(dbuf)); +out: + if (end) + *end = s; + return ret; +} + +EXPORT_SYMBOL(in4_pton); + +int in6_pton(const char *src, int srclen, + u8 *dst, + char delim, const char **end) +{ + const char *s, *tok = NULL; + u8 *d, *dc = NULL; + u8 dbuf[16]; + int ret = 0; + int i; + int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; + int w = 0; + + memset(dbuf, 0, sizeof(dbuf)); + + s = src; + d = dbuf; + if (srclen < 0) + srclen = strlen(src); + + printf("srclen=%d\n", srclen); + + while (1) { + int c; + + c = xdigit2bin(srclen > 0 ? *s : '\0', delim); + if (!(c & state)) + goto out; + if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { + /* process one 16-bit word */ + if (!(state & IN6PTON_NULL)) { + *d++ = (w >> 8) & 0xff; + *d++ = w & 0xff; + } + w = 0; + if (c & IN6PTON_DELIM) { + /* We've processed last word */ + break; + } + /* + * COLON_1 => XDIGIT + * COLON_2 => XDIGIT|DELIM + * COLON_1_2 => COLON_2 + */ + switch (state & IN6PTON_COLON_MASK) { + case IN6PTON_COLON_2: + dc = d; + state = IN6PTON_XDIGIT | IN6PTON_DELIM; + if (dc - dbuf >= sizeof(dbuf)) + state |= IN6PTON_NULL; + break; + case IN6PTON_COLON_1|IN6PTON_COLON_1_2: + state = IN6PTON_XDIGIT | IN6PTON_COLON_2; + break; + case IN6PTON_COLON_1: + state = IN6PTON_XDIGIT; + break; + case IN6PTON_COLON_1_2: + state = IN6PTON_COLON_2; + break; + default: + state = 0; + } + tok = s + 1; + goto cont; + } + + if (c & IN6PTON_DOT) { + ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); + if (ret > 0) { + d += 4; + break; + } + goto out; + } + + w = (w << 4) | (0xff & c); + state = IN6PTON_COLON_1 | IN6PTON_DELIM; + if (!(w & 0xf000)) { + state |= IN6PTON_XDIGIT; + } + if (!dc && d + 2 < dbuf + sizeof(dbuf)) { + state |= IN6PTON_COLON_1_2; + state &= ~IN6PTON_DELIM; + } + if (d + 2 >= dbuf + sizeof(dbuf)) { + state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); + } +cont: + if ((dc && d + 4 < dbuf + sizeof(dbuf)) || + d + 4 == dbuf + sizeof(dbuf)) { + state |= IN6PTON_DOT; + } + if (d >= dbuf + sizeof(dbuf)) { + state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); + } + s++; + srclen--; + } + + i = 15; d--; + + if (dc) { + while(d >= dc) + dst[i--] = *d--; + while(i >= dc - dbuf) + dst[i--] = 0; + while(i >= 0) + dst[i--] = *d--; + } else + memcpy(dst, dbuf, sizeof(dbuf)); + + ret = 1; +out: + if (end) + *end = s; + return ret; +} + +EXPORT_SYMBOL(in6_pton); -- cgit v1.2.3-71-gd317 From bbfb39cbf63829d1db607aa90cbdca557a3a131d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:10:14 -0700 Subject: [IPV4]: Add support for fwmark masks in routing rules Add a FRA_FWMASK attributes for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 3 ++- net/ipv4/fib_rules.c | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 2987549d6044..4418c8d9d479 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,12 +34,13 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ + FRA_FWMARK, /* netfilter mark */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, FRA_UNUSED8, FRA_TABLE, /* Extended table id */ + FRA_FWMASK, /* mask for netfilter mark */ __FRA_MAX }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ce185ac6f260..280f424ca9c9 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -46,6 +46,7 @@ struct fib4_rule u32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif #ifdef CONFIG_NET_CLS_ROUTE u32 tclassid; @@ -160,7 +161,7 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r->fwmark && (r->fwmark != fl->fl4_fwmark)) + if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) return 0; #endif @@ -183,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_SRC] = { .type = NLA_U32 }, [FRA_DST] = { .type = NLA_U32 }, [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -219,8 +221,17 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dst = nla_get_u32(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule4->fwmark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule4->fwmask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif #ifdef CONFIG_NET_CLS_ROUTE @@ -256,6 +267,9 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, #ifdef CONFIG_IP_ROUTE_FWMARK if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif #ifdef CONFIG_NET_CLS_ROUTE @@ -285,6 +299,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, #ifdef CONFIG_IP_ROUTE_FWMARK if (rule4->fwmark) NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); + + if (rule4->fwmask || rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); #endif if (rule4->dst_len) -- cgit v1.2.3-71-gd317 From b4e9b520ca5d07a37ea59648e7f50f478e7487a3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:11:42 -0700 Subject: [NET_SCHED]: Add mask support to fwmark classifier Support masking the nfmark value before the search. The mask value is global for all filters contained in one instance. It can only be set when a new instance is created, all filters must specify the same mask. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 1 + net/sched/cls_fw.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index bd2c5a2bbbf5..c3f01b3085a4 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -305,6 +305,7 @@ enum TCA_FW_POLICE, TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, __TCA_FW_MAX }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index e6973d9b686d..e54acc6bcccd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -50,6 +50,7 @@ struct fw_head { struct fw_filter *ht[HTSIZE]; + u32 mask; }; struct fw_filter @@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_filter *f; int r; #ifdef CONFIG_NETFILTER - u32 id = skb->nfmark; + u32 id = skb->nfmark & head->mask; #else u32 id = 0; #endif @@ -209,7 +210,9 @@ static int fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, struct rtattr **tb, struct rtattr **tca, unsigned long base) { + struct fw_head *head = (struct fw_head *)tp->root; struct tcf_exts e; + u32 mask; int err; err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); @@ -232,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + goto errout; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + if (mask != head->mask) + goto errout; + } else if (head->mask != 0xFFFFFFFF) + goto errout; + tcf_exts_change(tp, &f->exts, &e); return 0; @@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, return -EINVAL; if (head == NULL) { + u32 mask = 0xFFFFFFFF; + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + return -EINVAL; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + } + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; + head->mask = mask; tcf_tree_lock(tp); tp->root = head; @@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { + struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter*)fh; unsigned char *b = skb->tail; struct rtattr *rta; @@ -351,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, if (strlen(f->indev)) RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); #endif /* CONFIG_NET_CLS_IND */ + if (head->mask != 0xFFFFFFFF) + RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; -- cgit v1.2.3-71-gd317 From 97e5848dd39e7e76bd6077735ebb5473763ab9c5 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:16:45 -0700 Subject: [DCCP]: Introduce tx buffering This adds transmit buffering to DCCP. I have tested with CCID2/3 and with loss and rate limiting. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 ++ net/dccp/dccp.h | 2 +- net/dccp/output.c | 90 +++++++++++++++++++++++++++++++++++++--------------- net/dccp/proto.c | 16 +++------- 4 files changed, 73 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 676333b9fad0..2d7671c92c0b 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -438,6 +438,7 @@ struct dccp_ackvec; * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackvec - rx half connection ack vector + * @dccps_xmit_timer - timer for when CCID is not ready to send */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -470,6 +471,7 @@ struct dccp_sock { enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; + struct timer_list dccps_xmit_timer; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index a5c5475724c0..0a21be437ed3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -130,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); +extern void dccp_write_xmit(struct sock *sk, int block); extern void dccp_write_space(struct sock *sk); extern void dccp_init_xmit_timers(struct sock *sk); diff --git a/net/dccp/output.c b/net/dccp/output.c index 58669beee132..7102e3aed4ca 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -198,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + if (sk->sk_err) goto do_error; if (!*timeo) goto do_nonblock; @@ -234,37 +234,72 @@ do_interrupted: goto out; } -int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) +static void dccp_write_xmit_timer(unsigned long data) { + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); + else + dccp_write_xmit(sk, 0); + bh_unlock_sock(sk); + sock_put(sk); +} + +void dccp_write_xmit(struct sock *sk, int block) { - const struct dccp_sock *dp = dccp_sk(sk); - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + long timeo = 30000; /* If a packet is taking longer than 2 secs + we have other issues */ + + while ((skb = skb_peek(&sk->sk_write_queue))) { + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, skb->len); - if (err > 0) - err = dccp_wait_for_ccid(sk, skb, timeo); + if (err > 0) { + if (!block) { + sk_reset_timer(sk, &dp->dccps_xmit_timer, + msecs_to_jiffies(err)+jiffies); + break; + } else + err = dccp_wait_for_ccid(sk, skb, &timeo); + if (err) { + printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" + " %d\n", __FUNCTION__, err); + dump_stack(); + } + } - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; + skb_dequeue(&sk->sk_write_queue); + if (err == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; - if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - } else - kfree_skb(skb); - - return err; + dcb->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + if (err) { + printk(KERN_CRIT "%s:err from " + "ccid_hc_tx_packet_sent %d\n", + __FUNCTION__, err); + dump_stack(); + } + } else + kfree(skb); + } } int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) @@ -426,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk) dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); icsk->icsk_retransmits = 0; + init_timer(&dp->dccps_xmit_timer); + dp->dccps_xmit_timer.data = (unsigned long)sk; + dp->dccps_xmit_timer.function = dccp_write_xmit_timer; } int dccp_connect(struct sock *sk) @@ -560,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; if (active) { + dccp_write_xmit(sk, 1); dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, prio)); + /* FIXME do we need a retransmit timer here? */ } else dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6f14bb5a28d4..962df0ea31aa 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -662,17 +662,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - rc = dccp_write_xmit(sk, skb, &timeo); - /* - * XXX we don't use sk_write_queue, so just discard the packet. - * Current plan however is to _use_ sk_write_queue with - * an algorith similar to tcp_sendmsg, where the main difference - * is that in DCCP we have to respect packet boundaries, so - * no coalescing of skbs. - * - * This bug was _quickly_ found & fixed by just looking at an OSTRA - * generated callgraph 8) -acme - */ + skb_queue_tail(&sk->sk_write_queue, skb); + dccp_write_xmit(sk,0); out_release: release_sock(sk); return rc ? : len; @@ -846,6 +837,7 @@ static int dccp_close_state(struct sock *sk) void dccp_close(struct sock *sk, long timeout) { + struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; int state; @@ -862,6 +854,8 @@ void dccp_close(struct sock *sk, long timeout) goto adjudge_to_death; } + sk_stop_timer(sk, &dp->dccps_xmit_timer); + /* * We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the -- cgit v1.2.3-71-gd317 From def42ff4dd6f54ebcf78192579a8ff1f81d8e2e8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 28 Aug 2006 23:57:56 -0700 Subject: [IPV4]: Make struct in_addr::s_addr __be32 There will be relatively small increase in sparse endian warnings, but this (and sin_port) patch is a first step to make networking code endian clean. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 94f557fa4636..9a9d5dd32e73 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -52,7 +52,7 @@ enum { /* Internet address. */ struct in_addr { - __u32 s_addr; + __be32 s_addr; }; #define IP_TOS 1 -- cgit v1.2.3-71-gd317 From cd360007a0eb8cbf17c006cca42aa884d33f96be Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 28 Aug 2006 23:58:32 -0700 Subject: [IPV4]: Make struct sockaddr_in::sin_port __be16 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 9a9d5dd32e73..bcaca8399aed 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -177,7 +177,7 @@ struct in_pktinfo #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { sa_family_t sin_family; /* Address family */ - unsigned short int sin_port; /* Port number */ + __be16 sin_port; /* Port number */ struct in_addr sin_addr; /* Internet address */ /* Pad to size of `struct sockaddr'. */ -- cgit v1.2.3-71-gd317 From 07317621d004e8e6967f2dac8562825267e56135 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:48:17 -0700 Subject: [NETFILTER] bridge: code rearrangement for clarity Cleanup and rearrangement for better style and clarity: Split the function nf_bridge_maybe_copy_header into two pieces Move copy portion out of line. Use Ethernet header size macros. Use header file to handle CONFIG_NETFILTER_BRIDGE differences Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netfilter_bridge.h | 26 +++++++------------------- net/bridge/br_forward.c | 5 +---- net/bridge/br_netfilter.c | 27 +++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 427c67ff89e9..274fe4b33155 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -47,26 +47,12 @@ enum nf_br_hook_priorities { /* Only used in br_forward.c */ -static inline -int nf_bridge_maybe_copy_header(struct sk_buff *skb) +extern int nf_bridge_copy_header(struct sk_buff *skb); +static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) { - int err; - - if (skb->nf_bridge) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - err = skb_cow(skb, 18); - if (err) - return err; - memcpy(skb->data - 18, skb->nf_bridge->data, 18); - skb_push(skb, 4); - } else { - err = skb_cow(skb, 16); - if (err) - return err; - memcpy(skb->data - 16, skb->nf_bridge->data, 16); - } - } - return 0; + if (skb->nf_bridge) + return nf_bridge_copy_header(skb); + return 0; } /* This is called by the IP fragmenting code and it ensures there is @@ -90,6 +76,8 @@ struct bridge_skb_cb { }; extern int brnf_deferred_hooks; +#else +#define nf_bridge_maybe_copy_header(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 864fbbc7b24d..191b861e5e53 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -38,13 +38,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { -#ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ if (nf_bridge_maybe_copy_header(skb)) kfree_skb(skb); - else -#endif - { + else { skb_push(skb, ETH_HLEN); dev_queue_xmit(skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 05b3de888243..b498efcfe451 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -127,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) static inline void nf_bridge_save_header(struct sk_buff *skb) { - int header_size = 16; + int header_size = ETH_HLEN; if (skb->protocol == htons(ETH_P_8021Q)) - header_size = 18; + header_size += VLAN_HLEN; memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); } +/* + * When forwarding bridge frames, we save a copy of the original + * header before processing. + */ +int nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + int header_size = ETH_HLEN; + + if (skb->protocol == htons(ETH_P_8021Q)) + header_size += VLAN_HLEN; + + err = skb_cow(skb, header_size); + if (err) + return err; + + memcpy(skb->data - header_size, skb->nf_bridge->data, header_size); + + if (skb->protocol == htons(ETH_P_8021Q)) + __skb_push(skb, VLAN_HLEN); + return 0; +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ -- cgit v1.2.3-71-gd317 From 9bcfcaf5e9cc887eb39236e43bdbe4b4b2572229 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:48:57 -0700 Subject: [NETFILTER] bridge: simplify nf_bridge_pad Do some simple optimization on the nf_bridge_pad() function and don't use magic constants. Eliminate a double call and the #ifdef'd code for CONFIG_BRIDGE_NETFILTER. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netfilter_bridge.h | 16 +++++----------- net/ipv4/ip_output.c | 15 +++++++-------- 2 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 274fe4b33155..9a4dd11af86e 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -5,9 +5,8 @@ */ #include -#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER) #include -#endif +#include /* Bridge Hooks */ /* After promisc drops, checksum checks. */ @@ -57,16 +56,10 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ -static inline -int nf_bridge_pad(struct sk_buff *skb) +static inline int nf_bridge_pad(const struct sk_buff *skb) { - if (skb->protocol == __constant_htons(ETH_P_IP)) - return 0; - if (skb->nf_bridge) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) - return 4; - } - return 0; + return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q)) + ? VLAN_HLEN : 0; } struct bridge_skb_cb { @@ -78,6 +71,7 @@ struct bridge_skb_cb { extern int brnf_deferred_hooks; #else #define nf_bridge_maybe_copy_header(skb) (0) +#define nf_bridge_pad(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 81b2795a4c20..97aee76fb746 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -426,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs; + unsigned int mtu, hlen, left, len, ll_rs, pad; int offset; __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; @@ -556,14 +556,13 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = raw + hlen; /* Where to start from */ -#ifdef CONFIG_BRIDGE_NETFILTER /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); - mtu -= nf_bridge_pad(skb); -#else - ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); -#endif + * we need to make room for the encapsulating header + */ + pad = nf_bridge_pad(skb); + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); + mtu -= pad; + /* * Fragment the datagram. */ -- cgit v1.2.3-71-gd317 From fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Thu, 31 Aug 2006 15:28:39 -0700 Subject: [NET]: Fix sk->sk_filter field access Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg needlock = 0, while socket is not locked at that moment. In order to avoid this and similar issues in the future, use rcu for sk->sk_filter field read protection. Signed-off-by: Dmitry Mishin Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev --- include/linux/filter.h | 13 +++++++------ include/net/sock.h | 34 +++++++++++++++++----------------- net/core/filter.c | 8 ++++---- net/core/sock.c | 22 +++++++++------------- net/dccp/ipv6.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/packet/af_packet.c | 43 ++++++++++++++++++------------------------- net/sctp/input.c | 2 +- 10 files changed, 61 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c6cb8f095088..91b2e3b9251e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -25,10 +25,10 @@ struct sock_filter /* Filter block */ { - __u16 code; /* Actual filter code */ - __u8 jt; /* Jump true */ - __u8 jf; /* Jump false */ - __u32 k; /* Generic multiuse field */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ }; struct sock_fprog /* Required for SO_ATTACH_FILTER. */ @@ -41,8 +41,9 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct sock_filter insns[0]; + unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; + struct sock_filter insns[0]; }; static inline unsigned int sk_filter_len(struct sk_filter *fp) diff --git a/include/net/sock.h b/include/net/sock.h index 337ebec84c70..edd4d73ce7f5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,30 +862,24 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); * */ -static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { int err; + struct sk_filter *filter; err = security_sock_rcv_skb(sk, skb); if (err) return err; - if (sk->sk_filter) { - struct sk_filter *filter; - - if (needlock) - bh_lock_sock(sk); - - filter = sk->sk_filter; - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - - if (needlock) - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = sk->sk_filter; + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } + rcu_read_unlock_bh(); + return err; } @@ -897,6 +891,12 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) * Remove a filter from a socket and release its resources. */ +static inline void sk_filter_rcu_free(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + kfree(fp); +} + static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) { unsigned int size = sk_filter_len(fp); @@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) atomic_sub(size, &sk->sk_omem_alloc); if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_rcu_free); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index 5b4486a60cf6..6732782a5a40 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (!err) { struct sk_filter *old_fp; - spin_lock_bh(&sk->sk_lock.slock); - old_fp = sk->sk_filter; - sk->sk_filter = fp; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); fp = old_fp; } diff --git a/net/core/sock.c b/net/core/sock.c index cfaf09039b02..b77e155cbe6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; } - /* It would be deadlock, if sock_queue_rcv_skb is used - with socket lock! We assume that users of this - function are lock free. - */ - err = sk_filter(sk, skb, 1); + err = sk_filter(sk, skb); if (err) goto out; @@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) { int rc = NET_RX_SUCCESS; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; @@ -606,15 +602,15 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - spin_lock_bh(&sk->sk_lock.slock); - filter = sk->sk_filter; + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); if (filter) { - sk->sk_filter = NULL; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_release(sk, filter); + rcu_read_unlock_bh(); break; } - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_unlock_bh(); ret = -ENONET; break; @@ -884,10 +880,10 @@ void sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = sk->sk_filter; + filter = rcu_dereference(sk->sk_filter); if (filter) { sk_filter_release(sk, filter); - sk->sk_filter = NULL; + rcu_assign_pointer(sk->sk_filter, NULL); } sock_disable_timestamp(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f9c5e12d7038..7a47399cf31f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -970,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 86f7f3b28e70..72ecc6e62ec4 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig goto out; } - err = sk_filter(sk, skb, 0); + err = sk_filter(sk, skb); if (err) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23b46e36b147..39b179856082 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1104,7 +1104,7 @@ process: goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b18918f3011..2546fc9f0a78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1075,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* @@ -1232,7 +1232,7 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 300215bdbf46..f4ccb90e6739 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -427,21 +427,24 @@ out_unlock: } #endif -static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) +static inline int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned *snaplen) { struct sk_filter *filter; + int err = 0; - bh_lock_sock(sk); - filter = sk->sk_filter; - /* - * Our caller already checked that filter != NULL but we need to - * verify that under bh_lock_sock() to be safe - */ - if (likely(filter != NULL)) - res = sk_run_filter(skb, filter->insns, filter->len); - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) { + err = sk_run_filter(skb, filter->insns, filter->len); + if (!err) + err = -EPERM; + else if (*snaplen > err) + *snaplen = err; + } + rcu_read_unlock_bh(); - return res; + return err; } /* @@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -593,13 +591,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; diff --git a/net/sctp/input.c b/net/sctp/input.c index 8a34d95602ce..03f65de75d88 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset(skb); - if (sk_filter(sk, skb, 1)) + if (sk_filter(sk, skb)) goto discard_release; /* Create an SCTP packet structure. */ -- cgit v1.2.3-71-gd317 From eb328111efde7bca782f340fe805756039ec6a0c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:01:59 -0700 Subject: [GENL]: Provide more information to userspace about registered genl families Additionaly exports the following information when providing the list of registered generic netlink families: - protocol version - header size - maximum number of attributes - list of available operations including - id - flags - avaiability of policy and doit/dumpit function libnl HEAD provides a utility to read this new information: 0x0010 nlctrl version 1 hdrsize 0 maxattr 6 op GETFAMILY (0x03) [POLICY,DOIT,DUMPIT] 0x0011 NLBL_MGMT version 1 hdrsize 0 maxattr 0 op unknown (0x02) [DOIT] op unknown (0x03) [DOIT] .... Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/genetlink.h | 18 ++++++++++++++++++ include/net/genetlink.h | 2 -- net/netlink/genetlink.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 84f12a41dc01..9049dc65ae51 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -16,6 +16,8 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) +#define GENL_ADMIN_PERM 0x01 + /* * List of reserved static generic netlink identifiers: */ @@ -43,9 +45,25 @@ enum { CTRL_ATTR_UNSPEC, CTRL_ATTR_FAMILY_ID, CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, __CTRL_ATTR_MAX, }; #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP_DOIT, + CTRL_ATTR_OP_DUMPIT, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + #endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 97d6d3aba9d2..4a38d85e4e25 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -27,8 +27,6 @@ struct genl_family struct list_head family_list; /* private */ }; -#define GENL_ADMIN_PERM 0x01 - /** * struct genl_info - receiving information * @snd_seq: sending sequence number diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 3ac942cdb677..49bc2db7982b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -387,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len) static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { + struct nlattr *nla_ops; + struct genl_ops *ops; void *hdr; + int idx = 1; hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, family->version); @@ -396,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); + NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); + NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); + NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); + + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) + goto nla_put_failure; + + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + + if (ops->policy) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); + + if (ops->doit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); + + if (ops->dumpit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); + + nla_nest_end(skb, nest); + } + + nla_nest_end(skb, nla_ops); return genlmsg_end(skb, hdr); @@ -411,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + if (chains_to_skip != 0) + genl_lock(); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -428,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: + if (chains_to_skip != 0) + genl_unlock(); + cb->args[0] = i; cb->args[1] = n; -- cgit v1.2.3-71-gd317 From 1bf38a36b6a0e810dafae048fdbb999e587f0f2f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:57:09 -0700 Subject: [NETFILTER]: remove unused include file Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_logging.h | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 include/linux/netfilter_logging.h (limited to 'include/linux') diff --git a/include/linux/netfilter_logging.h b/include/linux/netfilter_logging.h deleted file mode 100644 index 562bb6aad4e1..000000000000 --- a/include/linux/netfilter_logging.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Internal logging interface, which relies on the real - LOG target modules */ -#ifndef __LINUX_NETFILTER_LOGGING_H -#define __LINUX_NETFILTER_LOGGING_H - -#ifdef __KERNEL__ -#include - -struct nf_logging_t { - void (*nf_log_packet)(struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const char *prefix); - void (*nf_log)(char *pfh, size_t len, - const char *prefix); -}; - -extern void nf_log_register(int pf, const struct nf_logging_t *logging); -extern void nf_log_unregister(int pf, const struct nf_logging_t *logging); - -extern void nf_log_packet(int pf, - struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const char *fmt, ...); -extern void nf_log(int pf, - char *pfh, size_t len, - const char *fmt, ...); -#endif /*__KERNEL__*/ - -#endif /*__LINUX_NETFILTER_LOGGING_H*/ -- cgit v1.2.3-71-gd317 From df0933dcb027e156cb5253570ad694b81bd52b69 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:57:53 -0700 Subject: [NETFILTER]: kill listhelp.h Kill listhelp.h and use the list.h functions instead. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 4 - include/linux/netfilter_ipv4/listhelp.h | 123 ----------------- net/bridge/netfilter/ebtables.c | 76 ++++++----- net/ipv4/netfilter/arp_tables.c | 2 - net/ipv4/netfilter/ip_conntrack_core.c | 189 ++++++++++++--------------- net/ipv4/netfilter/ip_conntrack_proto_gre.c | 24 ++-- net/ipv4/netfilter/ip_conntrack_standalone.c | 1 - net/ipv4/netfilter/ip_nat_core.c | 4 - net/ipv4/netfilter/ip_nat_helper.c | 4 - net/ipv4/netfilter/ip_nat_rule.c | 4 - net/ipv4/netfilter/ip_nat_standalone.c | 4 - net/ipv6/netfilter/ip6_tables.c | 3 - net/netfilter/nf_conntrack_core.c | 185 ++++++++++++-------------- net/netfilter/nf_conntrack_standalone.c | 1 - net/netfilter/x_tables.c | 17 ++- 15 files changed, 237 insertions(+), 404 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/listhelp.h (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 03d1027fb0e8..c832295dbf61 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -138,10 +138,6 @@ struct xt_counters_info #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include - #ifdef CONFIG_COMPAT #define COMPAT_TO_USER 1 #define COMPAT_FROM_USER -1 diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h deleted file mode 100644 index 5d92cf044d91..000000000000 --- a/include/linux/netfilter_ipv4/listhelp.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _LISTHELP_H -#define _LISTHELP_H -#include - -/* Header to do more comprehensive job than linux/list.h; assume list - is first entry in structure. */ - -/* Return pointer to first true entry, if any, or NULL. A macro - required to allow inlining of cmpfn. */ -#define LIST_FIND(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_READ_LOCK(head); \ - list_for_each(__i, (head)) \ - if (cmpfn((const type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -#define LIST_FIND_W(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_WRITE_LOCK(head); \ - list_for_each(__i, (head)) \ - if (cmpfn((type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -/* Just like LIST_FIND but we search backwards */ -#define LIST_FIND_B(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_READ_LOCK(head); \ - list_for_each_prev(__i, (head)) \ - if (cmpfn((const type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -static inline int -__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } - -/* Is this entry in the list? */ -static inline int -list_inlist(struct list_head *head, const void *entry) -{ - return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL; -} - -/* Delete from list. */ -#ifdef CONFIG_NETFILTER_DEBUG -#define LIST_DELETE(head, oldentry) \ -do { \ - ASSERT_WRITE_LOCK(head); \ - if (!list_inlist(head, oldentry)) \ - printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ - __FILE__, __LINE__, #oldentry, oldentry, #head); \ - else list_del((struct list_head *)oldentry); \ -} while(0) -#else -#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) -#endif - -/* Append. */ -static inline void -list_append(struct list_head *head, void *new) -{ - ASSERT_WRITE_LOCK(head); - list_add((new), (head)->prev); -} - -/* Prepend. */ -static inline void -list_prepend(struct list_head *head, void *new) -{ - ASSERT_WRITE_LOCK(head); - list_add(new, head); -} - -/* Insert according to ordering function; insert before first true. */ -#define LIST_INSERT(head, new, cmpfn) \ -do { \ - struct list_head *__i; \ - ASSERT_WRITE_LOCK(head); \ - list_for_each(__i, (head)) \ - if ((new), (typeof (new))__i) \ - break; \ - list_add((struct list_head *)(new), __i->prev); \ -} while(0) - -/* If the field after the list_head is a nul-terminated string, you - can use these functions. */ -static inline int __list_cmp_name(const void *i, const char *name) -{ - return strcmp(name, i+sizeof(struct list_head)) == 0; -} - -/* Returns false if same name already in list, otherwise does insert. */ -static inline int -list_named_insert(struct list_head *head, void *new) -{ - if (LIST_FIND(head, __list_cmp_name, void *, - new + sizeof(struct list_head))) - return 0; - list_prepend(head, new); - return 1; -} - -/* Find this named element in the list. */ -#define list_named_find(head, name) \ -LIST_FIND(head, __list_cmp_name, void *, name) - -#endif /*_LISTHELP_H*/ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d06a5075b5f6..3df55b2bd91d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -31,12 +32,6 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -/* list_named_find */ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include -#include - #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ @@ -278,18 +273,22 @@ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, struct mutex *mutex) { - void *ret; + struct { + struct list_head list; + char name[EBT_FUNCTION_MAXNAMELEN]; + } *e; *error = mutex_lock_interruptible(mutex); if (*error != 0) return NULL; - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - mutex_unlock(mutex); + list_for_each_entry(e, head, list) { + if (strcmp(e->name, name) == 0) + return e; } - return ret; + *error = -ENOENT; + mutex_unlock(mutex); + return NULL; } #ifndef CONFIG_KMOD @@ -1043,15 +1042,19 @@ free_newinfo: int ebt_register_target(struct ebt_target *target) { + struct ebt_target *t; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_targets, target)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(t, &ebt_targets, list) { + if (strcmp(t->name, target->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&target->list, &ebt_targets); mutex_unlock(&ebt_mutex); return 0; @@ -1060,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target) void ebt_unregister_target(struct ebt_target *target) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_targets, target); + list_del(&target->list); mutex_unlock(&ebt_mutex); } int ebt_register_match(struct ebt_match *match) { + struct ebt_match *m; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_matches, match)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(m, &ebt_matches, list) { + if (strcmp(m->name, match->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&match->list, &ebt_matches); mutex_unlock(&ebt_mutex); return 0; @@ -1083,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match) void ebt_unregister_match(struct ebt_match *match) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_matches, match); + list_del(&match->list); mutex_unlock(&ebt_mutex); } int ebt_register_watcher(struct ebt_watcher *watcher) { + struct ebt_watcher *w; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_watchers, watcher)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(w, &ebt_watchers, list) { + if (strcmp(w->name, watcher->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&watcher->list, &ebt_watchers); mutex_unlock(&ebt_mutex); return 0; @@ -1106,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher) void ebt_unregister_watcher(struct ebt_watcher *watcher) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_watchers, watcher); + list_del(&watcher->list); mutex_unlock(&ebt_mutex); } int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; + struct ebt_table *t; int ret, i, countersize; if (!table || !table->table ||!table->table->entries || @@ -1158,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table) if (ret != 0) goto free_chainstack; - if (list_named_find(&ebt_tables, table->name)) { - ret = -EEXIST; - BUGPRINT("Table name already exists\n"); - goto free_unlock; + list_for_each_entry(t, &ebt_tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + BUGPRINT("Table name already exists\n"); + goto free_unlock; + } } /* Hold a reference count if the chains aren't empty */ @@ -1169,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_prepend(&ebt_tables, table); + list_add(&table->list, &ebt_tables); mutex_unlock(&ebt_mutex); return 0; free_unlock: @@ -1195,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table) return; } mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_tables, table); + list_del(&table->list); mutex_unlock(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { @@ -1465,7 +1479,7 @@ static int __init ebtables_init(void) int ret; mutex_lock(&ebt_mutex); - list_named_insert(&ebt_targets, &ebt_standard_target); + list_add(&ebt_standard_target.list, &ebt_targets); mutex_unlock(&ebt_mutex); if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4f10b06413a1..aaeaa9ce0f28 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -56,8 +56,6 @@ do { \ #define ARP_NF_ASSERT(x) #endif -#include - static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, char *hdr_addr, int len) { diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 5da25ad50309..2568d480e9a9 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -47,7 +47,6 @@ #include #include #include -#include #define IP_CONNTRACK_VERSION "2.4" @@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) static void clean_from_lists(struct ip_conntrack *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&ip_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ ip_ct_remove_expectations(ct); @@ -367,16 +361,6 @@ static void death_by_timeout(unsigned long ul_conntrack) ip_conntrack_put(ct); } -static inline int -conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - ASSERT_READ_LOCK(&ip_conntrack_lock); - return tuplehash_to_ctrack(i) != ignored_conntrack - && ip_ct_tuple_equal(tuple, &i->tuple); -} - struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) @@ -386,7 +370,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (tuplehash_to_ctrack(h) != ignored_conntrack && + ip_ct_tuple_equal(tuple, &h->tuple)) { CONNTRACK_STAT_INC(found); return h; } @@ -417,10 +402,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &ip_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &ip_conntrack_hash[repl_hash]); } void ip_conntrack_hash_insert(struct ip_conntrack *ct) @@ -440,6 +425,7 @@ int __ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -470,43 +456,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &ip_conntrack_hash[hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __ip_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + CONNTRACK_STAT_INC(insert); + write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + return NF_ACCEPT; +out: CONNTRACK_STAT_INC(insert_failed); write_unlock_bh(&ip_conntrack_lock); - return NF_DROP; } @@ -527,23 +513,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; + struct ip_conntrack *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { - ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&ip_conntrack_lock); @@ -559,18 +543,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) -{ - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct ip_conntrack_helper * __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct ip_conntrack_helper * @@ -1062,7 +1044,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&ip_conntrack_lock); return 0; @@ -1081,24 +1063,24 @@ __ip_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static inline void unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } - return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -1108,10 +1090,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); - for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < ip_conntrack_htable_size; i++) { + list_for_each_entry(h, &ip_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1237,46 +1221,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nf_conntrack_get(nskb->nfct); } -static inline int -do_iter(const struct ip_conntrack_tuple_hash *i, - int (*iter)(struct ip_conntrack *i, void *data), - void *data) -{ - return iter(tuplehash_to_ctrack(i), data); -} - /* Bring out ya dead! */ -static struct ip_conntrack_tuple_hash * +static struct ip_conntrack * get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + } + list_for_each_entry(h, &unconfirmed, list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); + return NULL; - return h; +found: + atomic_inc(&ct->ct_general.use); + write_unlock_bh(&ip_conntrack_lock); + return ct; } void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) { - struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct ip_conntrack *ct = tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 4ee016c427b4..92c6d8b178c9 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) -#include #include #include #include @@ -82,10 +81,12 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) __be16 key = 0; read_lock_bh(&ip_ct_gre_lock); - km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (km) - key = km->tuple.src.u.gre.key; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t)) { + key = km->tuple.src.u.gre.key; + break; + } + } read_unlock_bh(&ip_ct_gre_lock); DEBUGP("lookup src key 0x%x up key for ", key); @@ -99,7 +100,7 @@ int ip_ct_gre_keymap_add(struct ip_conntrack *ct, struct ip_conntrack_tuple *t, int reply) { - struct ip_ct_gre_keymap **exist_km, *km, *old; + struct ip_ct_gre_keymap **exist_km, *km; if (!ct->helper || strcmp(ct->helper->name, "pptp")) { DEBUGP("refusing to add GRE keymap to non-pptp session\n"); @@ -113,13 +114,10 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, if (*exist_km) { /* check whether it's a retransmission */ - old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (old == *exist_km) { - DEBUGP("retransmission\n"); - return 0; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *exist_km) + return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct); return -EEXIST; @@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, DUMP_TUPLE_GRE(&km->tuple); write_lock_bh(&ip_ct_gre_lock); - list_append(&gre_keymap_list, km); + list_add_tail(&km->list, &gre_keymap_list); write_unlock_bh(&ip_ct_gre_lock); return 0; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 3f5d495b853b..02135756562e 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -35,7 +35,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 4c540d03d48e..71f3e09cbc84 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -22,9 +22,6 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include @@ -33,7 +30,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 021c3daae3ed..7f6a75984f6c 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -27,16 +27,12 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index e59f5a8ecb6b..7b703839aa58 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -19,14 +19,10 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f3b778355432..9c577db62047 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -30,9 +30,6 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include @@ -40,7 +37,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d1c315364ee7..73d477ce216b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -70,9 +70,6 @@ do { \ #define IP_NF_ASSERT(x) #endif - -#include - #if 0 /* All the better to debug you with... */ #define static diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3b64dbee6620..927137b8b3b5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -57,7 +57,6 @@ #include #include #include -#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -539,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct) static void clean_from_lists(struct nf_conn *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&nf_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -617,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_put(ct); } -static inline int -conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) -{ - ASSERT_READ_LOCK(&nf_conntrack_lock); - return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack - && nf_ct_tuple_equal(tuple, &i->tuple); -} - struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) @@ -636,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, ASSERT_READ_LOCK(&nf_conntrack_lock); list_for_each_entry(h, &nf_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); return h; } @@ -667,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int repl_hash) { ct->id = ++nf_conntrack_next_id; - list_prepend(&nf_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&nf_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &nf_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -690,7 +675,9 @@ int __nf_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + struct nf_conn_help *help; enum ip_conntrack_info ctinfo; ct = nf_ct_get(*pskb, &ctinfo); @@ -720,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&nf_conntrack_hash[hash], - conntrack_tuple_cmp, - struct nf_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&nf_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct nf_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - struct nf_conn_help *help; - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &nf_conntrack_hash[hash], list) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __nf_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); - write_unlock_bh(&nf_conntrack_lock); - help = nfct_help(ct); - if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __nf_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + NF_CT_STAT_INC(insert); + write_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); + if (help && help->helper) + nf_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + nf_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; +out: NF_CT_STAT_INC(insert_failed); write_unlock_bh(&nf_conntrack_lock); return NF_DROP; @@ -777,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct nf_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, - &nf_ct_tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct = NULL; + struct nf_conn *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); - if (h) { - ct = nf_ct_tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = nf_ct_tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&nf_conntrack_lock); @@ -810,18 +794,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct nf_conntrack_helper *i, - const struct nf_conntrack_tuple *rtuple) -{ - return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct nf_conntrack_helper *, - tuple); + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct nf_conntrack_helper * @@ -1323,7 +1305,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) return ret; } write_lock_bh(&nf_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&nf_conntrack_lock); return 0; @@ -1342,8 +1324,8 @@ __nf_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) +static inline void unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); @@ -1352,17 +1334,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, nf_conntrack_event(IPCT_HELPER, ct); help->helper = NULL; } - return 0; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { unsigned int i; + struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { @@ -1374,10 +1356,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); - for (i = 0; i < nf_conntrack_htable_size; i++) - LIST_FIND_W(&nf_conntrack_hash[i], unhelp, - struct nf_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < nf_conntrack_htable_size; i++) { + list_for_each_entry(h, &nf_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&nf_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1510,37 +1494,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i, } /* Bring out ya dead! */ -static struct nf_conntrack_tuple_hash * +static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { - struct nf_conntrack_tuple_hash *h = NULL; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; write_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, - struct nf_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct nf_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + list_for_each_entry(h, &unconfirmed, list) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + return NULL; +found: + atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&nf_conntrack_lock); - - return h; + return ct; } void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) { - struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9a1de0ca475b..5954f6773810 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -37,7 +37,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8037ba63d587..be7baf4f6846 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -81,7 +81,7 @@ xt_unregister_target(struct xt_target *target) int af = target->family; mutex_lock(&xt[af].mutex); - LIST_DELETE(&xt[af].target, target); + list_del(&target->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_target); @@ -138,7 +138,7 @@ xt_unregister_match(struct xt_match *match) int af = match->family; mutex_lock(&xt[af].mutex); - LIST_DELETE(&xt[af].match, match); + list_del(&match->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_match); @@ -575,15 +575,18 @@ int xt_register_table(struct xt_table *table, { int ret; struct xt_table_info *private; + struct xt_table *t; ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) return ret; /* Don't autoload: we'd eat our tail... */ - if (list_named_find(&xt[table->af].tables, table->name)) { - ret = -EEXIST; - goto unlock; + list_for_each_entry(t, &xt[table->af].tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + goto unlock; + } } /* Simplifies replace_table code. */ @@ -598,7 +601,7 @@ int xt_register_table(struct xt_table *table, /* save number of initial entries */ private->initial_entries = private->number; - list_prepend(&xt[table->af].tables, table); + list_add(&table->list, &xt[table->af].tables); ret = 0; unlock: @@ -613,7 +616,7 @@ void *xt_unregister_table(struct xt_table *table) mutex_lock(&xt[table->af].mutex); private = table->private; - LIST_DELETE(&xt[table->af].tables, table); + list_del(&table->list); mutex_unlock(&xt[table->af].mutex); return private; -- cgit v1.2.3-71-gd317 From 9123de2c043996050bacf77031cad845f5976f5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:59:42 -0700 Subject: [NETFILTER]: ip6table_mangle: reroute when nfmark changes in NF_IP6_LOCAL_OUT Now that IPv6 supports policy routing we need to reroute in NF_IP6_LOCAL_OUT when the mark value changes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6.h | 1 + include/net/ip6_route.h | 2 -- net/ipv6/netfilter/ip6table_mangle.c | 8 ++------ 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 52a7b9e76428..d97e268cdfe5 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -73,6 +73,7 @@ enum nf_ip6_hook_priorities { }; #ifdef CONFIG_NETFILTER +extern int ip6_route_me_harder(struct sk_buff *skb); extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 297909570041..6ca6b71dfe0f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -57,8 +57,6 @@ extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl); -extern int ip6_route_me_harder(struct sk_buff *skb); - extern void ip6_route_init(void); extern void ip6_route_cleanup(void); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 32db04fd8310..386ea260e767 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -180,12 +180,8 @@ ip6t_local_hook(unsigned int hook, && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) || (*pskb)->nfmark != nfmark - || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) { - - /* something which could affect routing has changed */ - - DEBUGP("ip6table_mangle: we'd need to re-route a packet\n"); - } + || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) + return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; return ret; } -- cgit v1.2.3-71-gd317 From c1fe3ca5106d9568791433fa6c7f27e71ac69e1b Mon Sep 17 00:00:00 2001 From: George Hansper Date: Wed, 20 Sep 2006 12:03:23 -0700 Subject: [NETFILTER]: TCP conntrack: improve dead connection detection Don't count window updates as retransmissions. Signed-off-by: George Hansper Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 1 + net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 +++- net/netfilter/nf_conntrack_proto_tcp.c | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index b2feeffde384..6b01ba297727 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -49,6 +49,7 @@ struct ip_ct_tcp u_int32_t last_seq; /* Last sequence number seen in dir */ u_int32_t last_ack; /* Last sequence number seen in opposite dir */ u_int32_t last_end; /* Last seq + len */ + u_int16_t last_win; /* Last window advertisement seen in dir */ }; #endif /* __KERNEL__ */ diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 75a7237eb8c1..03ae9a04cb37 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9fc0ee61f92a..238bbb5b72ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -688,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } -- cgit v1.2.3-71-gd317 From 9fa492cdc160cd27ce1046cb36f47d3b2b1efa21 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:05:37 -0700 Subject: [NETFILTER]: x_tables: simplify compat API Split the xt_compat_match/xt_compat_target into smaller type-safe functions performing just one operation. Handle all alignment and size-related conversions centrally in these function instead of requiring each module to implement a full-blown conversion function. Replace ->compat callback by ->compat_from_user and ->compat_to_user callbacks, responsible for converting just a single private structure. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 29 +++--- net/ipv4/netfilter/ip_tables.c | 115 +++++----------------- net/netfilter/x_tables.c | 192 +++++++++++++++++++++---------------- 3 files changed, 151 insertions(+), 185 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c832295dbf61..739a98eebe2c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -138,12 +138,6 @@ struct xt_counters_info #include -#ifdef CONFIG_COMPAT -#define COMPAT_TO_USER 1 -#define COMPAT_FROM_USER -1 -#define COMPAT_CALC_SIZE 0 -#endif - struct xt_match { struct list_head list; @@ -176,7 +170,8 @@ struct xt_match void (*destroy)(const struct xt_match *match, void *matchinfo); /* Called when userspace align differs from kernel space one */ - int (*compat)(void *match, void **dstptr, int *size, int convert); + void (*compat_from_user)(void *dst, void *src); + int (*compat_to_user)(void __user *dst, void *src); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -186,6 +181,7 @@ struct xt_match char *table; unsigned int matchsize; + unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -224,13 +220,15 @@ struct xt_target void (*destroy)(const struct xt_target *target, void *targinfo); /* Called when userspace align differs from kernel space one */ - int (*compat)(void *target, void **dstptr, int *size, int convert); + void (*compat_from_user)(void *dst, void *src); + int (*compat_to_user)(void __user *dst, void *src); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; char *table; unsigned int targetsize; + unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -387,9 +385,18 @@ struct compat_xt_counters_info extern void xt_compat_lock(int af); extern void xt_compat_unlock(int af); -extern int xt_compat_match(void *match, void **dstptr, int *size, int convert); -extern int xt_compat_target(void *target, void **dstptr, int *size, - int convert); + +extern int xt_compat_match_offset(struct xt_match *match); +extern void xt_compat_match_from_user(struct xt_entry_match *m, + void **dstptr, int *size); +extern int xt_compat_match_to_user(struct xt_entry_match *m, + void * __user *dstptr, int *size); + +extern int xt_compat_target_offset(struct xt_target *target); +extern void xt_compat_target_from_user(struct xt_entry_target *t, + void **dstptr, int *size); +extern int xt_compat_target_to_user(struct xt_entry_target *t, + void * __user *dstptr, int *size); #endif /* CONFIG_COMPAT */ #endif /* __KERNEL__ */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 673581db986e..800067d69a9a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -942,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset) return delta; } -struct compat_ipt_standard_target +static void compat_standard_from_user(void *dst, void *src) { - struct compat_xt_entry_target target; - compat_int_t verdict; -}; - -struct compat_ipt_standard -{ - struct compat_ipt_entry entry; - struct compat_ipt_standard_target target; -}; + int v = *(compat_int_t *)src; -#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) -#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) -#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) + if (v > 0) + v += compat_calc_jump(v); + memcpy(dst, &v, sizeof(v)); +} -static int compat_ipt_standard_fn(void *target, - void **dstptr, int *size, int convert) +static int compat_standard_to_user(void __user *dst, void *src) { - struct compat_ipt_standard_target compat_st, *pcompat_st; - struct ipt_standard_target st, *pst; - int ret; + compat_int_t cv = *(int *)src; - ret = 0; - switch (convert) { - case COMPAT_TO_USER: - pst = target; - memcpy(&compat_st.target, &pst->target, - sizeof(compat_st.target)); - compat_st.verdict = pst->verdict; - if (compat_st.verdict > 0) - compat_st.verdict -= - compat_calc_jump(compat_st.verdict); - compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; - if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) - ret = -EFAULT; - *size -= IPT_ST_OFFSET; - *dstptr += IPT_ST_COMPAT_LEN; - break; - case COMPAT_FROM_USER: - pcompat_st = target; - memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); - st.verdict = pcompat_st->verdict; - if (st.verdict > 0) - st.verdict += compat_calc_jump(st.verdict); - st.target.u.user.target_size = IPT_ST_LEN; - memcpy(*dstptr, &st, IPT_ST_LEN); - *size += IPT_ST_OFFSET; - *dstptr += IPT_ST_LEN; - break; - case COMPAT_CALC_SIZE: - *size += IPT_ST_OFFSET; - break; - default: - ret = -ENOPROTOOPT; - break; - } - return ret; + if (cv > 0) + cv -= compat_calc_jump(cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static inline int compat_calc_match(struct ipt_entry_match *m, int * size) { - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(m->u.kernel.match); return 0; } @@ -1023,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, entry_offset = (void *)e - base; IPT_MATCH_ITERATE(e, compat_calc_match, &off); t = ipt_get_target(e); - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1412,17 +1364,13 @@ struct compat_ipt_replace { }; static inline int compat_copy_match_to_user(struct ipt_entry_match *m, - void __user **dstptr, compat_uint_t *size) + void * __user *dstptr, compat_uint_t *size) { - if (m->u.kernel.match->compat) - return m->u.kernel.match->compat(m, dstptr, size, - COMPAT_TO_USER); - else - return xt_compat_match(m, dstptr, size, COMPAT_TO_USER); + return xt_compat_match_to_user(m, dstptr, size); } static int compat_copy_entry_to_user(struct ipt_entry *e, - void __user **dstptr, compat_uint_t *size) + void * __user *dstptr, compat_uint_t *size) { struct ipt_entry_target __user *t; struct compat_ipt_entry __user *ce; @@ -1442,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e, if (ret) goto out; t = ipt_get_target(e); - if (t->u.kernel.target->compat) - ret = t->u.kernel.target->compat(t, dstptr, size, - COMPAT_TO_USER); - else - ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER); + ret = xt_compat_target_to_user(t, dstptr, size); if (ret) goto out; ret = -EFAULT; @@ -1478,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m, return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(match); (*i)++; return 0; @@ -1543,10 +1483,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, } t->u.kernel.target = target; - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(target); *size += off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1584,10 +1521,7 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, dm = (struct ipt_entry_match *)*dstptr; match = m->u.kernel.match; - if (match->compat) - match->compat(m, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_match(m, dstptr, size, COMPAT_FROM_USER); + xt_compat_match_from_user(m, dstptr, size); ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), name, hookmask, ip->proto, @@ -1635,10 +1569,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; - if (target->compat) - target->compat(t, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_target(t, dstptr, size, COMPAT_FROM_USER); + xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_IP_NUMHOOKS; h++) { @@ -2205,7 +2136,9 @@ static struct ipt_target ipt_standard_target = { .targetsize = sizeof(int), .family = AF_INET, #ifdef CONFIG_COMPAT - .compat = &compat_ipt_standard_fn, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif }; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index be7baf4f6846..58522fc65d33 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -333,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_match(void *match, void **dstptr, int *size, int convert) +int xt_compat_match_offset(struct xt_match *match) { - struct xt_match *m; - struct compat_xt_entry_match *pcompat_m; - struct xt_entry_match *pm; - u_int16_t msize; - int off, ret; + u_int16_t csize = match->compatsize ? : match->matchsize; + return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); +} +EXPORT_SYMBOL_GPL(xt_compat_match_offset); - ret = 0; - m = ((struct xt_entry_match *)match)->u.kernel.match; - off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize); - switch (convert) { - case COMPAT_TO_USER: - pm = (struct xt_entry_match *)match; - msize = pm->u.user.match_size; - if (copy_to_user(*dstptr, pm, msize)) { - ret = -EFAULT; - break; - } - msize -= off; - if (put_user(msize, (u_int16_t *)*dstptr)) - ret = -EFAULT; - *size -= off; - *dstptr += msize; - break; - case COMPAT_FROM_USER: - pcompat_m = (struct compat_xt_entry_match *)match; - pm = (struct xt_entry_match *)*dstptr; - msize = pcompat_m->u.user.match_size; - memcpy(pm, pcompat_m, msize); - msize += off; - pm->u.user.match_size = msize; - *size += off; - *dstptr += msize; - break; - case COMPAT_CALC_SIZE: - *size += off; - break; - default: - ret = -ENOPROTOOPT; - break; +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + int *size) +{ + struct xt_match *match = m->u.kernel.match; + struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; + int pad, off = xt_compat_match_offset(match); + u_int16_t msize = cm->u.user.match_size; + + m = *dstptr; + memcpy(m, cm, sizeof(*cm)); + if (match->compat_from_user) + match->compat_from_user(m->data, cm->data); + else + memcpy(m->data, cm->data, msize - sizeof(*cm)); + pad = XT_ALIGN(match->matchsize) - match->matchsize; + if (pad > 0) + memset(m->data + match->matchsize, 0, pad); + + msize += off; + m->u.user.match_size = msize; + + *size += off; + *dstptr += msize; +} +EXPORT_SYMBOL_GPL(xt_compat_match_from_user); + +int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, + int *size) +{ + struct xt_match *match = m->u.kernel.match; + struct compat_xt_entry_match __user *cm = *dstptr; + int off = xt_compat_match_offset(match); + u_int16_t msize = m->u.user.match_size - off; + + if (copy_to_user(cm, m, sizeof(*cm)) || + put_user(msize, &cm->u.user.match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user((void __user *)cm->data, m->data)) + return -EFAULT; + } else { + if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) + return -EFAULT; } - return ret; + + *size -= off; + *dstptr += msize; + return 0; } -EXPORT_SYMBOL_GPL(xt_compat_match); -#endif +EXPORT_SYMBOL_GPL(xt_compat_match_to_user); +#endif /* CONFIG_COMPAT */ int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook_mask, @@ -410,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_target); #ifdef CONFIG_COMPAT -int xt_compat_target(void *target, void **dstptr, int *size, int convert) +int xt_compat_target_offset(struct xt_target *target) { - struct xt_target *t; - struct compat_xt_entry_target *pcompat; - struct xt_entry_target *pt; - u_int16_t tsize; - int off, ret; + u_int16_t csize = target->compatsize ? : target->targetsize; + return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); +} +EXPORT_SYMBOL_GPL(xt_compat_target_offset); - ret = 0; - t = ((struct xt_entry_target *)target)->u.kernel.target; - off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize); - switch (convert) { - case COMPAT_TO_USER: - pt = (struct xt_entry_target *)target; - tsize = pt->u.user.target_size; - if (copy_to_user(*dstptr, pt, tsize)) { - ret = -EFAULT; - break; - } - tsize -= off; - if (put_user(tsize, (u_int16_t *)*dstptr)) - ret = -EFAULT; - *size -= off; - *dstptr += tsize; - break; - case COMPAT_FROM_USER: - pcompat = (struct compat_xt_entry_target *)target; - pt = (struct xt_entry_target *)*dstptr; - tsize = pcompat->u.user.target_size; - memcpy(pt, pcompat, tsize); - tsize += off; - pt->u.user.target_size = tsize; - *size += off; - *dstptr += tsize; - break; - case COMPAT_CALC_SIZE: - *size += off; - break; - default: - ret = -ENOPROTOOPT; - break; +void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, + int *size) +{ + struct xt_target *target = t->u.kernel.target; + struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; + int pad, off = xt_compat_target_offset(target); + u_int16_t tsize = ct->u.user.target_size; + + t = *dstptr; + memcpy(t, ct, sizeof(*ct)); + if (target->compat_from_user) + target->compat_from_user(t->data, ct->data); + else + memcpy(t->data, ct->data, tsize - sizeof(*ct)); + pad = XT_ALIGN(target->targetsize) - target->targetsize; + if (pad > 0) + memset(t->data + target->targetsize, 0, pad); + + tsize += off; + t->u.user.target_size = tsize; + + *size += off; + *dstptr += tsize; +} +EXPORT_SYMBOL_GPL(xt_compat_target_from_user); + +int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, + int *size) +{ + struct xt_target *target = t->u.kernel.target; + struct compat_xt_entry_target __user *ct = *dstptr; + int off = xt_compat_target_offset(target); + u_int16_t tsize = t->u.user.target_size - off; + + if (copy_to_user(ct, t, sizeof(*ct)) || + put_user(tsize, &ct->u.user.target_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user((void __user *)ct->data, t->data)) + return -EFAULT; + } else { + if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) + return -EFAULT; } - return ret; + + *size -= off; + *dstptr += tsize; + return 0; } -EXPORT_SYMBOL_GPL(xt_compat_target); +EXPORT_SYMBOL_GPL(xt_compat_target_to_user); #endif struct xt_table_info *xt_alloc_table_info(unsigned int size) -- cgit v1.2.3-71-gd317 From edd5a329cf69c112882e03c8ab55e985062a5d2a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:07:39 -0700 Subject: [NETFILTER]: PPTP conntrack: fix whitespace errors Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 26 +++---- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 76 ++++++++++---------- net/ipv4/netfilter/ip_conntrack_proto_gre.c | 28 ++++---- net/ipv4/netfilter/ip_nat_helper_pptp.c | 92 ++++++++++++------------ net/ipv4/netfilter/ip_nat_proto_gre.c | 20 +++--- 5 files changed, 121 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 816144c75de0..88f66d3c8765 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -285,19 +285,19 @@ struct PptpSetLinkInfo { }; union pptp_ctrl_union { - struct PptpStartSessionRequest sreq; - struct PptpStartSessionReply srep; - struct PptpStopSessionRequest streq; - struct PptpStopSessionReply strep; - struct PptpOutCallRequest ocreq; - struct PptpOutCallReply ocack; - struct PptpInCallRequest icreq; - struct PptpInCallReply icack; - struct PptpInCallConnected iccon; - struct PptpClearCallRequest clrreq; - struct PptpCallDisconnectNotify disc; - struct PptpWanErrorNotify wanerr; - struct PptpSetLinkInfo setlink; + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; }; extern int diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index b020a33e65e9..6c94dd5d476c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -20,11 +20,11 @@ * - We can only support one single call within each session * * TODO: - * - testing of incoming PPTP calls + * - testing of incoming PPTP calls * - * Changes: + * Changes: * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from + * - Call ip_conntrack_unexpect_related() from * pptp_destroy_siblings() to destroy expectations in case * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen * (Philip Craig ) @@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, invert_tuplepr(&inv_t, &exp->tuple); DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&inv_t); - + exp_other = ip_conntrack_expect_find(&inv_t); if (exp_other) { /* delete other expectation. */ @@ -194,7 +194,7 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) { struct ip_conntrack_tuple t; - /* Since ct->sibling_list has literally rusted away in 2.6.11, + /* Since ct->sibling_list has literally rusted away in 2.6.11, * we now need another way to find out about our sibling * contrack and expects... -HW */ @@ -264,7 +264,7 @@ exp_gre(struct ip_conntrack *master, exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; - + exp_orig->master = master; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; @@ -322,7 +322,7 @@ out_unexpect_orig: goto out_put_both; } -static inline int +static inline int pptp_inbound_pkt(struct sk_buff **pskb, struct tcphdr *tcph, unsigned int nexthdr_off, @@ -336,7 +336,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 *cid, *pcid; - u_int32_t seq; + u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) { @@ -373,7 +373,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, } if (pptpReq->srep.resultCode == PPTP_START_OK) info->sstate = PPTP_SESSION_CONFIRMED; - else + else info->sstate = PPTP_SESSION_ERROR; break; @@ -420,22 +420,22 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = &pptpReq->ocack.peersCallID; info->pac_call_id = ntohs(*cid); - + if (htons(info->pns_call_id) != *pcid) { DEBUGP("%s for unknown callid %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; } - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], ntohs(*cid), ntohs(*pcid)); - + info->cstate = PPTP_CALL_OUT_CONF; seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader) + ((void *)pcid - (void *)pptpReq); - + if (exp_gre(ct, seq, *cid, *pcid) != 0) printk("ip_conntrack_pptp: error during exp_gre\n"); break; @@ -479,7 +479,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, cid = &info->pac_call_id; if (info->pns_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown CallID %u\n", + DEBUGP("%s for unknown CallID %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; } @@ -491,7 +491,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader) + ((void *)pcid - (void *)pptpReq); - + if (exp_gre(ct, seq, *cid, *pcid) != 0) printk("ip_conntrack_pptp: error during exp_gre\n"); @@ -554,7 +554,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; nexthdr_off += sizeof(_ctlh); datalen -= sizeof(_ctlh); - + reqlen = datalen; if (reqlen > sizeof(*pptpReq)) reqlen = sizeof(*pptpReq); @@ -606,7 +606,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* client answers incoming call */ if (info->cstate != PPTP_CALL_IN_REQ && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", + DEBUGP("%s without incall_req\n", pptp_msg_name[msg]); break; } @@ -616,7 +616,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, } pcid = &pptpReq->icack.peersCallID; if (info->pac_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown call %u\n", + DEBUGP("%s for unknown call %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; } @@ -644,12 +644,12 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? pptp_msg_name[msg]:pptp_msg_name[0], msg); /* unknown: no need to create GRE masq table entry */ break; } - + if (ip_nat_pptp_hook_outbound) return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, pptpReq); @@ -659,7 +659,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* track caller id inside control connection, call expect_related */ -static int +static int conntrack_pptp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) @@ -676,12 +676,12 @@ conntrack_pptp_help(struct sk_buff **pskb, int ret; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { DEBUGP("ctinfo = %u, skipping\n", ctinfo); return NF_ACCEPT; } - + nexthdr_off = (*pskb)->nh.iph->ihl*4; tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); @@ -735,28 +735,28 @@ conntrack_pptp_help(struct sk_buff **pskb, } /* control protocol helper */ -static struct ip_conntrack_helper pptp = { +static struct ip_conntrack_helper pptp = { .list = { NULL, NULL }, - .name = "pptp", + .name = "pptp", .me = THIS_MODULE, .max_expected = 2, .timeout = 5 * 60, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, .protonum = IPPROTO_TCP - } + } }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = __constant_htons(0xffff) } } - }, - .dst = { .ip = 0, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, - .protonum = 0xff - } + .protonum = 0xff + } }, .help = conntrack_pptp_help }; @@ -768,7 +768,7 @@ extern int __init ip_ct_proto_gre_init(void); static int __init ip_conntrack_helper_pptp_init(void) { int retcode; - + retcode = ip_ct_proto_gre_init(); if (retcode < 0) return retcode; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 92c6d8b178c9..5fe026f467d3 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -1,15 +1,15 @@ /* - * ip_conntrack_proto_gre.c - Version 3.0 + * ip_conntrack_proto_gre.c - Version 3.0 * * Connection tracking protocol helper module for GRE. * * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -61,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); #define DEBUGP(x, args...) #define DUMP_TUPLE_GRE(x) #endif - + /* GRE KEYMAP HANDLING FUNCTIONS */ static LIST_HEAD(gre_keymap_list); @@ -88,7 +88,7 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) } } read_unlock_bh(&ip_ct_gre_lock); - + DEBUGP("lookup src key 0x%x up key for ", key); DUMP_TUPLE_GRE(t); @@ -107,7 +107,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, return -1; } - if (!reply) + if (!reply) exist_km = &ct->help.ct_pptp_info.keymap_orig; else exist_km = &ct->help.ct_pptp_info.keymap_reply; @@ -118,7 +118,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, if (gre_key_cmpfn(km, t) && km == *exist_km) return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", + DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct); return -EEXIST; } @@ -152,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) write_lock_bh(&ip_ct_gre_lock); if (ct->help.ct_pptp_info.keymap_orig) { - DEBUGP("removing %p from list\n", + DEBUGP("removing %p from list\n", ct->help.ct_pptp_info.keymap_orig); list_del(&ct->help.ct_pptp_info.keymap_orig->list); kfree(ct->help.ct_pptp_info.keymap_orig); @@ -220,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, static int gre_print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple) { - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", ntohs(tuple->src.u.gre.key), ntohs(tuple->dst.u.gre.key)); } @@ -250,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct, } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); - + return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static int gre_new(struct ip_conntrack *ct, const struct sk_buff *skb) -{ +{ DEBUGP(": "); DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); @@ -283,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct) } /* protocol helper struct */ -static struct ip_conntrack_protocol gre = { +static struct ip_conntrack_protocol gre = { .proto = IPPROTO_GRE, - .name = "gre", + .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, @@ -323,7 +323,7 @@ void ip_ct_proto_gre_fini(void) } write_unlock_bh(&ip_ct_gre_lock); - ip_conntrack_protocol_unregister(&gre); + ip_conntrack_protocol_unregister(&gre); } EXPORT_SYMBOL(ip_ct_gre_keymap_add); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 1d149964dc38..5dde1da1c300 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -32,7 +32,7 @@ * 2005-06-10 - Version 3.0 * - kernel >= 2.6.11 version, * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * + * */ #include @@ -93,10 +93,10 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = + t.src.u.gre.key = htons(master->nat.help.nat_pptp_info.pns_call_id); t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = + t.dst.u.gre.key = htons(master->nat.help.nat_pptp_info.pac_call_id); t.dst.protonum = IPPROTO_GRE; } @@ -153,47 +153,47 @@ pptp_outbound_pkt(struct sk_buff **pskb, unsigned int cid_off; new_callid = htons(ct_pptp_info->pns_call_id); - + switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. This breaks - * multiple calls within one control session */ - - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); - break; - case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icreq.callID); - break; - case PPTP_CALL_CLEAR_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ - - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icreq.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; } /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass @@ -216,9 +216,9 @@ static int pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { - struct ip_ct_pptp_master *ct_pptp_info = + struct ip_ct_pptp_master *ct_pptp_info = &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = + struct ip_nat_pptp *nat_pptp_info = &expect_orig->master->nat.help.nat_pptp_info; struct ip_conntrack *ct = expect_orig->master; @@ -324,7 +324,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? pptp_msg_name[msg]:pptp_msg_name[0]); /* fall through */ diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 70a65372225a..a5226691f02c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -6,10 +6,10 @@ * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -60,7 +60,7 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, } /* generate unique tuple ... */ -static int +static int gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range, enum ip_nat_manip_type maniptype, @@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + DEBUGP("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb, greh = (void *)(*pskb)->data + hdroff; pgreh = (struct gre_hdr_pptp *) greh; - /* we only have destination manip of a packet, since 'source key' + /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype == IP_NAT_MANIP_DST) { /* key manipulation is always dest */ @@ -129,7 +129,7 @@ gre_manip_pkt(struct sk_buff **pskb, } if (greh->csum) { /* FIXME: Never tested this code... */ - *(gre_csum(greh)) = + *(gre_csum(greh)) = nf_proto_csum_update(*pskb, ~*(gre_key(greh)), tuple->dst.u.gre.key, @@ -138,7 +138,7 @@ gre_manip_pkt(struct sk_buff **pskb, *(gre_key(greh)) = tuple->dst.u.gre.key; break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", + DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; @@ -152,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb, } /* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", +static struct ip_nat_protocol gre = { + .name = "GRE", .protonum = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, .in_range = gre_in_range, @@ -164,7 +164,7 @@ static struct ip_nat_protocol gre = { .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; - + int __init ip_nat_proto_gre_init(void) { return ip_nat_protocol_register(&gre); -- cgit v1.2.3-71-gd317 From 955b944293dd4c931ec866ebe19a6b2463b8f9a0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:03 -0700 Subject: [NETFILTER]: PPTP conntrack: get rid of unnecessary byte order conversions The conntrack structure contains the call ID in host byte order for no reason, get rid of back and forth conversions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 8 ++--- .../linux/netfilter_ipv4/ip_conntrack_proto_gre.h | 22 ++++++------ include/linux/netfilter_ipv4/ip_nat_pptp.h | 4 +-- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 22 ++++++------ net/ipv4/netfilter/ip_nat_helper_pptp.c | 42 +++++++++++----------- net/ipv4/netfilter/ip_nat_proto_gre.c | 2 +- 6 files changed, 50 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 88f66d3c8765..0d35623f9453 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -31,8 +31,8 @@ struct ip_ct_pptp_master { /* everything below is going to be per-expectation in newnat, * since there could be more than one call within one session */ enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC, host byte order */ - u_int16_t pns_call_id; /* call id of PNS, host byte order */ + __be16 pac_call_id; /* call id of PAC, host byte order */ + __be16 pns_call_id; /* call id of PNS, host byte order */ /* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack * and therefore imposes a fixed limit on the number of maps */ @@ -42,8 +42,8 @@ struct ip_ct_pptp_master { /* conntrack_expect private member */ struct ip_ct_pptp_expect { enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC */ - u_int16_t pns_call_id; /* call id of PNS */ + __be16 pac_call_id; /* call id of PAC */ + __be16 pns_call_id; /* call id of PNS */ }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h index 8d090ef82f5f..1d853aa873eb 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -49,18 +49,18 @@ struct gre_hdr { #else #error "Adjust your defines" #endif - __u16 protocol; + __be16 protocol; }; /* modified GRE header for PPTP */ struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __u16 payload_len; /* size of ppp payload, not inc. gre header */ - __u16 call_id; /* peer's call_id for this session */ - __u32 seq; /* sequence number. Present if S==1 */ - __u32 ack; /* seq number of highest packet recieved by */ + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __be16 payload_len; /* size of ppp payload, not inc. gre header */ + __be16 call_id; /* peer's call_id for this session */ + __be32 seq; /* sequence number. Present if S==1 */ + __be32 ack; /* seq number of highest packet recieved by */ /* sender in this session */ }; @@ -92,13 +92,13 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); /* get pointer to gre key, if present */ -static inline u_int32_t *gre_key(struct gre_hdr *greh) +static inline __be32 *gre_key(struct gre_hdr *greh) { if (!greh->key) return NULL; if (greh->csum || greh->routing) - return (u_int32_t *) (greh+sizeof(*greh)+4); - return (u_int32_t *) (greh+sizeof(*greh)); + return (__be32 *) (greh+sizeof(*greh)+4); + return (__be32 *) (greh+sizeof(*greh)); } /* get pointer ot gre csum, if present */ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h index eaf66c2e8f93..36668bf0f373 100644 --- a/include/linux/netfilter_ipv4/ip_nat_pptp.h +++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -4,8 +4,8 @@ /* conntrack private data */ struct ip_nat_pptp { - u_int16_t pns_call_id; /* NAT'ed PNS call id */ - u_int16_t pac_call_id; /* NAT'ed PAC call id */ + __be16 pns_call_id; /* NAT'ed PNS call id */ + __be16 pac_call_id; /* NAT'ed PAC call id */ }; #endif /* _NAT_PPTP_H */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 6c94dd5d476c..57637ca2b82c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -201,8 +201,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout original pns->pac ct/exp\n"); @@ -210,8 +210,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout reply pac->pns ct/exp\n"); @@ -419,9 +419,9 @@ pptp_inbound_pkt(struct sk_buff **pskb, cid = &pptpReq->ocack.callID; pcid = &pptpReq->ocack.peersCallID; - info->pac_call_id = ntohs(*cid); + info->pac_call_id = *cid; - if (htons(info->pns_call_id) != *pcid) { + if (info->pns_call_id != *pcid) { DEBUGP("%s for unknown callid %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; @@ -454,7 +454,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = &pptpReq->icack.peersCallID; DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = ntohs(*pcid); + info->pac_call_id = *pcid; break; case PPTP_IN_CALL_CONNECT: @@ -478,7 +478,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = &pptpReq->iccon.peersCallID; cid = &info->pac_call_id; - if (info->pns_call_id != ntohs(*pcid)) { + if (info->pns_call_id != *pcid) { DEBUGP("%s for unknown CallID %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; @@ -595,7 +595,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* track PNS call id */ cid = &pptpReq->ocreq.callID; DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->pns_call_id = ntohs(*cid); + info->pns_call_id = *cid; break; case PPTP_IN_CALL_REPLY: if (reqlen < sizeof(_pptpReq.icack)) { @@ -615,7 +615,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; } pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != ntohs(*pcid)) { + if (info->pac_call_id != *pcid) { DEBUGP("%s for unknown call %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; @@ -623,7 +623,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); /* part two of the three-way handshake */ info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = ntohs(pptpReq->icack.callID); + info->pns_call_id = pptpReq->icack.callID; break; case PPTP_CALL_CLEAR_REQUEST: diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 5dde1da1c300..6e8bd6b3431f 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = - htons(master->nat.help.nat_pptp_info.pns_call_id); + t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = - htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; t.dst.protonum = IPPROTO_GRE; } @@ -149,10 +147,11 @@ pptp_outbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_callid; + u_int16_t msg; + __be16 new_callid; unsigned int cid_off; - new_callid = htons(ct_pptp_info->pns_call_id); + new_callid = ct_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REQUEST: @@ -170,7 +169,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); + ct_pptp_info->pns_call_id = new_callid; break; case PPTP_IN_CALL_REPLY: cid_off = offsetof(union pptp_ctrl_union, icreq.callID); @@ -235,14 +234,14 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig, /* alter expectation for PNS->PAC direction */ invert_tuplepr(&inv_t, &expect_orig->tuple); - expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); - expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; expect_orig->dir = IP_CT_DIR_ORIGINAL; inv_t.src.ip = reply_t->src.ip; inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.u.gre.key = nat_pptp_info->pac_call_id; + inv_t.dst.u.gre.key = ct_pptp_info->pns_call_id; if (!ip_conntrack_expect_related(expect_orig)) { DEBUGP("successfully registered expect\n"); @@ -253,14 +252,14 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig, /* alter expectation for PAC->PNS direction */ invert_tuplepr(&inv_t, &expect_reply->tuple); - expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); - expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; expect_reply->dir = IP_CT_DIR_REPLY; inv_t.src.ip = orig_t->src.ip; inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.u.gre.key = nat_pptp_info->pns_call_id; + inv_t.dst.u.gre.key = ct_pptp_info->pac_call_id; if (!ip_conntrack_expect_related(expect_reply)) { DEBUGP("successfully registered expect\n"); @@ -297,10 +296,11 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union *pptpReq) { struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_cid = 0, new_pcid; + u_int16_t msg, new_cid = 0; + __be16 new_pcid; unsigned int pcid_off, cid_off = 0; - new_pcid = htons(nat_pptp_info->pns_call_id); + new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REPLY: diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index a5226691f02c..bf91f9312b3c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -67,7 +67,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t key; - u_int16_t *keyptr; + __be16 *keyptr; unsigned int min, i, range_size; if (maniptype == IP_NAT_MANIP_SRC) -- cgit v1.2.3-71-gd317 From 6013c0a13e335674a783215e182c367406294392 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:56 -0700 Subject: [NETFILTER]: PPTP conntrack: fix header definitions Fix a few header definitions to match RFC2637. Most importantly the PptpOutCallRequest header included an invalid padding field and a size check was disabled because of this. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 9 +++++---- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 0d35623f9453..620bf06fabc2 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -107,8 +107,7 @@ struct PptpControlHeader { struct PptpStartSessionRequest { __be16 protocolVersion; - __u8 reserved1; - __u8 reserved2; + __u16 reserved1; __be32 framingCapability; __be32 bearerCapability; __be16 maxChannels; @@ -143,6 +142,8 @@ struct PptpStartSessionReply { struct PptpStopSessionRequest { __u8 reason; + __u8 reserved1; + __u16 reserved2; }; /* PptpStopSessionResultCode */ @@ -152,6 +153,7 @@ struct PptpStopSessionRequest { struct PptpStopSessionReply { __u8 resultCode; __u8 generalErrorCode; + __u16 reserved1; }; struct PptpEchoRequest { @@ -188,9 +190,8 @@ struct PptpOutCallRequest { __be32 framingType; __be16 packetWindow; __be16 packetProcDelay; - __u16 reserved1; __be16 phoneNumberLength; - __u16 reserved2; + __u16 reserved1; __u8 phoneNumber[64]; __u8 subAddress[64]; }; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 0510ee50dc65..1a8da9015d8c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -569,7 +569,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, case PPTP_OUT_CALL_REQUEST: if (reqlen < sizeof(_pptpReq.ocreq)) { DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - /* FIXME: break; */ + break; } /* client initiating connection to server */ -- cgit v1.2.3-71-gd317 From cf9f81523ef3e95d9f222c896d266e4562999150 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:09:34 -0700 Subject: [NETFILTER]: PPTP conntrack: simplify expectation handling Remove duplicated expectation handling in the NAT helper and simplify the remains in the conntrack helper. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 2 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 92 ++++++++---------------- net/ipv4/netfilter/ip_nat_helper_pptp.c | 58 +-------------- 3 files changed, 35 insertions(+), 117 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 620bf06fabc2..2644b1faddd6 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -315,7 +315,7 @@ extern int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -extern int +extern void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, struct ip_conntrack_expect *exp_reply); diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 5f7af6ef3881..57eac6e3871a 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -80,7 +80,7 @@ int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -int +void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply); @@ -219,93 +219,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int -exp_gre(struct ip_conntrack *master, +exp_gre(struct ip_conntrack *ct, __be16 callid, __be16 peer_callid) { - struct ip_conntrack_tuple inv_tuple; - struct ip_conntrack_tuple exp_tuples[] = { - /* tuple in original direction, PNS->PAC */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, - .u = { .gre = { .key = peer_callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, - .u = { .gre = { .key = callid } }, - .protonum = IPPROTO_GRE - }, - }, - /* tuple in reply direction, PAC->PNS */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - .u = { .gre = { .key = callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - .u = { .gre = { .key = peer_callid } }, - .protonum = IPPROTO_GRE - }, - } - }; struct ip_conntrack_expect *exp_orig, *exp_reply; int ret = 1; - exp_orig = ip_conntrack_expect_alloc(master); + exp_orig = ip_conntrack_expect_alloc(ct); if (exp_orig == NULL) goto out; - exp_reply = ip_conntrack_expect_alloc(master); + exp_reply = ip_conntrack_expect_alloc(ct); if (exp_reply == NULL) goto out_put_orig; - memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + /* original direction, PNS->PAC */ + exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp_orig->tuple.src.u.gre.key = peer_callid; + exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + exp_orig->tuple.dst.u.gre.key = callid; + exp_orig->tuple.dst.protonum = IPPROTO_GRE; exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; - exp_orig->mask.dst.u.all = 0; exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; - exp_orig->master = master; + exp_orig->master = ct; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; /* both expectations are identical apart from tuple */ memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); - memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); - - if (ip_nat_pptp_hook_exp_gre) - ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); - else { - DEBUGP("calling expect_related PNS->PAC"); - DUMP_TUPLE(&exp_orig->tuple); + /* reply direction, PAC->PNS */ + exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + exp_reply->tuple.src.u.gre.key = callid; + exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + exp_reply->tuple.dst.u.gre.key = peer_callid; + exp_reply->tuple.dst.protonum = IPPROTO_GRE; - if (ip_conntrack_expect_related(exp_orig) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_put_both; - } - - DEBUGP("calling expect_related PAC->PNS"); - DUMP_TUPLE(&exp_reply->tuple); - - if (ip_conntrack_expect_related(exp_reply) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_unexpect_orig; - } - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { - DEBUGP("cannot keymap_add() exp\n"); - goto out_unexpect_both; - } - - invert_tuplepr(&inv_tuple, &exp_reply->tuple); - if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(master); - DEBUGP("cannot keymap_add() exp_inv\n"); - goto out_unexpect_both; - } - ret = 0; + if (ip_nat_pptp_hook_exp_gre) + ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + if (ip_conntrack_expect_related(exp_orig) != 0) + goto out_put_both; + if (ip_conntrack_expect_related(exp_reply) != 0) + goto out_unexpect_orig; + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0) + goto out_unexpect_both; + if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(ct); + goto out_unexpect_both; } + ret = 0; out_put_both: ip_conntrack_expect_put(exp_reply); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 0f5e753b481d..84f6bd09fcd4 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -211,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; } -static int +static void pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { - struct ip_ct_pptp_master *ct_pptp_info = - &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = - &expect_orig->master->nat.help.nat_pptp_info; - struct ip_conntrack *ct = expect_orig->master; - - struct ip_conntrack_tuple inv_t; - struct ip_conntrack_tuple *orig_t, *reply_t; + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - /* alter expectation */ - orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - /* alter expectation for PNS->PAC direction */ - invert_tuplepr(&inv_t, &expect_orig->tuple); expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; expect_orig->dir = IP_CT_DIR_ORIGINAL; - inv_t.src.ip = reply_t->src.ip; - inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = nat_pptp_info->pac_call_id; - inv_t.dst.u.gre.key = ct_pptp_info->pns_call_id; - - if (!ip_conntrack_expect_related(expect_orig)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_orig)\n"); - return 1; - } /* alter expectation for PAC->PNS direction */ - invert_tuplepr(&inv_t, &expect_reply->tuple); expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; expect_reply->dir = IP_CT_DIR_REPLY; - inv_t.src.ip = orig_t->src.ip; - inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = nat_pptp_info->pns_call_id; - inv_t.dst.u.gre.key = ct_pptp_info->pac_call_id; - - if (!ip_conntrack_expect_related(expect_reply)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_reply)\n"); - ip_conntrack_unexpect_related(expect_orig); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { - DEBUGP("can't register original keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { - DEBUGP("can't register reply keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - ip_ct_gre_keymap_destroy(ct); - return 1; - } - - return 0; } /* inbound packets == from PAC to PNS */ -- cgit v1.2.3-71-gd317 From 4c5de695cf7f71c85ad8cfff509f6475b8bd4d27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:11:30 -0700 Subject: [NETFILTER]: PPTP conntrack: fix another GRE keymap leak When the master PPTP connection times out while still having unfullfilled expectations (and a GRE keymap entry) associated with it, the keymap entry is not destroyed. Add a destroy callback to struct ip_conntrack_helper and use it to destroy PPTP siblings when the master is destroyed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_helper.h | 2 ++ net/ipv4/netfilter/ip_conntrack_core.c | 5 +++++ net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 12 ++---------- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 8d69279ccfe4..77fe868d36ff 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -25,6 +25,8 @@ struct ip_conntrack_helper struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + void (*destroy)(struct ip_conntrack *ct); + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 2b6f24fc727e..c432b3163609 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -307,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; + struct ip_conntrack_helper *helper; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -315,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); + helper = ct->helper; + if (helper && helper->destroy) + helper->destroy(ct); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 98267b0d2a47..fb0aee691721 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -553,15 +553,6 @@ conntrack_pptp_help(struct sk_buff **pskb, nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - if (tcph->fin || tcph->rst) { - DEBUGP("RST/FIN received, timeouting GRE\n"); - /* can't do this after real newnat */ - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); - } - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { DEBUGP("no full PPTP header, can't track\n"); @@ -640,7 +631,8 @@ static struct ip_conntrack_helper pptp = { .protonum = 0xff } }, - .help = conntrack_pptp_help + .help = conntrack_pptp_help, + .destroy = pptp_destroy_siblings, }; extern void ip_ct_proto_gre_fini(void); -- cgit v1.2.3-71-gd317 From fbea49e1e2404baa2d88ab47e2db89e49551b53b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Sep 2006 14:43:49 -0700 Subject: [IPV6] NDISC: Add proxy_ndp sysctl. We do not always need proxy NDP functionality even we enable forwarding. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 3 +++ include/linux/ipv6.h | 2 ++ include/linux/sysctl.h | 1 + net/ipv6/addrconf.c | 11 +++++++++++ net/ipv6/ip6_output.c | 4 +++- net/ipv6/ndisc.c | 8 +++++++- 6 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 307cd4ec8edd..935e298f674a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -765,6 +765,9 @@ conf/all/forwarding - BOOLEAN This referred to as global forwarding. +proxy_ndp - BOOLEAN + Do proxy ndp. + conf/interface/*: Change special settings per interface. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1d6d3ccc9413..caca57df0d7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -176,6 +176,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 proxy_ndp; void *sysctl; }; @@ -203,6 +204,7 @@ enum { DEVCONF_ACCEPT_RA_RTR_PREF, DEVCONF_RTR_PROBE_INTERVAL, DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN, + DEVCONF_PROXY_NDP, DEVCONF_MAX }; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index af61d9235409..736ed917a4f8 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -556,6 +556,7 @@ enum { NET_IPV6_ACCEPT_RA_RTR_PREF=20, NET_IPV6_RTR_PROBE_INTERVAL=21, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, + NET_IPV6_PROXY_NDP=23, __NET_IPV6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1e5a296d0a82..825a291d5aa5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -175,6 +175,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .proxy_ndp = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -205,6 +206,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .proxy_ndp = 0, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -3337,6 +3339,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; } /* Maximum length of ifinfomsg attributes */ @@ -3859,6 +3862,14 @@ static struct addrconf_sysctl_table }, #endif #endif + { + .ctl_name = NET_IPV6_PROXY_NDP, + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0, /* sentinel */ } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b2be749d2217..66716911962e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -412,7 +412,9 @@ int ip6_forward(struct sk_buff *skb) return -ETIMEDOUT; } - if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + /* XXX: idev->cnf.proxy_ndp? */ + if (ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ddf038636f01..76517a5f6576 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -824,6 +824,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && + (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && (pneigh = pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && @@ -966,8 +967,13 @@ static void ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) + ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + /* XXX: idev->cnf.prixy_ndp */ + WARN_ON(skb->dst != NULL && + ((struct rt6_info *)skb->dst)->rt6i_idev); goto out; + } neigh_update(neigh, lladdr, msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, -- cgit v1.2.3-71-gd317 From 55ebaef1d5db9c1c76ba01a87fd986db5dee550d Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:27 -0700 Subject: [IPV6] ADDRCONF: Allow non-DAD'able addresses. IFA_F_NODAD flag, similar to IN6_IFF_NODAD in BSDs, is introduced to skip DAD. This flag should be set to Mobile IPv6 Home Address(es) on Mobile Node because DAD would fail if we should perform DAD; our Home Agent protects our Home Address(es). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_addr.h | 1 + net/ipv6/addrconf.c | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index e1590454db59..ca24b9de13fb 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -38,6 +38,7 @@ enum #define IFA_F_SECONDARY 0x01 #define IFA_F_TEMPORARY IFA_F_SECONDARY +#define IFA_F_NODAD 0x02 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c09ebb7bb98a..adb583a26151 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1873,12 +1873,11 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, - __u32 prefered_lft, __u32 valid_lft) + __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; - __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); @@ -1971,7 +1970,7 @@ int addrconf_add_ifaddr(void __user *arg) rtnl_lock(); err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2514,7 +2513,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - !(ifp->flags&IFA_F_TENTATIVE)) { + !(ifp->flags&IFA_F_TENTATIVE) || + ifp->flags & IFA_F_NODAD) { ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); read_unlock_bh(&idev->lock); @@ -2912,28 +2912,25 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 prefered_lft, - u32 valid_lft) +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, + u32 prefered_lft, u32 valid_lft) { - int ifa_flags = 0; - if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; if (valid_lft == INFINITY_LIFE_TIME) - ifa_flags = IFA_F_PERMANENT; + ifa_flags |= IFA_F_PERMANENT; else if (valid_lft >= 0x7FFFFFFF/HZ) valid_lft = 0x7FFFFFFF/HZ; if (prefered_lft == 0) - ifa_flags = IFA_F_DEPRECATED; + ifa_flags |= IFA_F_DEPRECATED; else if ((prefered_lft >= 0x7FFFFFFF/HZ) && (prefered_lft != INFINITY_LIFE_TIME)) prefered_lft = 0x7FFFFFFF/HZ; spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; - + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -2955,7 +2952,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct in6_addr *pfx; struct inet6_ifaddr *ifa; struct net_device *dev; - u32 valid_lft, preferred_lft; + u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; + u8 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -2982,6 +2980,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dev == NULL) return -ENODEV; + /* We ignore other flags so far. */ + ifa_flags = ifm->ifa_flags & IFA_F_NODAD; + ifa = ipv6_get_ifaddr(pfx, dev, 1); if (ifa == NULL) { /* @@ -2989,14 +2990,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) * userspace alreay relies on not having to provide this. */ return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - preferred_lft, valid_lft); + ifa_flags, preferred_lft, valid_lft); } if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) err = -EEXIST; else - err = inet6_addr_modify(ifa, preferred_lft, valid_lft); + err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); in6_ifa_put(ifa); -- cgit v1.2.3-71-gd317 From 3b9f9a1c3903b64c38505f9fed3bb11e48dbc931 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:56 -0700 Subject: [IPV6] ADDRCONF: Mobile IPv6 Home Address support. IFA_F_HOMEADDRESS is introduced for Mobile IPv6 Home Addresses on Mobile Node. The IFA_F_HOMEADDRESS flag should be set for Mobile IPv6 Home Addresses for 2 purposes. 1) We need to check this on receipt of Type 2 Routing Header (RFC3775 Secion 6.4), 2) We prefer Home Address(es) in source address selection (RFC3484 Section 5 Rule 4). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_addr.h | 1 + include/net/addrconf.h | 6 +----- net/ipv6/addrconf.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index ca24b9de13fb..dbe8f6120a40 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -39,6 +39,7 @@ enum #define IFA_F_TEMPORARY IFA_F_SECONDARY #define IFA_F_NODAD 0x02 +#define IFA_F_HOMEADDRESS 0x10 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 diff --git a/include/net/addrconf.h b/include/net/addrconf.h index aa2ed8f0a9dd..44f1b673f916 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,12 +61,8 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -/* XXX: this is a placeholder till addrconf supports */ #ifdef CONFIG_IPV6_MIP6 -static inline int ipv6_chk_home_addr(struct in6_addr *addr) -{ - return 0; -} +extern int ipv6_chk_home_addr(struct in6_addr *addr); #endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index adb583a26151..c18676352397 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1038,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - /* Rule 4: Prefer home address -- not implemented yet */ + /* Rule 4: Prefer home address */ +#ifdef CONFIG_IPV6_MIP6 + if (hiscore.rule < 4) { + if (ifa_result->flags & IFA_F_HOMEADDRESS) + hiscore.attrs |= IPV6_SADDR_SCORE_HOA; + hiscore.rule++; + } + if (ifa->flags & IFA_F_HOMEADDRESS) { + score.attrs |= IPV6_SADDR_SCORE_HOA; + if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { + score.rule = 4; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) + continue; + } +#else if (hiscore.rule < 4) hiscore.rule++; +#endif /* Rule 5: Prefer outgoing interface */ if (hiscore.rule < 5) { @@ -2759,6 +2777,26 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_IPV6_MIP6 +/* Check if address is a home address configured on any interface. */ +int ipv6_chk_home_addr(struct in6_addr *addr) +{ + int ret = 0; + struct inet6_ifaddr * ifp; + u8 hash = ipv6_addr_hash(addr); + read_lock_bh(&addrconf_hash_lock); + for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && + (ifp->flags & IFA_F_HOMEADDRESS)) { + ret = 1; + break; + } + } + read_unlock_bh(&addrconf_hash_lock); + return ret; +} +#endif + /* * Periodic address status verification */ @@ -2930,7 +2968,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, prefered_lft = 0x7FFFFFFF/HZ; spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags; + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -2981,7 +3019,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & IFA_F_NODAD; + ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); ifa = ipv6_get_ifaddr(pfx, dev, 1); if (ifa == NULL) { -- cgit v1.2.3-71-gd317 From 1c3c07e9f6cc50dab2aeb8051325e317d4f6c70e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:18 -0400 Subject: NFS: Add a new ACCESS rpc call cache to the linux nfs client The current access cache only allows one entry at a time to be cached for each inode. Add a per-inode red-black tree in order to allow more than one to be cached at a time. Should significantly cut down the time spent in path traversal for shared directories such as ${PATH}, /usr/share, etc. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 133 ++++++++++++++++++++++++++++++++++++++++++------- fs/nfs/inode.c | 13 ++--- include/linux/nfs_fs.h | 5 +- 3 files changed, 124 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e7ffb4deb3e5..094afded2b11 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1638,35 +1638,134 @@ out: return error; } -int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static void nfs_access_free_entry(struct nfs_access_entry *entry) +{ + put_rpccred(entry->cred); + kfree(entry); +} + +static void __nfs_access_zap_cache(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache = &nfsi->cache_access; + struct rb_root *root_node = &nfsi->access_cache; + struct rb_node *n, *dispose = NULL; + struct nfs_access_entry *entry; + + /* Unhook entries from the cache */ + while ((n = rb_first(root_node)) != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + rb_erase(n, root_node); + n->rb_left = dispose; + dispose = n; + } + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); - if (cache->cred != cred - || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) - return -ENOENT; - memcpy(res, cache, sizeof(*res)); - return 0; + /* Now kill them all! */ + while (dispose != NULL) { + n = dispose; + dispose = n->rb_left; + nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node)); + } } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_zap_cache(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache = &nfsi->cache_access; + spin_lock(&inode->i_lock); + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); +} - if (cache->cred != set->cred) { - if (cache->cred) - put_rpccred(cache->cred); - cache->cred = get_rpccred(set->cred); +static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) +{ + struct rb_node *n = NFS_I(inode)->access_cache.rb_node; + struct nfs_access_entry *entry; + + while (n != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + + if (cred < entry->cred) + n = n->rb_left; + else if (cred > entry->cred) + n = n->rb_right; + else + return entry; } - /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ + return NULL; +} + +int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ENOENT; + spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out_zap; + cache = nfs_access_search_rbtree(inode, cred); + if (cache == NULL) + goto out; + if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + goto out_stale; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + spin_unlock(&inode->i_lock); + return err; +out_stale: + rb_erase(&cache->rb_node, &nfsi->access_cache); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(cache); + return -ENOENT; +out_zap: + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); + return -ENOENT; +} + +static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) +{ + struct rb_root *root_node = &NFS_I(inode)->access_cache; + struct rb_node **p = &root_node->rb_node; + struct rb_node *parent = NULL; + struct nfs_access_entry *entry; + + spin_lock(&inode->i_lock); + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct nfs_access_entry, rb_node); + + if (set->cred < entry->cred) + p = &parent->rb_left; + else if (set->cred > entry->cred) + p = &parent->rb_right; + else + goto found; + } + rb_link_node(&set->rb_node, parent, p); + rb_insert_color(&set->rb_node, root_node); spin_unlock(&inode->i_lock); + return; +found: + rb_replace_node(parent, &set->rb_node, root_node); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(entry); +} + +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); + if (cache == NULL) + return; + RB_CLEAR_NODE(&cache->rb_node); cache->jiffies = set->jiffies; + cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + + nfs_access_add_rbtree(inode, cache); } static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d349fb2245da..b94ab060bb1e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync) void nfs_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred; - /* * The following should never happen... */ BUG_ON(nfs_have_writebacks(inode)); - BUG_ON (!list_empty(&nfsi->open_files)); + BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); nfs_zap_acl_cache(inode); - cred = nfsi->cache_access.cred; - if (cred) - put_rpccred(cred); - BUG_ON(atomic_read(&nfsi->data_updates) != 0); + nfs_access_zap_cache(inode); } /** @@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - nfsi->cache_access.cred = NULL; + nfsi->access_cache = RB_ROOT; unlock_new_inode(inode); } else diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c2066caeaab..cc013ed2e52e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,7 @@ * NFSv3/v4 Access mode cache entry */ struct nfs_access_entry { + struct rb_node rb_node; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -145,7 +147,7 @@ struct nfs_inode { */ atomic_t data_updates; - struct nfs_access_entry cache_access; + struct rb_root access_cache; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -297,6 +299,7 @@ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); -- cgit v1.2.3-71-gd317 From cfcea3e8c66c2dcde98d5c2693d4bff50b5cac97 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:18 -0400 Subject: NFS: Add a global LRU list for the ACCESS cache ...in order to allow the addition of a memory shrinker. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 35 ++++++++++++++++++++++++++++++++++- fs/nfs/inode.c | 2 ++ include/linux/nfs_fs.h | 4 ++++ 3 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 094afded2b11..bf4f5ffda703 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1638,10 +1638,17 @@ out: return error; } +static DEFINE_SPINLOCK(nfs_access_lru_lock); +static LIST_HEAD(nfs_access_lru_list); +static atomic_long_t nfs_access_nr_entries; + static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); kfree(entry); + smp_mb__before_atomic_dec(); + atomic_long_dec(&nfs_access_nr_entries); + smp_mb__after_atomic_dec(); } static void __nfs_access_zap_cache(struct inode *inode) @@ -1655,6 +1662,7 @@ static void __nfs_access_zap_cache(struct inode *inode) while ((n = rb_first(root_node)) != NULL) { entry = rb_entry(n, struct nfs_access_entry, rb_node); rb_erase(n, root_node); + list_del(&entry->lru); n->rb_left = dispose; dispose = n; } @@ -1671,6 +1679,13 @@ static void __nfs_access_zap_cache(struct inode *inode) void nfs_access_zap_cache(struct inode *inode) { + /* Remove from global LRU init */ + if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { + spin_lock(&nfs_access_lru_lock); + list_del_init(&NFS_I(inode)->access_cache_inode_lru); + spin_unlock(&nfs_access_lru_lock); + } + spin_lock(&inode->i_lock); /* This will release the spinlock */ __nfs_access_zap_cache(inode); @@ -1711,12 +1726,14 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; + list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); err = 0; out: spin_unlock(&inode->i_lock); return err; out_stale: rb_erase(&cache->rb_node, &nfsi->access_cache); + list_del(&cache->lru); spin_unlock(&inode->i_lock); nfs_access_free_entry(cache); return -ENOENT; @@ -1728,7 +1745,8 @@ out_zap: static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { - struct rb_root *root_node = &NFS_I(inode)->access_cache; + struct nfs_inode *nfsi = NFS_I(inode); + struct rb_root *root_node = &nfsi->access_cache; struct rb_node **p = &root_node->rb_node; struct rb_node *parent = NULL; struct nfs_access_entry *entry; @@ -1747,10 +1765,13 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry * } rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); + list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); spin_unlock(&inode->i_lock); return; found: rb_replace_node(parent, &set->rb_node, root_node); + list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); + list_del(&entry->lru); spin_unlock(&inode->i_lock); nfs_access_free_entry(entry); } @@ -1766,6 +1787,18 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) cache->mask = set->mask; nfs_access_add_rbtree(inode, cache); + + /* Update accounting */ + smp_mb__before_atomic_inc(); + atomic_long_inc(&nfs_access_nr_entries); + smp_mb__after_atomic_inc(); + + /* Add inode to global LRU list */ + if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { + spin_lock(&nfs_access_lru_lock); + list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); + spin_unlock(&nfs_access_lru_lock); + } } static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b94ab060bb1e..6ed018c9aad2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1104,6 +1104,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cc013ed2e52e..a36e01cd6321 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -71,6 +71,7 @@ */ struct nfs_access_entry { struct rb_node rb_node; + struct list_head lru; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -148,6 +149,8 @@ struct nfs_inode { atomic_t data_updates; struct rb_root access_cache; + struct list_head access_cache_entry_lru; + struct list_head access_cache_inode_lru; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -201,6 +204,7 @@ struct nfs_inode { #define NFS_INO_REVALIDATING (0) /* revalidating attrs */ #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ #define NFS_INO_STALE (2) /* possible stale inode */ +#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ static inline struct nfs_inode *NFS_I(struct inode *inode) { -- cgit v1.2.3-71-gd317 From 770bfad846ab6628444428467b11fa6773ae9ea1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:07 -0400 Subject: NFS: Add dentry materialisation op The attached patch adds a new directory cache management function that prepares a disconnected anonymous function to be connected into the dentry tree. The anonymous dentry is transferred the name and parentage from another dentry. The following changes were made in [try #2]: (*) d_materialise_dentry() now switches the parentage of the two nodes around correctly when one or other of them is self-referential. The following changes were made in [try #7]: (*) d_instantiate_unique() has had the interior part split out as function __d_instantiate_unique(). Callers of this latter function must be holding the appropriate locks. (*) _d_rehash() has been added as a wrapper around __d_rehash() to call it with the most obvious hash list (the one from the name). d_rehash() now calls _d_rehash(). (*) d_materialise_dentry() is now __d_materialise_dentry() and is static. (*) d_materialise_unique() added to perform the combination of d_find_alias(), d_materialise_dentry() and d_add_unique() that the NFS client was doing twice, all within a single dcache_lock critical section. This reduces the number of times two different spinlocks were being accessed. The following further changes were made: (*) Add the dentries onto their parents d_subdirs lists. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/dcache.c | 164 ++++++++++++++++++++++++++++++++++++++++++++----- include/linux/dcache.h | 1 + 2 files changed, 151 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 1b4a3a34ec57..17b392a2049e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode) * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. */ -struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +static struct dentry *__d_instantiate_unique(struct dentry *entry, + struct inode *inode) { struct dentry *alias; int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); - if (!inode) - goto do_negative; + if (!inode) { + entry->d_inode = NULL; + return NULL; + } + list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; @@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) if (memcmp(qstr->name, name, len)) continue; dget_locked(alias); - spin_unlock(&dcache_lock); - BUG_ON(!d_unhashed(alias)); - iput(inode); return alias; } + list_add(&entry->d_alias, &inode->i_dentry); -do_negative: entry->d_inode = inode; fsnotify_d_instantiate(entry, inode); - spin_unlock(&dcache_lock); - security_d_instantiate(entry, inode); return NULL; } + +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *result; + + BUG_ON(!list_empty(&entry->d_alias)); + + spin_lock(&dcache_lock); + result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_lock); + + if (!result) { + security_d_instantiate(entry, inode); + return NULL; + } + + BUG_ON(!d_unhashed(result)); + iput(inode); + return result; +} + EXPORT_SYMBOL(d_instantiate_unique); /** @@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list) hlist_add_head_rcu(&entry->d_hash, list); } +static void _d_rehash(struct dentry * entry) +{ + __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); +} + /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash @@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list) void d_rehash(struct dentry * entry) { - struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); - __d_rehash(entry, list); + _d_rehash(entry); spin_unlock(&entry->d_lock); spin_unlock(&dcache_lock); } @@ -1386,6 +1407,120 @@ already_unhashed: spin_unlock(&dcache_lock); } +/* + * Prepare an anonymous dentry for life in the superblock's dentry tree as a + * named dentry in place of the dentry to be replaced. + */ +static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) +{ + struct dentry *dparent, *aparent; + + switch_names(dentry, anon); + do_switch(dentry->d_name.len, anon->d_name.len); + do_switch(dentry->d_name.hash, anon->d_name.hash); + + dparent = dentry->d_parent; + aparent = anon->d_parent; + + dentry->d_parent = (aparent == anon) ? dentry : aparent; + list_del(&dentry->d_u.d_child); + if (!IS_ROOT(dentry)) + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&dentry->d_u.d_child); + + anon->d_parent = (dparent == dentry) ? anon : dparent; + list_del(&anon->d_u.d_child); + if (!IS_ROOT(anon)) + list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&anon->d_u.d_child); + + anon->d_flags &= ~DCACHE_DISCONNECTED; +} + +/** + * d_materialise_unique - introduce an inode into the tree + * @dentry: candidate dentry + * @inode: inode to bind to the dentry, to which aliases may be attached + * + * Introduces an dentry into the tree, substituting an extant disconnected + * root directory alias in its place if there is one + */ +struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) +{ + struct dentry *alias, *actual; + + BUG_ON(!d_unhashed(dentry)); + + spin_lock(&dcache_lock); + + if (!inode) { + actual = dentry; + dentry->d_inode = NULL; + goto found_lock; + } + + /* See if a disconnected directory already exists as an anonymous root + * that we should splice into the tree instead */ + if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { + spin_lock(&alias->d_lock); + + /* Is this a mountpoint that we could splice into our tree? */ + if (IS_ROOT(alias)) + goto connect_mountpoint; + + if (alias->d_name.len == dentry->d_name.len && + alias->d_parent == dentry->d_parent && + memcmp(alias->d_name.name, + dentry->d_name.name, + dentry->d_name.len) == 0) + goto replace_with_alias; + + spin_unlock(&alias->d_lock); + + /* Doh! Seem to be aliasing directories for some reason... */ + dput(alias); + } + + /* Add a unique reference */ + actual = __d_instantiate_unique(dentry, inode); + if (!actual) + actual = dentry; + else if (unlikely(!d_unhashed(actual))) + goto shouldnt_be_hashed; + +found_lock: + spin_lock(&actual->d_lock); +found: + _d_rehash(actual); + spin_unlock(&actual->d_lock); + spin_unlock(&dcache_lock); + + if (actual == dentry) { + security_d_instantiate(dentry, inode); + return NULL; + } + + iput(inode); + return actual; + + /* Convert the anonymous/root alias into an ordinary dentry */ +connect_mountpoint: + __d_materialise_dentry(dentry, alias); + + /* Replace the candidate dentry with the alias in the tree */ +replace_with_alias: + __d_drop(alias); + actual = alias; + goto found; + +shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); + goto shouldnt_be_hashed; +} + /** * d_path - return the path of a dentry * @dentry: dentry to report @@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_invalidate); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_move); +EXPORT_SYMBOL_GPL(d_materialise_unique); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 471781ffeab1..44605be59409 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -221,6 +221,7 @@ static inline int dname_external(struct dentry *dentry) */ extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); +extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void d_delete(struct dentry *); /* allocate/de-allocate */ -- cgit v1.2.3-71-gd317 From adfa6f980bd46974e6b32b22dd0c45e3f52063f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:08 -0400 Subject: NFS: Rename struct nfs4_client to struct nfs_client Rename struct nfs4_client to struct nfs_client so that it can become the basis for a general client record for NFS2 and NFS3 in addition to NFS4. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 2 +- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/delegation.c | 24 +++++++++++------------ fs/nfs/delegation.h | 10 +++++----- fs/nfs/idmap.c | 12 ++++++------ fs/nfs/nfs4_fs.h | 30 ++++++++++++++-------------- fs/nfs/nfs4proc.c | 32 +++++++++++++++--------------- fs/nfs/nfs4renewd.c | 8 ++++---- fs/nfs/nfs4state.c | 50 +++++++++++++++++++++++------------------------ fs/nfs/nfs4xdr.c | 18 ++++++++--------- fs/nfs/super.c | 4 ++-- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_idmap.h | 14 ++++++------- 13 files changed, 105 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index b1f7dc415392..1b596b6d9dc2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -167,7 +167,7 @@ void nfs_callback_down(void) static int nfs_callback_authenticate(struct svc_rqst *rqstp) { struct in_addr *addr = &rqstp->rq_addr.sin_addr; - struct nfs4_client *clp; + struct nfs_client *clp; /* Don't talk to strangers */ clp = nfs4_find_client(addr); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7719483ecdfc..55d6e2ec157f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -15,7 +15,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) { - struct nfs4_client *clp; + struct nfs_client *clp; struct nfs_delegation *delegation; struct nfs_inode *nfsi; struct inode *inode; @@ -56,7 +56,7 @@ out: unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) { - struct nfs4_client *clp; + struct nfs_client *clp; struct inode *inode; unsigned res; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9540a316c05e..5a1105c258bd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int status = 0; @@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode) */ int __nfs_inode_return_delegation(struct inode *inode) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; @@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode) */ void nfs_return_all_delegations(struct super_block *sb) { - struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_client *clp = NFS_SB(sb)->nfs4_state; struct nfs_delegation *delegation; struct inode *inode; @@ -232,7 +232,7 @@ restart: int nfs_do_expire_all_delegations(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs_delegation *delegation; struct inode *inode; @@ -258,7 +258,7 @@ out: module_put_and_exit(0); } -void nfs_expire_all_delegations(struct nfs4_client *clp) +void nfs_expire_all_delegations(struct nfs_client *clp) { struct task_struct *task; @@ -276,7 +276,7 @@ void nfs_expire_all_delegations(struct nfs4_client *clp) /* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ -void nfs_handle_cb_pathdown(struct nfs4_client *clp) +void nfs_handle_cb_pathdown(struct nfs_client *clp) { struct nfs_delegation *delegation; struct inode *inode; @@ -299,7 +299,7 @@ restart: struct recall_threadargs { struct inode *inode; - struct nfs4_client *clp; + struct nfs_client *clp; const nfs4_stateid *stateid; struct completion started; @@ -310,7 +310,7 @@ static int recall_thread(void *data) { struct recall_threadargs *args = (struct recall_threadargs *)data; struct inode *inode = igrab(args->inode); - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -371,7 +371,7 @@ out_module_put: /* * Retrieve the inode associated with a delegation */ -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; struct inode *res = NULL; @@ -389,7 +389,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf /* * Mark all delegations as needing to be reclaimed */ -void nfs_delegation_mark_reclaim(struct nfs4_client *clp) +void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; spin_lock(&clp->cl_lock); @@ -401,7 +401,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp) /* * Reap all unclaimed delegations after reboot recovery is done */ -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) +void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { struct nfs_delegation *delegation, *n; LIST_HEAD(head); @@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 3858694652fa..2cfd4b24c7fe 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st int __nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); -void nfs_expire_all_delegations(struct nfs4_client *clp); -void nfs_handle_cb_pathdown(struct nfs4_client *clp); +void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_handle_cb_pathdown(struct nfs_client *clp); -void nfs_delegation_mark_reclaim(struct nfs4_client *clp); -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); +void nfs_delegation_mark_reclaim(struct nfs_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 873deb96a6cd..d05148ec9414 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -109,7 +109,7 @@ static struct rpc_pipe_ops idmap_upcall_ops = { }; void -nfs_idmap_new(struct nfs4_client *clp) +nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; @@ -138,7 +138,7 @@ nfs_idmap_new(struct nfs4_client *clp) } void -nfs_idmap_delete(struct nfs4_client *clp) +nfs_idmap_delete(struct nfs_client *clp) { struct idmap *idmap = clp->cl_idmap; @@ -491,27 +491,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) return (hash); } -int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); } -int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9a102860df37..4e334cb48498 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -43,9 +43,9 @@ enum nfs4_client_state { }; /* - * The nfs4_client identifies our client state to the server. + * The nfs_client identifies our client state to the server. */ -struct nfs4_client { +struct nfs_client { struct list_head cl_servers; /* Global list of servers */ struct in_addr cl_addr; /* Server identifier */ u64 cl_clientid; /* constant */ @@ -127,7 +127,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status struct nfs4_state_owner { spinlock_t so_lock; struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; + struct nfs_client *so_client; u32 so_id; /* 32-bit identifier, unique */ atomic_t so_count; @@ -210,10 +210,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); +extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); +extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -231,19 +231,19 @@ extern const u32 nfs4_fsinfo_bitmap[2]; extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(void *); /* nfs4state.c */ extern void init_nfsv4_state(struct nfs_server *); extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); +extern struct nfs_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs_client *clp); +extern struct nfs_client *nfs4_find_client(struct in_addr *); +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); @@ -252,7 +252,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b14145b7b87f..168f3ffb059f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -792,7 +792,7 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs4_schedule_state_recovery(clp); @@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; @@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs4_opendata *opendata; int status; @@ -2521,7 +2521,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) */ static void nfs4_renew_done(struct rpc_task *task, void *data) { - struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { @@ -2543,7 +2543,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_call_done = nfs4_renew_done, }; -int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2555,7 +2555,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) &nfs4_renew_ops, (void *)jiffies); } -int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; if (!clp || task->tk_status >= 0) return 0; @@ -2828,7 +2828,7 @@ static int nfs4_wait_bit_interruptible(void *word) return 0; } -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp) { sigset_t oldset; int res; @@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) */ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; int ret = errorcode; exception->retry = 0; @@ -2898,7 +2898,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct return nfs4_map_errors(ret); } -int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) +int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { @@ -2945,7 +2945,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2969,7 +2969,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr return status; } -int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { long timeout; int err; @@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs_lockt_args arg = { .fh = NFS_FH(inode), .fl = request, @@ -3513,7 +3513,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; unsigned char fl_flags = request->fl_flags; int status; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 5d764d8e6d8a..208764069f61 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -61,7 +61,7 @@ void nfs4_renew_state(void *data) { - struct nfs4_client *clp = (struct nfs4_client *)data; + struct nfs_client *clp = (struct nfs_client *)data; struct rpc_cred *cred; long lease, timeout; unsigned long last, now; @@ -108,7 +108,7 @@ out: /* Must be called with clp->cl_sem locked for writes */ void -nfs4_schedule_state_renewal(struct nfs4_client *clp) +nfs4_schedule_state_renewal(struct nfs_client *clp) { long timeout; @@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; if (!clp) return; @@ -140,7 +140,7 @@ nfs4_renewd_prepare_shutdown(struct nfs_server *server) /* Must be called with clp->cl_sem locked for writes */ void -nfs4_kill_renewd(struct nfs4_client *clp) +nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); if (!list_empty(&clp->cl_superblocks)) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 090a36b07a22..c0b6439f1f71 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -83,10 +83,10 @@ destroy_nfsv4_state(struct nfs_server *server) * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... */ -static struct nfs4_client * +static struct nfs_client * nfs4_alloc_client(struct in_addr *addr) { - struct nfs4_client *clp; + struct nfs_client *clp; if (nfs_callback_up() < 0) return NULL; @@ -111,7 +111,7 @@ nfs4_alloc_client(struct in_addr *addr) } static void -nfs4_free_client(struct nfs4_client *clp) +nfs4_free_client(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -130,9 +130,9 @@ nfs4_free_client(struct nfs4_client *clp) nfs_callback_down(); } -static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) +static struct nfs_client *__nfs4_find_client(struct in_addr *addr) { - struct nfs4_client *clp; + struct nfs_client *clp; list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { atomic_inc(&clp->cl_count); @@ -142,19 +142,19 @@ static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) return NULL; } -struct nfs4_client *nfs4_find_client(struct in_addr *addr) +struct nfs_client *nfs4_find_client(struct in_addr *addr) { - struct nfs4_client *clp; + struct nfs_client *clp; spin_lock(&state_spinlock); clp = __nfs4_find_client(addr); spin_unlock(&state_spinlock); return clp; } -struct nfs4_client * +struct nfs_client * nfs4_get_client(struct in_addr *addr) { - struct nfs4_client *clp, *new = NULL; + struct nfs_client *clp, *new = NULL; spin_lock(&state_spinlock); for (;;) { @@ -180,7 +180,7 @@ nfs4_get_client(struct in_addr *addr) } void -nfs4_put_client(struct nfs4_client *clp) +nfs4_put_client(struct nfs_client *clp) { if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) return; @@ -192,7 +192,7 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } -static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) +static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport, cred); @@ -204,13 +204,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) } u32 -nfs4_alloc_lockowner_id(struct nfs4_client *clp) +nfs4_alloc_lockowner_id(struct nfs_client *clp) { return clp->cl_lockowner_id ++; } static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp = NULL; @@ -224,7 +224,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) return sp; } -struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rpc_cred *cred = NULL; @@ -238,7 +238,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) return cred; } -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -251,7 +251,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) } static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp, *res = NULL; @@ -294,7 +294,7 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; spin_lock(&clp->cl_lock); list_del_init(&sp->so_list); spin_unlock(&clp->cl_lock); @@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs4_state_owner *sp, *new; get_rpccred(cred); @@ -337,7 +337,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; struct rpc_cred *cred = sp->so_cred; if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) @@ -540,7 +540,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) @@ -752,7 +752,7 @@ out: static int reclaimer(void *); -static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) +static inline void nfs4_clear_recover_bit(struct nfs_client *clp) { smp_mb__before_clear_bit(); clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); @@ -764,7 +764,7 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) /* * State recovery routine */ -static void nfs4_recover_state(struct nfs4_client *clp) +static void nfs4_recover_state(struct nfs_client *clp) { struct task_struct *task; @@ -782,7 +782,7 @@ static void nfs4_recover_state(struct nfs4_client *clp) /* * Schedule a state recovery attempt */ -void nfs4_schedule_state_recovery(struct nfs4_client *clp) +void nfs4_schedule_state_recovery(struct nfs_client *clp) { if (!clp) return; @@ -879,7 +879,7 @@ out_err: return status; } -static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct nfs4_state *state; @@ -903,7 +903,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp) static int reclaimer(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1dee6ef7e5a9..04748ab9ed55 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con return 0; } -static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) +static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) { uint32_t *p; @@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien return 0; } -static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) +static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) { uint32_t *p; @@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str /* * a RENEW request */ -static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf /* * a SETCLIENTID_CONFIRM request */ -static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -2132,7 +2132,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) } /* Dummy routine */ -static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) { uint32_t *p; unsigned int strlen; @@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t return 0; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid) { uint32_t len, *p; @@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf return 0; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) { uint32_t len, *p; @@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) return 0; } -static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) +static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) { uint32_t *p; uint32_t opnum; @@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) * a SETCLIENTID request */ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, - struct nfs4_client *clp) + struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 63497345806b..d03ede5b1aca 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1099,7 +1099,7 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type, static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) { - struct nfs4_client *clp; + struct nfs_client *clp; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int err = -EIO; @@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry) static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) { const struct dentry *dentry = data->dentry; - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct super_block *sb; server->fsid = data->fattr->fsid; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6b4a13c79474..4db90df2aed0 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -43,7 +43,7 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ + struct nfs_client * nfs4_state; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 102e56094296..678fe68982ef 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -62,15 +62,15 @@ struct idmap_msg { #ifdef __KERNEL__ /* Forward declaration to make this header independent of others */ -struct nfs4_client; +struct nfs_client; -void nfs_idmap_new(struct nfs4_client *); -void nfs_idmap_delete(struct nfs4_client *); +void nfs_idmap_new(struct nfs_client *); +void nfs_idmap_delete(struct nfs_client *); -int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); -int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); +int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs_client *, __u32, char *); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ -- cgit v1.2.3-71-gd317 From 7539bbab8062aadc1db95a22b377146843cfa88f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: NFS: Rename nfs_server::nfs4_state Rename nfs_server::nfs4_state to nfs_client as it will be used to represent the client state for NFS2 and NFS3 also. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 12 ++++++------ fs/nfs/nfs4proc.c | 26 +++++++++++++------------- fs/nfs/nfs4renewd.c | 2 +- fs/nfs/nfs4state.c | 10 +++++----- fs/nfs/nfs4xdr.c | 10 +++++----- fs/nfs/super.c | 6 +++--- include/linux/nfs_fs_sb.h | 2 +- 7 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5a1105c258bd..cfe239736ac0 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ case -NFS4ERR_EXPIRED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); + nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client); goto out_err; } } @@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int status = 0; @@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode) */ int __nfs_inode_return_delegation(struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; @@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode) */ void nfs_return_all_delegations(struct super_block *sb) { - struct nfs_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_client *clp = NFS_SB(sb)->nfs_client; struct nfs_delegation *delegation; struct inode *inode; @@ -310,7 +310,7 @@ static int recall_thread(void *data) { struct recall_threadargs *args = (struct recall_threadargs *)data; struct inode *inode = igrab(args->inode); - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 168f3ffb059f..b46597fc81e1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags, - p->o_arg.clientid = server->nfs4_state->cl_clientid; + p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; @@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); return err; } err = nfs4_handle_exception(server, err, &exception); @@ -792,7 +792,7 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs4_schedule_state_recovery(clp); @@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; @@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; int status; @@ -1133,7 +1133,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); break; default: if (nfs4_async_handle_error(task, server) == -EAGAIN) { @@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (!clp || task->tk_status >= 0) return 0; @@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) */ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; int ret = errorcode; exception->retry = 0; @@ -3077,7 +3077,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); case 0: return 0; } @@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_lockt_args arg = { .fh = NFS_FH(inode), .fl = request, @@ -3231,7 +3231,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->server->nfs4_state); + nfs4_schedule_state_recovery(calldata->server->nfs_client); break; default: if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { @@ -3343,7 +3343,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, if (p->arg.lock_seqid == NULL) goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; - p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; + p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id; p->lsp = lsp; atomic_inc(&lsp->ls_count); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 208764069f61..ff947ecb8b81 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (!clp) return; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c0b6439f1f71..fa51a7d4c022 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -61,7 +61,7 @@ static LIST_HEAD(nfs4_clientid_list); void init_nfsv4_state(struct nfs_server *server) { - server->nfs4_state = NULL; + server->nfs_client = NULL; INIT_LIST_HEAD(&server->nfs4_siblings); } @@ -70,9 +70,9 @@ destroy_nfsv4_state(struct nfs_server *server) { kfree(server->mnt_path); server->mnt_path = NULL; - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; + if (server->nfs_client) { + nfs4_put_client(server->nfs_client); + server->nfs_client = NULL; } } @@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; get_rpccred(cred); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 04748ab9ed55..99926067eca4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons fattr->mode |= fmode; if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) goto xdr_error; - if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) + if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) goto xdr_error; - if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) + if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) goto xdr_error; if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) goto xdr_error; @@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) if (decode_space_limit(xdr, &res->maxsize) < 0) return -EIO; } - return decode_ace(xdr, NULL, res->server->nfs4_state); + return decode_ace(xdr, NULL, res->server->nfs_client); } static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d03ede5b1aca..ab4c78ee840c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1141,7 +1141,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) - server->nfs4_state = clp; + server->nfs_client = clp; up_write(&clp->cl_sem); clp = NULL; @@ -1151,7 +1151,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, return clnt; } - if (server->nfs4_state->cl_idmap == NULL) { + if (server->nfs_client->cl_idmap == NULL) { dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return ERR_PTR(-ENOMEM); } @@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry) static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) { const struct dentry *dentry = data->dentry; - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct super_block *sb; server->fsid = data->fattr->fsid; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4db90df2aed0..fc20d6b934fb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -43,7 +43,7 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs_client * nfs4_state; /* all NFSv4 state starts here */ + struct nfs_client * nfs_client; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ -- cgit v1.2.3-71-gd317 From b7162792b5c0e0f6e91b8997f8e6bbc76ec5420a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: NFS: Return an error when starting the idmapping pipe Return an error when starting the idmapping pipe so that we can detect it failing. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 12 ++++++++---- fs/nfs/super.c | 3 ++- include/linux/nfs_idmap.h | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index d05148ec9414..231c20ffc0ff 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -108,15 +108,17 @@ static struct rpc_pipe_ops idmap_upcall_ops = { .destroy_msg = idmap_pipe_destroy_msg, }; -void +int nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; + int error; if (clp->cl_idmap != NULL) - return; + return 0; + if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return; + return -ENOMEM; snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); @@ -124,8 +126,9 @@ nfs_idmap_new(struct nfs_client *clp) idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { + error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); - return; + return error; } mutex_init(&idmap->idmap_lock); @@ -135,6 +138,7 @@ nfs_idmap_new(struct nfs_client *clp) idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; clp->cl_idmap = idmap; + return 0; } void diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ab4c78ee840c..3ee85c4e65d8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1136,7 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, clnt->cl_softrtry = 1; clp->cl_rpcclient = clnt; memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); + if (nfs_idmap_new(clp) < 0) + goto out_fail; } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 678fe68982ef..15a9f3b7289a 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -64,7 +64,7 @@ struct idmap_msg { /* Forward declaration to make this header independent of others */ struct nfs_client; -void nfs_idmap_new(struct nfs_client *); +int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); -- cgit v1.2.3-71-gd317 From 2b3de4411b3ccaeb00018c99d1bbe7203554cf7f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: NFS: Add a lookupfh NFS RPC op Add a lookup filehandle NFS RPC op so that a file handle can be looked up without requiring dentries and inodes and other VFS stuff when doing an NFS4 pathwalk during mounting. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_xdr.h | 3 +++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b46597fc81e1..de2006f754ef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1583,6 +1583,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, return status; } +static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + struct nfs4_lookup_arg args = { + .bitmask = server->attr_bitmask, + .dir_fh = dirfh, + .name = name, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fattr); + + dprintk("NFS call lookupfh %s\n", name->name); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply lookupfh: %d\n", status); + if (status == -NFS4ERR_MOVED) + status = -EREMOTE; + return status; +} + +static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_lookupfh(server, dirfh, name, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { @@ -3723,6 +3769,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, + .lookupfh = nfs4_proc_lookupfh, .lookup = nfs4_proc_lookup, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 41e5a19199e9..26879771831d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -770,6 +770,9 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*lookupfh)(struct nfs_server *, struct nfs_fh *, + struct qstr *, struct nfs_fh *, + struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, -- cgit v1.2.3-71-gd317 From e9326dcab413848e70ab746c7c5363da13e5f801 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: NFS: Add a server capabilities NFS RPC op Add a set_capabilities NFS RPC op so that the server capabilities can be set. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de2006f754ef..850f0851023a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3790,6 +3790,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .statfs = nfs4_proc_statfs, .fsinfo = nfs4_proc_fsinfo, .pathconf = nfs4_proc_pathconf, + .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, .read_done = nfs4_read_done, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 26879771831d..dd9ae6761f71 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -809,6 +809,7 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); + int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); -- cgit v1.2.3-71-gd317 From 24c8dbbb5f777187d660393599641ab3307b4b97 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: NFS: Generalise the nfs_client structure Generalise the nfs_client structure by: (1) Moving nfs_client to a more general place (nfs_fs_sb.h). (2) Renaming its maintenance routines to be non-NFS4 specific. (3) Move those maintenance routines to a new non-NFS4 specific file (client.c) and move the declarations to internal.h. (4) Make nfs_find/get_client() take a full sockaddr_in to include the port number (will be required for NFS2/3). (5) Make nfs_find/get_client() take the NFS protocol version (again will be required to differentiate NFS2, 3 & 4 client records). Also: (6) Make nfs_client construction proceed akin to inodes, marking them as under construction and providing a function to indicate completion. (7) Make nfs_get_client() wait interruptibly if it finds a client that it can share, but that client is currently being constructed. (8) Make nfs4_create_client() use (6) and (7) instead of locking cl_sem. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 6 +- fs/nfs/callback.c | 9 +- fs/nfs/callback_proc.c | 9 +- fs/nfs/client.c | 312 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/delegation.c | 9 +- fs/nfs/internal.h | 6 + fs/nfs/nfs4_fs.h | 52 -------- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 128 +------------------ fs/nfs/super.c | 53 ++++---- include/linux/nfs_fs.h | 1 + include/linux/nfs_fs_sb.h | 60 +++++++++ 12 files changed, 425 insertions(+), 222 deletions(-) create mode 100644 fs/nfs/client.c (limited to 'include/linux') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b572a0c1967..3b993a6f8163 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,9 +4,9 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o \ - namespace.o +nfs-y := client.o dir.o file.o inode.o super.o nfs2xdr.o \ + pagelist.o proc.o read.o symlink.o unlink.o \ + write.o namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 1b596b6d9dc2..a3ee11364db0 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -19,6 +19,7 @@ #include "nfs4_fs.h" #include "callback.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -166,15 +167,15 @@ void nfs_callback_down(void) static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct in_addr *addr = &rqstp->rq_addr.sin_addr; + struct sockaddr_in *addr = &rqstp->rq_addr; struct nfs_client *clp; /* Don't talk to strangers */ - clp = nfs4_find_client(addr); + clp = nfs_find_client(addr, 4); if (clp == NULL) return SVC_DROP; - dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); - nfs4_put_client(clp); + dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr)); + nfs_put_client(clp); switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 55d6e2ec157f..97cf8f71451f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -10,6 +10,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -22,7 +23,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -48,7 +49,7 @@ out_iput: up_read(&nfsi->rwsem); iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); return res->status; @@ -61,7 +62,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) unsigned res; res = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) } iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); return res; diff --git a/fs/nfs/client.c b/fs/nfs/client.c new file mode 100644 index 000000000000..cb5e92463bdb --- /dev/null +++ b/fs/nfs/client.c @@ -0,0 +1,312 @@ +/* client.c: NFS client sharing and management code + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +static DEFINE_SPINLOCK(nfs_client_lock); +static LIST_HEAD(nfs_client_list); +static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); + +/* + * Allocate a shared client record + * + * Since these are allocated/deallocated very rarely, we don't + * bother putting them in a slab cache... + */ +static struct nfs_client *nfs_alloc_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp; + int error; + + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) + goto error_0; + + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); + __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); + goto error_1; + } + + if (nfsversion == 4) { + if (nfs_callback_up() < 0) + goto error_2; + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + + atomic_set(&clp->cl_count, 1); + clp->cl_cons_state = NFS_CS_INITING; + + clp->cl_nfsversion = nfsversion; + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + + if (hostname) { + clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (!clp->cl_hostname) + goto error_3; + } + + INIT_LIST_HEAD(&clp->cl_superblocks); + clp->cl_rpcclient = ERR_PTR(-EINVAL); + +#ifdef CONFIG_NFS_V4 + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + spin_lock_init(&clp->cl_lock); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; +#endif + + return clp; + +error_3: + nfs_callback_down(); + __clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state); +error_2: + rpciod_down(); + __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); +error_1: + kfree(clp); +error_0: + return NULL; +} + +/* + * Destroy a shared client record + */ +static void nfs_free_client(struct nfs_client *clp) +{ + dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); + +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) { + while (!list_empty(&clp->cl_unused)) { + struct nfs4_state_owner *sp; + + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + nfs_idmap_delete(clp); + } +#endif + + /* -EIO all pending I/O */ + if (!IS_ERR(clp->cl_rpcclient)) + rpc_shutdown_client(clp->cl_rpcclient); + + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(); + + if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) + rpciod_down(); + + kfree(clp->cl_hostname); + kfree(clp); + + dprintk("<-- nfs_free_client()\n"); +} + +/* + * Release a reference to a shared client record + */ +void nfs_put_client(struct nfs_client *clp) +{ + dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + + if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + list_del(&clp->cl_share_link); + spin_unlock(&nfs_client_lock); + + BUG_ON(!list_empty(&clp->cl_superblocks)); + + nfs_free_client(clp); + } +} + +/* + * Find a client by address + * - caller must hold nfs_client_lock + */ +static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + /* Different NFS versions cannot share the same nfs_client */ + if (clp->cl_nfsversion != nfsversion) + continue; + + if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr, + sizeof(clp->cl_addr.sin_addr)) != 0) + continue; + + if (clp->cl_addr.sin_port == addr->sin_port) + goto found; + } + + return NULL; + +found: + atomic_inc(&clp->cl_count); + return clp; +} + +/* + * Find a client by IP address and protocol version + * - returns NULL if no such client + */ +struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + clp = __nfs_find_client(addr, nfsversion); + spin_unlock(&nfs_client_lock); + + BUG_ON(clp->cl_cons_state == 0); + + return clp; +} + +/* + * Look up a client by IP address and protocol version + * - creates a new record if one doesn't yet exist + */ +struct nfs_client *nfs_get_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp, *new = NULL; + int error; + + dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n", + hostname ?: "", NIPQUAD(addr->sin_addr), + addr->sin_port, nfsversion); + + /* see if the client already exists */ + do { + spin_lock(&nfs_client_lock); + + clp = __nfs_find_client(addr, nfsversion); + if (clp) + goto found_client; + if (new) + goto install_client; + + spin_unlock(&nfs_client_lock); + + new = nfs_alloc_client(hostname, addr, nfsversion); + } while (new); + + return ERR_PTR(-ENOMEM); + + /* install a new client and return with it unready */ +install_client: + clp = new; + list_add(&clp->cl_share_link, &nfs_client_list); + spin_unlock(&nfs_client_lock); + dprintk("--> nfs_get_client() = %p [new]\n", clp); + return clp; + + /* found an existing client + * - make sure it's ready before returning + */ +found_client: + spin_unlock(&nfs_client_lock); + + if (new) + nfs_free_client(new); + + if (clp->cl_cons_state == NFS_CS_INITING) { + DECLARE_WAITQUEUE(myself, current); + + add_wait_queue(&nfs_client_active_wq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current) || + clp->cl_cons_state > NFS_CS_READY) + break; + schedule(); + } + + remove_wait_queue(&nfs_client_active_wq, &myself); + + if (signal_pending(current)) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); + } + } + + if (clp->cl_cons_state < NFS_CS_READY) { + error = clp->cl_cons_state; + nfs_put_client(clp); + return ERR_PTR(error); + } + + dprintk("--> nfs_get_client() = %p [share]\n", clp); + return clp; +} + +/* + * Mark a server as ready or failed + */ +void nfs_mark_client_ready(struct nfs_client *clp, int state) +{ + clp->cl_cons_state = state; + wake_up_all(&nfs_client_active_wq); +} diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index cfe239736ac0..57133678db16 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -18,6 +18,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" static struct nfs_delegation *nfs_alloc_delegation(void) { @@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct sizeof(delegation->stateid)) != 0 || delegation->type != nfsi->delegation->type) { printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", - __FUNCTION__, NIPQUAD(clp->cl_addr)); + __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr)); status = -EIO; } } @@ -254,7 +255,7 @@ restart: } out: spin_unlock(&clp->cl_lock); - nfs4_put_client(clp); + nfs_put_client(clp); module_put_and_exit(0); } @@ -266,10 +267,10 @@ void nfs_expire_all_delegations(struct nfs_client *clp) atomic_inc(&clp->cl_count); task = kthread_run(nfs_do_expire_all_delegations, clp, "%u.%u.%u.%u-delegreturn", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4802157963f8..ac370d5d4494 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -15,6 +15,12 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; +/* client.c */ +extern void nfs_put_client(struct nfs_client *); +extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); +extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int); +extern void nfs_mark_client_ready(struct nfs_client *, int); + /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4e334cb48498..e7879245361e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -42,55 +42,6 @@ enum nfs4_client_state { NFS4CLNT_LEASE_EXPIRED, }; -/* - * The nfs_client identifies our client state to the server. - */ -struct nfs_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - struct rpc_wait_queue cl_rpcwaitq; - - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; - unsigned char cl_id_uniquifier; -}; - /* * struct rpc_sequence ensures that RPC calls are sent in the exact * order that they appear on the list. @@ -239,9 +190,6 @@ extern void nfs4_renew_state(void *); /* nfs4state.c */ extern void init_nfsv4_state(struct nfs_server *); extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs_client *clp); -extern struct nfs_client *nfs4_find_client(struct in_addr *); struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 850f0851023a..803c31b88bb5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2968,7 +2968,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po for(;;) { setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", - clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr), cred->cr_ops->cr_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fa51a7d4c022..058811e39555 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -50,12 +50,12 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; -static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); void @@ -71,127 +71,11 @@ destroy_nfsv4_state(struct nfs_server *server) kfree(server->mnt_path); server->mnt_path = NULL; if (server->nfs_client) { - nfs4_put_client(server->nfs_client); + nfs_put_client(server->nfs_client); server->nfs_client = NULL; } } -/* - * nfs4_get_client(): returns an empty client structure - * nfs4_put_client(): drops reference to client structure - * - * Since these are allocated/deallocated very rarely, we don't - * bother putting them in a slab cache... - */ -static struct nfs_client * -nfs4_alloc_client(struct in_addr *addr) -{ - struct nfs_client *clp; - - if (nfs_callback_up() < 0) - return NULL; - if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { - nfs_callback_down(); - return NULL; - } - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - init_rwsem(&clp->cl_sem); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); - spin_lock_init(&clp->cl_lock); - atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); - INIT_LIST_HEAD(&clp->cl_superblocks); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_rpcclient = ERR_PTR(-EINVAL); - clp->cl_boot_time = CURRENT_TIME; - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - return clp; -} - -static void -nfs4_free_client(struct nfs_client *clp) -{ - struct nfs4_state_owner *sp; - - while (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); - nfs_idmap_delete(clp); - if (!IS_ERR(clp->cl_rpcclient)) - rpc_shutdown_client(clp->cl_rpcclient); - kfree(clp); - nfs_callback_down(); -} - -static struct nfs_client *__nfs4_find_client(struct in_addr *addr) -{ - struct nfs_client *clp; - list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { - if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { - atomic_inc(&clp->cl_count); - return clp; - } - } - return NULL; -} - -struct nfs_client *nfs4_find_client(struct in_addr *addr) -{ - struct nfs_client *clp; - spin_lock(&state_spinlock); - clp = __nfs4_find_client(addr); - spin_unlock(&state_spinlock); - return clp; -} - -struct nfs_client * -nfs4_get_client(struct in_addr *addr) -{ - struct nfs_client *clp, *new = NULL; - - spin_lock(&state_spinlock); - for (;;) { - clp = __nfs4_find_client(addr); - if (clp != NULL) - break; - clp = new; - if (clp != NULL) { - list_add(&clp->cl_servers, &nfs4_clientid_list); - new = NULL; - break; - } - spin_unlock(&state_spinlock); - new = nfs4_alloc_client(addr); - spin_lock(&state_spinlock); - if (new == NULL) - break; - } - spin_unlock(&state_spinlock); - if (new) - nfs4_free_client(new); - return clp; -} - -void -nfs4_put_client(struct nfs_client *clp) -{ - if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) - return; - list_del(&clp->cl_servers); - spin_unlock(&state_spinlock); - BUG_ON(!list_empty(&clp->cl_superblocks)); - rpc_wake_up(&clp->cl_rpcwaitq); - nfs4_kill_renewd(clp); - nfs4_free_client(clp); -} - static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, @@ -771,11 +655,11 @@ static void nfs4_recover_state(struct nfs_client *clp) __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } @@ -970,12 +854,12 @@ out: if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); + nfs_put_client(clp); module_put_and_exit(0); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", - NIPQUAD(clp->cl_addr.s_addr), -status); + NIPQUAD(clp->cl_addr.sin_addr), -status); set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3ee85c4e65d8..f97d7d9c5c32 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1104,47 +1104,46 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, struct rpc_clnt *clnt = NULL; int err = -EIO; - clp = nfs4_get_client(&server->addr.sin_addr); + clp = nfs_get_client(server->hostname, &server->addr, 4); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return ERR_PTR(err); } /* Now create transport and client */ - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { + if (clp->cl_cons_state == NFS_CS_INITING) { xprt = xprt_create_proto(proto, &server->addr, timeparms); if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); err = PTR_ERR(xprt); dprintk("%s: cannot create RPC transport. Error = %d\n", __FUNCTION__, err); - goto out_fail; + goto client_init_error; } /* Bind to a reserved port! */ xprt->resvport = 1; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, flavor); if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); err = PTR_ERR(clnt); dprintk("%s: cannot create RPC client. Error = %d\n", __FUNCTION__, err); - goto out_fail; + goto client_init_error; } clnt->cl_intr = 1; clnt->cl_softrtry = 1; clp->cl_rpcclient = clnt; memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - if (nfs_idmap_new(clp) < 0) - goto out_fail; + err = nfs_idmap_new(clp); + if (err < 0) { + dprintk("%s: failed to create idmapper.\n", + __FUNCTION__); + goto client_init_error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + nfs_mark_client_ready(clp, 0); } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs_client = clp; - up_write(&clp->cl_sem); - clp = NULL; if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %d\n", @@ -1152,11 +1151,6 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, return clnt; } - if (server->nfs_client->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return ERR_PTR(-ENOMEM); - } - if (clnt->cl_auth->au_flavor != flavor) { struct rpc_auth *auth; @@ -1166,11 +1160,16 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, return (struct rpc_clnt *)auth; } } + + server->nfs_client = clp; + down_write(&clp->cl_sem); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); return clnt; - out_fail: - if (clp) - nfs4_put_client(clp); +client_init_error: + nfs_mark_client_ready(clp, err); + nfs_put_client(clp); return ERR_PTR(err); } @@ -1329,14 +1328,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type, goto out_free; } - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_free; - } - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); @@ -1383,8 +1374,6 @@ static void nfs4_kill_super(struct super_block *sb) destroy_nfsv4_state(server); - rpciod_down(); - nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a36e01cd6321..70e1dc9162e2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -586,6 +586,7 @@ extern void * nfs_root_data(void); #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 +#define NFSDBG_CLIENT 0x0200 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fc20d6b934fb..a727657e0ad3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -6,6 +6,66 @@ struct nfs_iostats; +/* + * The nfs_client identifies our client state to the server. + */ +struct nfs_client { + atomic_t cl_count; + int cl_cons_state; /* current construction state (-ve: init error) */ +#define NFS_CS_READY 0 /* ready to be used */ +#define NFS_CS_INITING 1 /* busy initialising */ + int cl_nfsversion; /* NFS protocol version */ + unsigned long cl_res_state; /* NFS resources state */ +#define NFS_CS_RPCIOD 0 /* - rpciod started */ +#define NFS_CS_CALLBACK 1 /* - callback started */ +#define NFS_CS_IDMAP 2 /* - idmap started */ + struct sockaddr_in cl_addr; /* server identifier */ + char * cl_hostname; /* hostname of server */ + struct list_head cl_share_link; /* link in global client list */ + struct list_head cl_superblocks; /* List of nfs_server structs */ + + struct rpc_clnt * cl_rpcclient; + +#ifdef CONFIG_NFS_V4 + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +#endif +}; + /* * NFS client parameters stored in the superblock. */ -- cgit v1.2.3-71-gd317 From 509de8111656a7d89b4a1a5f430f4460ce510f0f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: NFS: Add extra const qualifiers Add some extra const qualifiers into NFS. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 3 ++- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4namespace.c | 8 ++++---- fs/nfs/nfs4proc.c | 2 +- fs/nfs/proc.c | 2 +- fs/nfs/super.c | 10 +++++----- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 6 +++--- 8 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 86b3169c8cac..85d9ed1dcf42 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -172,7 +172,8 @@ void nfs_release_automount_timer(void) /* * Clone a mountpoint of the appropriate type */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, + const char *devname, struct nfs_clone_mount *mountdata) { #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7143b1f82cea..3e5371241cea 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -886,7 +886,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); } -struct nfs_rpc_ops nfs_v3_clientops = { +const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs3_dir_inode_operations, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index ea38d27b74e6..faed9bcba50f 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -23,7 +23,7 @@ /* * Check if fs_root is valid */ -static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, +static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) { char *end = buffer + buflen; @@ -34,7 +34,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, n = pathname->ncomponents; while (--n >= 0) { - struct nfs4_string *component = &pathname->components[n]; + const struct nfs4_string *component = &pathname->components[n]; buflen -= component->len + 1; if (buflen < 0) goto Elong; @@ -60,7 +60,7 @@ Elong: */ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, const struct dentry *dentry, - struct nfs4_fs_locations *locations) + const struct nfs4_fs_locations *locations) { struct vfsmount *mnt = ERR_PTR(-ENOENT); struct nfs_clone_mount mountdata = { @@ -108,7 +108,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, loc = 0; while (loc < locations->nlocations && IS_ERR(mnt)) { - struct nfs4_fs_location *location = &locations->locations[loc]; + const struct nfs4_fs_location *location = &locations->locations[loc]; char *mnt_path; if (location == NULL || location->nservers <= 0 || diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 803c31b88bb5..061be713b206 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3761,7 +3761,7 @@ static struct inode_operations nfs4_file_inode_operations = { .listxattr = nfs4_listxattr, }; -struct nfs_rpc_ops nfs_v4_clientops = { +const struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b3899ea3229e..77676903e0f5 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -671,7 +671,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) } -struct nfs_rpc_ops nfs_v2_clientops = { +const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a41d516ed595..c97f30967955 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -329,10 +329,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) */ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { - static struct proc_nfs_info { + static const struct proc_nfs_info { int flag; - char *str; - char *nostr; + const char *str; + const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, @@ -342,9 +342,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; - struct proc_nfs_info *nfs_infop; + const struct proc_nfs_info *nfs_infop; char buf[12]; - char *proto; + const char *proto; seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a727657e0ad3..95f32d5f6e9c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -73,7 +73,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dd9ae6761f71..2426b11b6cce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -833,9 +833,9 @@ struct nfs_rpc_ops { /* * Function vectors etc. for the NFS client */ -extern struct nfs_rpc_ops nfs_v2_clientops; -extern struct nfs_rpc_ops nfs_v3_clientops; -extern struct nfs_rpc_ops nfs_v4_clientops; +extern const struct nfs_rpc_ops nfs_v2_clientops; +extern const struct nfs_rpc_ops nfs_v3_clientops; +extern const struct nfs_rpc_ops nfs_v4_clientops; extern struct rpc_version nfs_version2; extern struct rpc_version nfs_version3; extern struct rpc_version nfs_version4; -- cgit v1.2.3-71-gd317 From 27951bd26031f6c27d38df9e94623bbe208a2464 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: NFS: Maintain a common server record for NFS2/3 as well as for NFS4 Maintain a common server record for NFS2/3 as well as for NFS4 so that common stuff can be moved there from struct nfs_server. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 21 ++++++++++++++++++++- include/linux/nfs_fs_sb.h | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c97f30967955..d1b4a5b36e33 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -658,11 +658,19 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned static struct rpc_clnt * nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) { + struct nfs_client *clp; struct rpc_timeout timeparms; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + clp = nfs_get_client(server->hostname, &server->addr, + server->rpc_ops->version); + if (!clp) { + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); + return ERR_PTR(PTR_ERR(clp)); + } + nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); server->retrans_timeo = timeparms.to_initval; @@ -673,6 +681,8 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) if (IS_ERR(xprt)) { dprintk("%s: cannot create RPC transport. Error = %ld\n", __FUNCTION__, PTR_ERR(xprt)); + nfs_mark_client_ready(clp, PTR_ERR(xprt)); + nfs_put_client(clp); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, @@ -686,9 +696,13 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) clnt->cl_intr = 1; clnt->cl_softrtry = 1; + nfs_mark_client_ready(clp, 0); + server->nfs_client = clp; return clnt; out_fail: + nfs_mark_client_ready(clp, PTR_ERR(xprt)); + nfs_put_client(clp); return clnt; } @@ -764,6 +778,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data, if (server == NULL) goto out_err; memcpy(server, parent, sizeof(*server)); + atomic_inc(&server->nfs_client->cl_count); hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; len = strlen(hostname) + 1; server->hostname = kmalloc(len, GFP_KERNEL); @@ -796,6 +811,7 @@ out_deactivate: out_rpciod_down: rpciod_down(); kfree(server->hostname); + nfs_put_client(server->nfs_client); kfree(server); return simple_set_mnt(mnt, sb); kill_rpciod: @@ -803,6 +819,7 @@ kill_rpciod: free_hostname: kfree(server->hostname); free_server: + nfs_put_client(server->nfs_client); kfree(server); out_err: return error; @@ -1071,6 +1088,7 @@ static void nfs_kill_super(struct super_block *s) nfs_free_iostats(server->io_stats); kfree(server->hostname); + nfs_put_client(server->nfs_client); kfree(server); nfs_release_automount_timer(); } @@ -1421,7 +1439,6 @@ static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_c nfs4_server_capabilities(server, &server->fh); down_write(&clp->cl_sem); - atomic_inc(&clp->cl_count); list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); up_write(&clp->cl_sem); return sb; @@ -1476,6 +1493,8 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf retrans = 1; nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + nfs_put_client(server->nfs_client); + server->nfs_client = NULL; server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); if (IS_ERR((err = server->client))) goto out_err; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 95f32d5f6e9c..e7d7662f51fd 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -70,6 +70,7 @@ struct nfs_client { * NFS client parameters stored in the superblock. */ struct nfs_server { + struct nfs_client * nfs_client; /* shared client and NFS4 state */ struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ @@ -103,7 +104,6 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs_client * nfs_client; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ -- cgit v1.2.3-71-gd317 From 8fa5c000d7f986ef9cdc6d95f9f7fcee20e0a7d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: NFS: Move rpc_ops from nfs_server to nfs_client Move the rpc_ops from the nfs_server struct to the nfs_client struct as they're common to all server records of a particular NFS protocol version. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/namespace.c | 6 +++-- fs/nfs/nfs4proc.c | 2 +- fs/nfs/super.c | 59 ++++++++++++++++++++++++++--------------------- include/linux/nfs_fs.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- 7 files changed, 43 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 067d144d141b..19362712452f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1147,7 +1147,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, } if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->rpc_ops->getattr(server, fhandle, fattr); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) goto out_err; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6ed018c9aad2..771c3b833757 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -237,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 85d9ed1dcf42..d8b8d56266cb 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -104,7 +104,9 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_follow; /* Look it up again */ parent = dget_parent(nd->dentry); - err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, + &nd->dentry->d_name, + &fh, &fattr); dput(parent); if (err != 0) goto out_err; @@ -178,7 +180,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, { #ifdef CONFIG_NFS_V4 struct vfsmount *mnt = NULL; - switch (server->rpc_ops->version) { + switch (server->nfs_client->cl_nfsversion) { case 2: case 3: mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b731b1945270..1573eeb07ce1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); + return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d1b4a5b36e33..e1e5eab0259b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -252,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) lock_kernel(); - error = server->rpc_ops->statfs(server, fh, &res); + error = server->nfs_client->rpc_ops->statfs(server, fh, &res); buf->f_type = NFS_SUPER_MAGIC; if (error < 0) goto out_err; @@ -343,10 +343,11 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; + struct nfs_client *clp = nfss->nfs_client; char buf[12]; const char *proto; - seq_printf(m, ",vers=%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", clp->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ || showdefaults) @@ -427,7 +428,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",namelen=%d", nfss->namelen); #ifdef CONFIG_NFS_V4 - if (nfss->rpc_ops->version == 4) { + if (nfss->nfs_client->cl_nfsversion == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); @@ -503,7 +504,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f struct nfs_server *server = NFS_SB(sb); int error; - error = server->rpc_ops->getroot(server, rootfh, fsinfo); + error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); return ERR_PTR(error); @@ -553,14 +554,14 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) no_root_error = -ENOMEM; goto out_no_root; } - sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; /* mount time stamp, in seconds */ server->mount_time = jiffies; /* Get some general file system info */ if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) + server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) server->namelen = pathinfo.max_namelen; /* Work out a lot of parameters */ if (server->rsize == 0) @@ -663,9 +664,14 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + int nfsversion = 2; - clp = nfs_get_client(server->hostname, &server->addr, - server->rpc_ops->version); +#ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) + nfsversion = 3; +#endif + + clp = nfs_get_client(server->hostname, &server->addr, nfsversion); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return ERR_PTR(PTR_ERR(clp)); @@ -676,6 +682,19 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) server->retrans_timeo = timeparms.to_initval; server->retrans_count = timeparms.to_retries; + /* Check NFS protocol revision and initialize RPC op vector + * and file handle pool. */ +#ifdef CONFIG_NFS_V3 + if (nfsversion == 3) { + clp->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + clp->rpc_ops = &nfs_v2_clientops; + } +#else + clp->rpc_ops = &nfs_v2_clientops; +#endif + /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -686,7 +705,7 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); + clp->cl_nfsversion, data->pseudoflavor); if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %ld\n", __FUNCTION__, PTR_ERR(xprt)); @@ -750,7 +769,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl fsinfo.fattr = data->fattr; if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; sb->s_flags |= MS_ACTIVE; return server; out_put_root: @@ -865,19 +884,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return -ENOMEM; strcpy(server->hostname, data->hostname); - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. */ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif - /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) data->pseudoflavor = RPC_AUTH_UNIX; @@ -888,6 +894,7 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) server->client = nfs_create_client(server, data); if (IS_ERR(server->client)) return PTR_ERR(server->client); + /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { struct rpc_auth *auth; @@ -1129,6 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, /* Now create transport and client */ if (clp->cl_cons_state == NFS_CS_INITING) { + clp->rpc_ops = &nfs_v4_clientops; + xprt = xprt_create_proto(proto, &server->addr, timeparms); if (IS_ERR(xprt)) { err = PTR_ERR(xprt); @@ -1139,7 +1148,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, /* Bind to a reserved port! */ xprt->resvport = 1; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, flavor); + clp->cl_nfsversion, flavor); if (IS_ERR(clnt)) { err = PTR_ERR(clnt); dprintk("%s: cannot create RPC client. Error = %d\n", @@ -1215,8 +1224,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; - server->rpc_ops = &nfs_v4_clientops; - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); server->retrans_timeo = timeparms.to_initval; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 70e1dc9162e2..51e9bd90dedc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -215,7 +215,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_FH(inode) (&NFS_I(inode)->fh) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) -#define NFS_PROTO(inode) (NFS_SERVER(inode)->rpc_ops) +#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e7d7662f51fd..aae7c117597a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -25,6 +25,7 @@ struct nfs_client { struct list_head cl_superblocks; /* List of nfs_server structs */ struct rpc_clnt * cl_rpcclient; + const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ @@ -74,7 +75,6 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ -- cgit v1.2.3-71-gd317 From 5006a76cca8f86c6975c16fcf67e83b8b0eee2b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: NFS: Eliminate client_sys in favour of cl_rpcclient Eliminate nfs_server::client_sys in favour of nfs_client::cl_rpcclient as we only really need one per server that we're talking to since it doesn't have any security on it. The retransmission management variables are also moved to the common struct as they're required to set up the cl_rpcclient connection. The NFS2/3 client and client_acl connections are thenceforth derived by cloning the cl_rpcclient connection and post-applying the authorisation flavour. The code for setting up the initial common connection has been moved to client.c as nfs_create_rpc_client(). All the NFS program definition tables are also moved there as that's where they're now required rather than super.c. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 119 +++++++++++++++++++++++++ fs/nfs/internal.h | 2 + fs/nfs/nfs3proc.c | 6 +- fs/nfs/proc.c | 4 +- fs/nfs/super.c | 222 +++++++++++----------------------------------- include/linux/nfs_fs_sb.h | 5 +- 6 files changed, 179 insertions(+), 179 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index cb5e92463bdb..c08cab935ad5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -50,6 +50,48 @@ static DEFINE_SPINLOCK(nfs_client_lock); static LIST_HEAD(nfs_client_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); +/* + * RPC cruft for NFS + */ +static struct rpc_version *nfs_version[5] = { + [2] = &nfs_version2, +#ifdef CONFIG_NFS_V3 + [3] = &nfs_version3, +#endif +#ifdef CONFIG_NFS_V4 + [4] = &nfs_version4, +#endif +}; + +struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + /* * Allocate a shared client record * @@ -310,3 +352,80 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, + unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + +/* + * Create an RPC client handle + */ +int nfs_create_rpc_client(struct nfs_client *clp, int proto, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t flavor) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + + if (!IS_ERR(clp->cl_rpcclient)) + return 0; + + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + clp->retrans_timeo = timeparms.to_initval; + clp->retrans_count = timeparms.to_retries; + + /* create transport and client */ + xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms); + if (IS_ERR(xprt)) { + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + return PTR_ERR(xprt); + } + + /* Bind to a reserved port! */ + xprt->resvport = 1; + /* Create the client RPC handle */ + clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program, + clp->rpc_ops->version, RPC_AUTH_UNIX); + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(clnt)); + return PTR_ERR(clnt); + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clp->cl_rpcclient = clnt; + return 0; +} diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ac370d5d4494..2f3aa52fbefc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -20,6 +20,8 @@ extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int); extern void nfs_mark_client_ready(struct nfs_client *, int); +extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int, + unsigned int, rpc_authflavor_t); /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3e5371241cea..0622af0122be 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, int status; status = do_proc_get_root(server->client, fhandle, info); - if (status && server->client_sys != server->client) - status = do_proc_get_root(server->client_sys, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info); return status; } @@ -785,7 +785,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 77676903e0f5..5a8b9407ee9a 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; msg.rpc_resp = &fsinfo; - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e1e5eab0259b..85583414a3ca 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -60,52 +60,6 @@ */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) -/* - * RPC cruft for NFS - */ -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; - -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = ARRAY_SIZE(nfs_version), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; - -struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; - - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); @@ -376,8 +330,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, proto = buf; } seq_printf(m, ",proto=%s", proto); - seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", clp->retrans_count); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } @@ -621,38 +575,6 @@ out_no_root: return no_root_error; } -/* - * Initialise the timeout values for a connection - */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) -{ - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} - /* * Create an RPC client handle. */ @@ -660,11 +582,10 @@ static struct rpc_clnt * nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) { struct nfs_client *clp; - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; + struct rpc_clnt *clnt; int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; int nfsversion = 2; + int err; #ifdef CONFIG_NFS_V3 if (server->flags & NFS_MOUNT_VER3) @@ -677,52 +598,54 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return ERR_PTR(PTR_ERR(clp)); } - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. */ + if (clp->cl_cons_state == NFS_CS_INITING) { + /* Check NFS protocol revision and initialize RPC op + * vector and file handle pool. */ #ifdef CONFIG_NFS_V3 - if (nfsversion == 3) { - clp->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - clp->rpc_ops = &nfs_v2_clientops; - } + if (nfsversion == 3) { + clp->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + clp->rpc_ops = &nfs_v2_clientops; + } #else - clp->rpc_ops = &nfs_v2_clientops; + clp->rpc_ops = &nfs_v2_clientops; #endif - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - nfs_mark_client_ready(clp, PTR_ERR(xprt)); - nfs_put_client(clp); - return (struct rpc_clnt *)xprt; + /* create transport and client */ + err = nfs_create_rpc_client(clp, proto, data->timeo, + data->retrans, RPC_AUTH_UNIX); + if (err < 0) + goto client_init_error; + + nfs_mark_client_ready(clp, 0); } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - clp->cl_nfsversion, data->pseudoflavor); + + /* create an nfs_server-specific client */ + clnt = rpc_clone_client(clp->cl_rpcclient); if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; + dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + nfs_put_client(clp); + return ERR_PTR(PTR_ERR(clnt)); } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; + if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(data->pseudoflavor, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return ERR_PTR(PTR_ERR(auth)); + } + } - nfs_mark_client_ready(clp, 0); server->nfs_client = clp; return clnt; -out_fail: - nfs_mark_client_ready(clp, PTR_ERR(xprt)); +client_init_error: + nfs_mark_client_ready(clp, err); nfs_put_client(clp); - return clnt; + return ERR_PTR(err); } /* @@ -741,7 +664,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl sb->s_blocksize_bits = data->sb->s_blocksize_bits; sb->s_maxbytes = data->sb->s_maxbytes; - server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client_acl = ERR_PTR(-EINVAL); server->io_stats = nfs_alloc_iostats(); if (server->io_stats == NULL) goto out; @@ -750,11 +673,6 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl if (IS_ERR((err = server->client))) goto out; - if (!IS_ERR(parent->client_sys)) { - server->client_sys = rpc_clone_client(parent->client_sys); - if (IS_ERR((err = server->client_sys))) - goto out; - } if (!IS_ERR(parent->client_acl)) { server->client_acl = rpc_clone_client(parent->client_acl); if (IS_ERR((err = server->client_acl))) @@ -813,7 +731,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data, error = PTR_ERR(sb); goto kill_rpciod; } - + if (sb->s_root) goto out_rpciod_down; @@ -896,19 +814,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3_ACL if (!(server->flags & NFS_MOUNT_NOACL)) { @@ -1012,7 +917,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, goto out_err_noserver; /* Zero out the NFS state stuff */ init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) @@ -1083,8 +988,6 @@ static void nfs_kill_super(struct super_block *s) if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); if (!IS_ERR(server->client_acl)) rpc_shutdown_client(server->client_acl); @@ -1121,10 +1024,9 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type, #ifdef CONFIG_NFS_V4 static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) + int timeo, int retrans, int proto, rpc_authflavor_t flavor) { struct nfs_client *clp; - struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int err = -EIO; @@ -1138,26 +1040,10 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, if (clp->cl_cons_state == NFS_CS_INITING) { clp->rpc_ops = &nfs_v4_clientops; - xprt = xprt_create_proto(proto, &server->addr, timeparms); - if (IS_ERR(xprt)) { - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); + err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor); + if (err < 0) goto client_init_error; - } - /* Bind to a reserved port! */ - xprt->resvport = 1; - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - clp->cl_nfsversion, flavor); - if (IS_ERR(clnt)) { - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto client_init_error; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clp->cl_rpcclient = clnt; + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); err = nfs_idmap_new(clp); if (err < 0) { @@ -1205,7 +1091,6 @@ client_init_error: static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; - struct rpc_timeout timeparms; rpc_authflavor_t authflavour; int err = -EIO; @@ -1224,11 +1109,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { @@ -1244,7 +1124,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } } - server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); + server->client = nfs4_create_client(server, data->timeo, data->retrans, + data->proto, authflavour); if (IS_ERR(server->client)) { err = PTR_ERR(server->client); dprintk("%s: cannot create RPC client. Error = %d\n", @@ -1318,7 +1199,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, return -ENOMEM; /* Zero out the NFS state stuff */ init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) @@ -1489,7 +1370,6 @@ err: static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) { struct nfs_server *server = NFS_SB(sb); - struct rpc_timeout timeparms; int proto, timeo, retrans; void *err; @@ -1498,11 +1378,11 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf set the timeouts and retries to low values */ timeo = 2; retrans = 1; - nfs_init_timeout_values(&timeparms, proto, timeo, retrans); nfs_put_client(server->nfs_client); server->nfs_client = NULL; - server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); + server->client = nfs4_create_client(server, timeo, retrans, proto, + data->authflavor); if (IS_ERR((err = server->client))) goto out_err; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index aae7c117597a..d404ceca9168 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_client { struct rpc_clnt * cl_rpcclient; const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ @@ -73,7 +75,6 @@ struct nfs_client { struct nfs_server { struct nfs_client * nfs_client; /* shared client and NFS4 state */ struct rpc_clnt * client; /* RPC client handle */ - struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; @@ -90,8 +91,6 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; - unsigned long retrans_timeo; /* retransmit timeout */ - unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; -- cgit v1.2.3-71-gd317 From 54ceac4515986030c2502960be620198dd8fe25b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:13 -0400 Subject: NFS: Share NFS superblocks per-protocol per-server per-FSID The attached patch makes NFS share superblocks between mounts from the same server and FSID over the same protocol. It does this by creating each superblock with a false root and returning the real root dentry in the vfsmount presented by get_sb(). The root dentry set starts off as an anonymous dentry if we don't already have the dentry for its inode, otherwise it simply returns the dentry we already have. We may thus end up with several trees of dentries in the superblock, and if at some later point one of anonymous tree roots is discovered by normal filesystem activity to be located in another tree within the superblock, the anonymous root is named and materialises attached to the second tree at the appropriate point. Why do it this way? Why not pass an extra argument to the mount() syscall to indicate the subpath and then pathwalk from the server root to the desired directory? You can't guarantee this will work for two reasons: (1) The root and intervening nodes may not be accessible to the client. With NFS2 and NFS3, for instance, mountd is called on the server to get the filehandle for the tip of a path. mountd won't give us handles for anything we don't have permission to access, and so we can't set up NFS inodes for such nodes, and so can't easily set up dentries (we'd have to have ghost inodes or something). With this patch we don't actually create dentries until we get handles from the server that we can use to set up their inodes, and we don't actually bind them into the tree until we know for sure where they go. (2) Inaccessible symbolic links. If we're asked to mount two exports from the server, eg: mount warthog:/warthog/aaa/xxx /mmm mount warthog:/warthog/bbb/yyy /nnn We may not be able to access anything nearer the root than xxx and yyy, but we may find out later that /mmm/www/yyy, say, is actually the same directory as the one mounted on /nnn. What we might then find out, for example, is that /warthog/bbb was actually a symbolic link to /warthog/aaa/xxx/www, but we can't actually determine that by talking to the server until /warthog is made available by NFS. This would lead to having constructed an errneous dentry tree which we can't easily fix. We can end up with a dentry marked as a directory when it should actually be a symlink, or we could end up with an apparently hardlinked directory. With this patch we need not make assumptions about the type of a dentry for which we can't retrieve information, nor need we assume we know its place in the grand scheme of things until we actually see that place. This patch reduces the possibility of aliasing in the inode and page caches for inodes that may be accessed by more than one NFS export. It also reduces the number of superblocks required for NFS where there are many NFS exports being used from a server (home directory server + autofs for example). This in turn makes it simpler to do local caching of network filesystems, as it can then be guaranteed that there won't be links from multiple inodes in separate superblocks to the same cache file. Obviously, cache aliasing between different levels of NFS protocol could still be a problem, but at least that gives us another key to use when indexing the cache. This patch makes the following changes: (1) The server record construction/destruction has been abstracted out into its own set of functions to make things easier to get right. These have been moved into fs/nfs/client.c. All the code in fs/nfs/client.c has to do with the management of connections to servers, and doesn't touch superblocks in any way; the remaining code in fs/nfs/super.c has to do with VFS superblock management. (2) The sequence of events undertaken by NFS mount is now reordered: (a) A volume representation (struct nfs_server) is allocated. (b) A server representation (struct nfs_client) is acquired. This may be allocated or shared, and is keyed on server address, port and NFS version. (c) If allocated, the client representation is initialised. The state member variable of nfs_client is used to prevent a race during initialisation from two mounts. (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we are given the root FH in advance. (e) The volume FSID is probed for on the root FH. (f) The volume representation is initialised from the FSINFO record retrieved on the root FH. (g) sget() is called to acquire a superblock. This may be allocated or shared, keyed on client pointer and FSID. (h) If allocated, the superblock is initialised. (i) If the superblock is shared, then the new nfs_server record is discarded. (j) The root dentry for this mount is looked up from the root FH. (k) The root dentry for this mount is assigned to the vfsmount. (3) nfs_readdir_lookup() creates dentries for each of the entries readdir() returns; this function now attaches disconnected trees from alternate roots that happen to be discovered attached to a directory being read (in the same way nfs_lookup() is made to do for lookup ops). The new d_materialise_unique() function is now used to do this, thus permitting the whole thing to be done under one set of locks, and thus avoiding any race between mount and lookup operations on the same directory. (4) The client management code uses a new debug facility: NFSDBG_CLIENT which is set by echoing 1024 to /proc/net/sunrpc/nfs_debug. (5) Clone mounts are now called xdev mounts. (6) Use the dentry passed to the statfs() op as the handle for retrieving fs statistics rather than the root dentry of the superblock (which is now a dummy). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/client.c | 735 ++++++++++++++++++++++++++- fs/nfs/dir.c | 16 +- fs/nfs/getroot.c | 306 ++++++++++++ fs/nfs/idmap.c | 3 +- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 82 +-- fs/nfs/namespace.c | 25 +- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4_fs.h | 6 - fs/nfs/nfs4namespace.c | 110 ++++- fs/nfs/nfs4proc.c | 59 +-- fs/nfs/nfs4renewd.c | 13 - fs/nfs/nfs4state.c | 18 - fs/nfs/read.c | 2 +- fs/nfs/super.c | 1207 +++++++++++++++++---------------------------- fs/nfs/write.c | 2 +- include/linux/nfs_fs_sb.h | 21 +- 18 files changed, 1655 insertions(+), 956 deletions(-) create mode 100644 fs/nfs/getroot.c (limited to 'include/linux') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 3b993a6f8163..f4580b44eef4 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := client.o dir.o file.o inode.o super.o nfs2xdr.o \ +nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ pagelist.o proc.o read.o symlink.o unlink.o \ write.o namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c08cab935ad5..dafba608c0a0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -48,6 +48,7 @@ static DEFINE_SPINLOCK(nfs_client_lock); static LIST_HEAD(nfs_client_list); +static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); /* @@ -268,9 +269,9 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -struct nfs_client *nfs_get_client(const char *hostname, - const struct sockaddr_in *addr, - int nfsversion) +static struct nfs_client *nfs_get_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) { struct nfs_client *clp, *new = NULL; int error; @@ -340,6 +341,8 @@ found_client: return ERR_PTR(error); } + BUG_ON(clp->cl_cons_state != NFS_CS_READY); + dprintk("--> nfs_get_client() = %p [share]\n", clp); return clp; } @@ -347,7 +350,7 @@ found_client: /* * Mark a server as ready or failed */ -void nfs_mark_client_ready(struct nfs_client *clp, int state) +static void nfs_mark_client_ready(struct nfs_client *clp, int state) { clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); @@ -389,10 +392,10 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, /* * Create an RPC client handle */ -int nfs_create_rpc_client(struct nfs_client *clp, int proto, - unsigned int timeo, - unsigned int retrans, - rpc_authflavor_t flavor) +static int nfs_create_rpc_client(struct nfs_client *clp, int proto, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t flavor) { struct rpc_timeout timeparms; struct rpc_xprt *xprt = NULL; @@ -429,3 +432,719 @@ int nfs_create_rpc_client(struct nfs_client *clp, int proto, clp->cl_rpcclient = clnt; return 0; } + +/* + * Version 2 or 3 client destruction + */ +static void nfs_destroy_server(struct nfs_server *server) +{ + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ +} + +/* + * Version 2 or 3 lockd setup + */ +static int nfs_start_lockd(struct nfs_server *server) +{ + int error = 0; + + if (server->nfs_client->cl_nfsversion > 3) + goto out; + if (server->flags & NFS_MOUNT_NONLM) + goto out; + error = lockd_up(); + if (error < 0) + server->flags |= NFS_MOUNT_NONLM; + else + server->destroy = nfs_destroy_server; +out: + return error; +} + +/* + * Initialise an NFSv3 ACL client connection + */ +#ifdef CONFIG_NFS_V3_ACL +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->nfs_client->cl_nfsversion != 3) + goto out_noacl; + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +/* + * Create a general RPC client + */ +static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour) +{ + struct nfs_client *clp = server->nfs_client; + + server->client = rpc_clone_client(clp->cl_rpcclient); + if (IS_ERR(server->client)) { + dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + return PTR_ERR(server->client); + } + + if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(pseudoflavour, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); + } + } + server->client->cl_softrtry = 0; + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + + server->client->cl_intr = 0; + if (server->flags & NFS4_MOUNT_INTR) + server->client->cl_intr = 1; + + return 0; +} + +/* + * Initialise an NFS2 or NFS3 client + */ +static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) +{ + int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is already initialised */ + dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v2_clientops; +#ifdef CONFIG_NFS_V3 + if (clp->cl_nfsversion == 3) + clp->rpc_ops = &nfs_v3_clientops; +#endif + /* + * Create a client RPC handle for doing FSSTAT with UNIX auth only + * - RFC 2623, sec 2.3.2 + */ + error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, + RPC_AUTH_UNIX); + if (error < 0) + goto error; + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs_init_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 2 or 3 client + */ +static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct nfs_client *clp; + int error, nfsvers = 2; + + dprintk("--> nfs_init_server()\n"); + +#ifdef CONFIG_NFS_V3 + if (data->flags & NFS_MOUNT_VER3) + nfsvers = 3; +#endif + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(data->hostname, &data->addr, nfsvers); + if (IS_ERR(clp)) { + dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); + return PTR_ERR(clp); + } + + error = nfs_init_client(clp, data); + if (error < 0) + goto error; + + server->nfs_client = clp; + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + /* Start lockd here, before we might error out */ + error = nfs_start_lockd(server); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, data->pseudoflavor); + if (error < 0) + goto error; + + server->namelen = data->namlen; + /* Create a client RPC handle for the NFSv3 ACL management interface */ + nfs_init_server_aclclient(server); + if (clp->cl_nfsversion == 3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); + return 0; + +error: + server->nfs_client = NULL; + nfs_put_client(clp); + dprintk("<-- nfs_init_server() = xerror %d\n", error); + return error; +} + +/* + * Load up the server record from information gained in an fsinfo record + */ +static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) +{ + unsigned long max_rpc_payload; + + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo->rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo->wtpref, NULL); + + if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax) + server->rsize = nfs_block_size(fsinfo->rtmax, NULL); + if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) + server->wsize = nfs_block_size(fsinfo->wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + } + + server->maxfilesize = fsinfo->maxfilesize; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); +} + +/* + * Probe filesystem information, including the FSID on v2/v3 + */ +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +{ + struct nfs_fsinfo fsinfo; + struct nfs_client *clp = server->nfs_client; + int error; + + dprintk("--> nfs_probe_fsinfo()\n"); + + if (clp->rpc_ops->set_capabilities != NULL) { + error = clp->rpc_ops->set_capabilities(server, mntfh); + if (error < 0) + goto out_error; + } + + fsinfo.fattr = fattr; + nfs_fattr_init(fattr); + error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); + if (error < 0) + goto out_error; + + nfs_server_set_fsinfo(server, &fsinfo); + + /* Get some general file system info */ + if (server->namelen == 0) { + struct nfs_pathconf pathinfo; + + pathinfo.fattr = fattr; + nfs_fattr_init(fattr); + + if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + } + + dprintk("<-- nfs_probe_fsinfo() = 0\n"); + return 0; + +out_error: + dprintk("nfs_probe_fsinfo: error = %d\n", -error); + return error; +} + +/* + * Copy useful information when duplicating a server record + */ +static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +{ + target->flags = source->flags; + target->acregmin = source->acregmin; + target->acregmax = source->acregmax; + target->acdirmin = source->acdirmin; + target->acdirmax = source->acdirmax; + target->caps = source->caps; +} + +/* + * Allocate and initialise a server record + */ +static struct nfs_server *nfs_alloc_server(void) +{ + struct nfs_server *server; + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return NULL; + + server->client = server->client_acl = ERR_PTR(-EINVAL); + + /* Zero out the NFS state stuff */ + INIT_LIST_HEAD(&server->client_link); + INIT_LIST_HEAD(&server->master_link); + + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + kfree(server); + return NULL; + } + + return server; +} + +/* + * Free up a server record + */ +void nfs_free_server(struct nfs_server *server) +{ + dprintk("--> nfs_free_server()\n"); + + spin_lock(&nfs_client_lock); + list_del(&server->client_link); + list_del(&server->master_link); + spin_unlock(&nfs_client_lock); + + if (server->destroy != NULL) + server->destroy(server); + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + nfs_put_client(server->nfs_client); + + nfs_free_iostats(server->io_stats); + kfree(server); + nfs_release_automount_timer(); + dprintk("<-- nfs_free_server()\n"); +} + +/* + * Create a version 2 or 3 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + struct nfs_fattr fattr; + int error; + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client representation */ + error = nfs_init_server(server, data); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + if (!(fattr.valid & NFS_ATTR_FATTR)) { + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_create_server: getattr error = %d\n", -error); + goto error; + } + } + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + return server; + +error: + nfs_free_server(server); + return ERR_PTR(error); +} + +#ifdef CONFIG_NFS_V4 +/* + * Initialise an NFS4 client record + */ +static int nfs4_init_client(struct nfs_client *clp, + int proto, int timeo, int retrans, + rpc_authflavor_t authflavour) +{ + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error; + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __FUNCTION__, error); + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + goto error; + } + + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return error; +} + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, const struct sockaddr_in *addr, + rpc_authflavor_t authflavour, + int proto, int timeo, int retrans) +{ + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(hostname, addr, 4); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + error = nfs4_init_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error_put; + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; + +error_put: + nfs_put_client(clp); +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) +{ + int error; + + dprintk("--> nfs4_init_server()\n"); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps |= NFS_CAP_ATOMIC_OPEN; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + error = nfs_init_server_rpcclient(server, authflavour); + + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, + const char *hostname, + const struct sockaddr_in *addr, + const char *mntpath, + const char *ip_addr, + rpc_authflavor_t authflavour, + struct nfs_fh *mntfh) +{ + struct nfs_fattr fattr; + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client record */ + error = nfs4_set_client(server, hostname, addr, authflavour, + data->proto, data->timeo, data->retrans); + if (error < 0) + goto error; + + /* set up the general RPC client */ + error = nfs4_init_server(server, data, authflavour); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs4_path_walk(server, mntfh, mntpath); + if (error < 0) + goto error; + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *fh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + struct nfs_fattr fattr; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, data->addr, + data->authflavor, + parent_server->client->cl_xprt->prot, + parent_client->retrans_timeo, + parent_client->retrans_count); + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN; + + error = nfs_init_server_rpcclient(server, data->authflavor); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr); + if (error < 0) + goto error; + + dprintk("Referral FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +#endif /* CONFIG_NFS_V4 */ + +/* + * Clone an NFS2, NFS3 or NFS4 server record + */ +struct nfs_server *nfs_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_server *server; + struct nfs_fattr fattr_fsinfo; + int error; + + dprintk("--> nfs_clone_server(,%llx:%llx,)\n", + fattr->fsid.major, fattr->fsid.minor); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Copy data from the source */ + server->nfs_client = source->nfs_client; + atomic_inc(&server->nfs_client->cl_count); + nfs_server_copy_userdata(server, source); + + server->fsid = fattr->fsid; + + error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor); + if (error < 0) + goto out_free_server; + if (!IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); + if (error < 0) + goto out_free_server; + + dprintk("Cloned FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + error = nfs_start_lockd(server); + if (error < 0) + goto out_free_server; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_clone_server() = %p\n", server); + return server; + +out_free_server: + nfs_free_server(server); + dprintk("<-- nfs_clone_server() = error %d\n", error); + return ERR_PTR(error); +} diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 19362712452f..9b496ef4abea 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -870,14 +871,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); return 0; } @@ -913,7 +914,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(dir, &fhandle, &fattr); + error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -922,8 +923,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = (struct dentry *)inode; if (IS_ERR(res)) goto out_unlock; + no_entry: - res = d_add_unique(dentry, inode); + res = d_materialise_unique(dentry, inode); if (res != NULL) dentry = res; nfs_renew_times(dentry); @@ -1117,11 +1119,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) dput(dentry); return NULL; } - alias = d_add_unique(dentry, inode); + + alias = d_materialise_unique(dentry, inode); if (alias != NULL) { dput(dentry); dentry = alias; } + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return dentry; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c new file mode 100644 index 000000000000..977e59088eeb --- /dev/null +++ b/fs/nfs/getroot.c @@ -0,0 +1,306 @@ +/* getroot.c: get the root dentry for an NFS mount + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT +#define NFS_PARANOIA 1 + +/* + * get an NFS2/NFS3 root dentry from the root filehandle + */ +struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the actual root for this mount */ + fsinfo.fattr = &fattr; + + error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, fsinfo.fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + return mntroot; +} + +#ifdef CONFIG_NFS_V4 + +/* + * Do a simple pathwalk from the root FH of the server to the nominated target + * of the mountpoint + * - give error on symlinks + * - give error on ".." occurring in the path + * - follow traversals + */ +int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path) +{ + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct nfs_fh lastfh; + struct qstr name; + int ret; + //int referral_count = 0; + + dprintk("--> nfs4_path_walk(,,%s)\n", path); + + fsinfo.fattr = &fattr; + nfs_fattr_init(&fattr); + + if (*path++ != '/') { + dprintk("nfs4_get_root: Path does not begin with a slash\n"); + return -EINVAL; + } + + /* Start by getting the root filehandle from the server */ + ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " getroot encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " getroot obtained referral\n"); + return -EREMOTE; + } + +next_component: + dprintk("Next: %s\n", path); + + /* extract the next bit of the path */ + if (!*path) + goto path_walk_complete; + + name.name = path; + while (*path && *path != '/') + path++; + name.len = path - (const char *) name.name; + +eat_dot_dir: + while (*path == '/') + path++; + + if (path[0] == '.' && (path[1] == '/' || !path[1])) { + path += 2; + goto eat_dot_dir; + } + + if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2]) + ) { + printk(KERN_ERR "nfs4_get_root:" + " Mount path contains reference to \"..\"\n"); + return -EINVAL; + } + + /* lookup the next FH in the sequence */ + memcpy(&lastfh, mntfh, sizeof(lastfh)); + + dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); + + ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name, + mntfh, &fattr); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh obtained referral\n"); + return -EREMOTE; + } + + goto next_component; + +path_walk_complete: + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + dprintk("<-- nfs4_path_walk() = 0\n"); + return 0; +} + +/* + * get an NFS4 root dentry from the root filehandle + */ +struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + dprintk("--> nfs4_get_root()\n"); + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the info about the server and filesystem */ + error = nfs4_server_capabilities(server, mntfh); + if (error < 0) { + dprintk("nfs_get_root: getcaps error = %d\n", + -error); + return ERR_PTR(error); + } + + /* get the actual root for this mount */ + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, &fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + dprintk("<-- nfs4_get_root()\n"); + return mntroot; +} + +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 231c20ffc0ff..f96dfac7dc9a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -114,8 +114,7 @@ nfs_idmap_new(struct nfs_client *clp) struct idmap *idmap; int error; - if (clp->cl_idmap != NULL) - return 0; + BUG_ON(clp->cl_idmap != NULL); if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 771c3b833757..a547c58a83e6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1020,7 +1020,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) out_fileid: printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", - NFS_SERVER(inode)->hostname, inode->i_sb->s_id, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)nfsi->fileid, (long long)fattr->fileid); goto out_err; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2f3aa52fbefc..e73ba4f1052a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,6 +4,18 @@ #include +struct nfs_string; +struct nfs_mount_data; +struct nfs4_mount_data; + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. + */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -16,12 +28,25 @@ struct nfs_clone_mount { }; /* client.c */ +extern struct rpc_program nfs_program; + extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); -extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int); -extern void nfs_mark_client_ready(struct nfs_client *, int); -extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int, - unsigned int, rpc_authflavor_t); +extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, + const char *, + const struct sockaddr_in *, + const char *, + const char *, + rpc_authflavor_t, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, + struct nfs_fh *); +extern void nfs_free_server(struct nfs_server *server); +extern struct nfs_server *nfs_clone_server(struct nfs_server *, + struct nfs_fh *, + struct nfs_fattr *); /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 @@ -89,10 +114,10 @@ extern void nfs4_clear_inode(struct inode *); #endif /* super.c */ -extern struct file_system_type nfs_referral_nfs4_fs_type; -extern struct file_system_type clone_nfs_fs_type; +extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 -extern struct file_system_type clone_nfs4_fs_type; +extern struct file_system_type nfs4_xdev_fs_type; +extern struct file_system_type nfs4_referral_fs_type; #endif extern struct rpc_stat nfs_rpcstat; @@ -101,28 +126,30 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); /* namespace.c */ -extern char *nfs_path(const char *base, const struct dentry *dentry, +extern char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen); -/* - * Determine the mount path as a string - */ +/* getroot.c */ +extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); #ifdef CONFIG_NFS_V4 -static inline char * -nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) -{ - return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); -} +extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); + +extern int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path); #endif /* * Determine the device name as a string */ static inline char *nfs_devname(const struct vfsmount *mnt_parent, - const struct dentry *dentry, - char *buffer, ssize_t buflen) + const struct dentry *dentry, + char *buffer, ssize_t buflen) { - return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); + return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, + dentry, buffer, buflen); } /* @@ -178,20 +205,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } - -/* - * Check if the string represents a "valid" IPv4 address - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d8b8d56266cb..77b00684894d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFS namespace */ @@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - arbitrary string to prepend to the path + * @droot - pointer to root dentry for mountpoint * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer @@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition. */ -char *nfs_path(const char *base, const struct dentry *dentry, +char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen) { char *end = buffer+buflen; @@ -47,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry, *--end = '\0'; buflen--; spin_lock(&dcache_lock); - while (!IS_ROOT(dentry)) { + while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) @@ -96,12 +100,13 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr fattr; int err; + dprintk("--> nfs_follow_mountpoint()\n"); + BUG_ON(IS_ROOT(dentry)); dprintk("%s: enter\n", __FUNCTION__); dput(nd->dentry); nd->dentry = dget(dentry); - if (d_mountpoint(nd->dentry)) - goto out_follow; + /* Look it up again */ parent = dget_parent(nd->dentry); err = server->nfs_client->rpc_ops->lookup(parent->d_inode, @@ -134,6 +139,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: dprintk("%s: done, returned %d\n", __FUNCTION__, err); + + dprintk("<-- nfs_follow_mountpoint() = %d\n", err); return ERR_PTR(err); out_err: path_release(nd); @@ -183,14 +190,14 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, switch (server->nfs_client->cl_nfsversion) { case 2: case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); break; case 4: - mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); } return mnt; #else - return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); #endif } @@ -216,6 +223,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, char *page = (char *) __get_free_page(GFP_USER); char *devname; + dprintk("--> nfs_do_submount()\n"); + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); @@ -230,5 +239,7 @@ free_page: free_page((unsigned long)page); out: dprintk("%s: done\n", __FUNCTION__); + + dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0622af0122be..9e8258ece6fd 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, } /* - * Bare-bones access to getattr: this is for nfs_read_super. + * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb */ static int nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e7879245361e..61095fe4b5ca 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -188,8 +188,6 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(void *); /* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); @@ -224,10 +222,6 @@ extern struct svc_version nfs4_callback_version1; #else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) #define nfs4_close_state(a, b) do { } while (0) #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index faed9bcba50f..24e47f3bbd17 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/nfs4namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFSv4 namespace */ @@ -47,6 +48,68 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +/* + * Determine the mount path as a string + */ +static char *nfs4_path(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + const char *srvpath; + + srvpath = strchr(mnt_parent->mnt_devname, ':'); + if (srvpath) + srvpath++; + else + srvpath = mnt_parent->mnt_devname; + + return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen); +} + +/* + * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we + * believe to be the server path to this dentry + */ +static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + const struct nfs4_fs_locations *locations, + char *page, char *page2) +{ + const char *path, *fs_path; + + path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + return PTR_ERR(path); + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + return PTR_ERR(fs_path); + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", + __FUNCTION__, path, fs_path); + return -ENOENT; + } + + return 0; +} + +/* + * Check if the string represents a "valid" IPv4 address + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} /** * nfs_follow_referral - set up mountpoint when hitting a referral on moved error @@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, .dentry = dentry, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; - char *page, *page2; - char *path, *fs_path; + char *page = NULL, *page2 = NULL; char *devname; - int loc, s; + int loc, s, error; if (locations == NULL || locations->nlocations <= 0) goto out; @@ -79,31 +141,25 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, dprintk("%s: referral at %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure fs path is a prefix of current dentry path */ page = (char *) __get_free_page(GFP_USER); - if (page == NULL) + if (!page) goto out; + page2 = (char *) __get_free_page(GFP_USER); - if (page2 == NULL) + if (!page2) goto out; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) - goto out_free; - - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) - goto out_free; - - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { - dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); - goto out_free; + /* Ensure fs path is a prefix of current dentry path */ + error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); + if (error < 0) { + mnt = ERR_PTR(error); + goto out; } devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); if (IS_ERR(devname)) { mnt = (struct vfsmount *)devname; - goto out_free; + goto out; } loc = 0; @@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, addr.sin_port = htons(NFS_PORT); mountdata.addr = &addr; - mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata); if (!IS_ERR(mnt)) { break; } @@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, loc++; } -out_free: - free_page((unsigned long)page); - free_page((unsigned long)page2); out: + free_page((unsigned long) page); + free_page((unsigned long) page2); dprintk("%s: done\n", __FUNCTION__); return mnt; } @@ -165,7 +220,7 @@ out: */ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; @@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr goto out_free; /* Get locations */ + mnt = ERR_PTR(-ENOENT); + parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %s/%s\n", + __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); dput(parent); - if (err != 0 || fs_locations->nlocations <= 0 || + if (err != 0 || + fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) goto out_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1573eeb07ce1..a825547e8214 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1393,70 +1393,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +/* + * get the file handle for the "/" directory on the server + */ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsinfo *info) { - struct nfs_fattr * fattr = info->fattr; - unsigned char * p; - struct qstr q; - struct nfs4_lookup_arg args = { - .dir_fh = fhandle, - .name = &q, - .bitmask = nfs4_fattr_bitmap, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; int status; - /* - * Now we do a separate LOOKUP for each component of the mount path. - * The LOOKUPs are done separately so that we can conveniently - * catch an ERR_WRONGSEC if it occurs along the way... - */ status = nfs4_lookup_root(server, fhandle, info); - if (status) - goto out; - - p = server->mnt_path; - for (;;) { - struct nfs4_exception exception = { }; - - while (*p == '/') - p++; - if (!*p) - break; - q.name = p; - while (*p && (*p != '/')) - p++; - q.len = p - q.name; - - do { - nfs_fattr_init(fattr); - status = nfs4_handle_exception(server, - rpc_call_sync(server->client, &msg, 0), - &exception); - } while (exception.retry); - if (status == 0) - continue; - if (status == -ENOENT) { - printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); - printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n"); - } - break; - } if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); -out: return nfs4_map_errors(status); } diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index ff947ecb8b81..f2c893690ac4 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -127,26 +127,13 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs_client *clp = server->nfs_client; - - if (!clp) - return; flush_scheduled_work(); - down_write(&clp->cl_sem); - if (!list_empty(&server->nfs4_siblings)) - list_del_init(&server->nfs4_siblings); - up_write(&clp->cl_sem); } -/* Must be called with clp->cl_sem locked for writes */ void nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); - if (!list_empty(&clp->cl_superblocks)) { - up_read(&clp->cl_sem); - return; - } cancel_delayed_work(&clp->cl_renewd); up_read(&clp->cl_sem); flush_scheduled_work(); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 058811e39555..5fffbdfa971f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -58,24 +58,6 @@ const nfs4_stateid zero_stateid; static LIST_HEAD(nfs4_clientid_list); -void -init_nfsv4_state(struct nfs_server *server) -{ - server->nfs_client = NULL; - INIT_LIST_HEAD(&server->nfs4_siblings); -} - -void -destroy_nfsv4_state(struct nfs_server *server) -{ - kfree(server->mnt_path); - server->mnt_path = NULL; - if (server->nfs_client) { - nfs_put_client(server->nfs_client); - server->nfs_client = NULL; - } -} - static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f0aff824a291..dae33c1e8a77 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, rdata->args.offset = page_offset(page) + rdata->args.pgbase; dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", - NFS_SERVER(inode)->hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)NFS_FILEID(inode), (unsigned long long)rdata->args.pgbase, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5842d510d732..867b5dcd3a40 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -13,6 +13,11 @@ * * Split from inode.c by David Howells * + * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a + * particular server are held in the same superblock + * - NFS superblocks can have several effective roots to the dentry tree + * - directory type roots are spliced into the tree when a path from one root reaches the root + * of another (see nfs_lookup()) */ #include @@ -52,20 +57,12 @@ #define NFSDBG_FACILITY NFSDBG_VFS -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. - */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) - static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); @@ -77,10 +74,10 @@ static struct file_system_type nfs_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs_fs_type = { +struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs", - .get_sb = nfs_clone_nfs_sb, + .get_sb = nfs_xdev_get_sb, .kill_sb = nfs_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -99,10 +96,10 @@ static struct super_operations nfs_sops = { #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -113,18 +110,18 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs4_fs_type = { +struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_clone_nfs4_sb, + .get_sb = nfs4_xdev_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs_referral_nfs4_fs_type = { +struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_referral_nfs4_sb, + .get_sb = nfs4_referral_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -345,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) nfs_show_mount_options(m, nfss, 0); seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); + seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); return 0; } @@ -429,714 +426,351 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) /* * Begin unmount by attempting to remove all automounted mountpoints we added - * in response to traversals + * in response to xdev traversals and referrals */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct nfs_server *server; - struct rpc_clnt *rpc; - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) - return; - /* -EIO all pending I/O */ - server = NFS_SB(vfsmnt->mnt_sb); - rpc = server->client; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = server->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); } /* - * Obtain the root inode of the file system. + * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +static int nfs_validate_mount_data(struct nfs_mount_data *data, + struct nfs_fh *mntfh) { - struct nfs_server *server = NFS_SB(sb); - int error; - - error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return -EINVAL; } - server->fsid = fsinfo->fattr->fsid; - return nfs_fhget(sb, rootfh, fsinfo->fattr); -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - return -ENOMEM; + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + return -EINVAL; + } - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? */ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + return -EINVAL; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + return -EINVAL; + } + /* Fill in pseudoflavor for mount version < 5 */ + data->pseudoflavor = RPC_AUTH_UNIX; + case 5: + memset(data->context, 0, sizeof(data->context)); } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; + +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + return -EPROTONOSUPPORT; } - sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; - - /* mount time stamp, in seconds */ - server->mount_time = jiffies; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->rsize > NFS_MAX_FILE_IO_SIZE) - server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - if (server->wsize > NFS_MAX_FILE_IO_SIZE) - server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +#endif /* CONFIG_NFS_V3 */ - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags |= MS_SYNCHRONOUS; + /* We now require that the mount process passes the remote address */ + if (data->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + return -EINVAL; } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + /* Prepare the root filehandle */ + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + if (mntfh->size > sizeof(mntfh->data)) { + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + return -EINVAL; + } + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; } /* - * Create an RPC client handle. + * Initialise the common bits of the superblock */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) +static inline void nfs_initialise_sb(struct super_block *sb) { - struct nfs_client *clp; - struct rpc_clnt *clnt; - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - int nfsversion = 2; - int err; - -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) - nfsversion = 3; -#endif - - clp = nfs_get_client(server->hostname, &server->addr, nfsversion); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(PTR_ERR(clp)); - } - - if (clp->cl_cons_state == NFS_CS_INITING) { - /* Check NFS protocol revision and initialize RPC op - * vector and file handle pool. */ -#ifdef CONFIG_NFS_V3 - if (nfsversion == 3) { - clp->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - clp->rpc_ops = &nfs_v2_clientops; - } -#else - clp->rpc_ops = &nfs_v2_clientops; -#endif - - /* create transport and client */ - err = nfs_create_rpc_client(clp, proto, data->timeo, - data->retrans, RPC_AUTH_UNIX); - if (err < 0) - goto client_init_error; - - nfs_mark_client_ready(clp, 0); - } + struct nfs_server *server = NFS_SB(sb); - /* create an nfs_server-specific client */ - clnt = rpc_clone_client(clp->cl_rpcclient); - if (IS_ERR(clnt)) { - dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); - nfs_put_client(clp); - return ERR_PTR(PTR_ERR(clnt)); - } + sb->s_magic = NFS_SUPER_MAGIC; - if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) { - struct rpc_auth *auth; + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - auth = rpcauth_create(data->pseudoflavor, server->client); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return ERR_PTR(PTR_ERR(auth)); - } - } + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); - server->nfs_client = clp; - return clnt; + if (server->flags & NFS_MOUNT_NOAC) + sb->s_flags |= MS_SYNCHRONOUS; -client_init_error: - nfs_mark_client_ready(clp, err); - nfs_put_client(clp); - return ERR_PTR(err); + nfs_super_set_maxbytes(sb, server->maxfilesize); } /* - * Clone a server record + * Finish setting up an NFS2/3 superblock */ -static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) +static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) { struct nfs_server *server = NFS_SB(sb); - struct nfs_server *parent = NFS_SB(data->sb); - struct inode *root_inode; - struct nfs_fsinfo fsinfo; - void *err = ERR_PTR(-ENOMEM); - - sb->s_op = data->sb->s_op; - sb->s_blocksize = data->sb->s_blocksize; - sb->s_blocksize_bits = data->sb->s_blocksize_bits; - sb->s_maxbytes = data->sb->s_maxbytes; - - server->client_acl = ERR_PTR(-EINVAL); - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - goto out; - - server->client = rpc_clone_client(parent->client); - if (IS_ERR((err = server->client))) - goto out; - - if (!IS_ERR(parent->client_acl)) { - server->client_acl = rpc_clone_client(parent->client_acl); - if (IS_ERR((err = server->client_acl))) - goto out; - } - root_inode = nfs_fhget(sb, data->fh, data->fattr); - if (!root_inode) - goto out; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_put_root; - fsinfo.fattr = data->fattr; - if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; - sb->s_flags |= MS_ACTIVE; - return server; -out_put_root: - iput(root_inode); -out: - return err; -} - -/* - * Copy an existing superblock and attach revised data - */ -static int nfs_clone_generic_sb(struct nfs_clone_mount *data, - struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), - struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *), - struct vfsmount *mnt) -{ - struct nfs_server *server; - struct nfs_server *parent = NFS_SB(data->sb); - struct super_block *sb = ERR_PTR(-EINVAL); - char *hostname; - int error = -ENOMEM; - int len; - - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (server == NULL) - goto out_err; - memcpy(server, parent, sizeof(*server)); - atomic_inc(&server->nfs_client->cl_count); - hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; - len = strlen(hostname) + 1; - server->hostname = kmalloc(len, GFP_KERNEL); - if (server->hostname == NULL) - goto free_server; - memcpy(server->hostname, hostname, len); - - sb = fill_sb(server, data); - if (IS_ERR(sb)) { - error = PTR_ERR(sb); - goto free_hostname; - } - if (sb->s_root) - goto out_share; + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - server = fill_server(sb, data); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_deactivate; + if (server->flags & NFS_MOUNT_VER3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; } - return simple_set_mnt(mnt, sb); -out_deactivate: - up_write(&sb->s_umount); - deactivate_super(sb); - return error; -out_share: - kfree(server->hostname); - nfs_put_client(server->nfs_client); - kfree(server); - return simple_set_mnt(mnt, sb); -free_hostname: - kfree(server->hostname); -free_server: - nfs_put_client(server->nfs_client); - kfree(server); -out_err: - return error; + + sb->s_op = &nfs_sops; + nfs_initialise_sb(sb); } /* - * Set up an NFS2/3 superblock - * - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. + * Finish setting up a cloned NFS2/3 superblock */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) +static void nfs_clone_super(struct super_block *sb, + const struct super_block *old_sb) { - struct nfs_server *server; - rpc_authflavor_t authflavor; + struct nfs_server *server = NFS_SB(sb); + + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - - /* RFC 2623, sec 2.3.2 */ if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. */ sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; } - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } -static int nfs_set_super(struct super_block *s, void *data) +static int nfs_set_super(struct super_block *s, void *_server) { - s->s_fs_info = data; - return set_anon_super(s, data); + struct nfs_server *server = _server; + int ret; + + s->s_fs_info = server; + ret = set_anon_super(s, server); + if (ret == 0) + server->s_dev = s->s_dev; + return ret; } static int nfs_compare_super(struct super_block *sb, void *data) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); + struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) + if (old->nfs_client != server->nfs_client) return 0; - if (old->addr.sin_port != server->addr.sin_port) + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; - return !nfs_compare_fh(&old->fh, &server->fh); + return 1; } static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh *root; + struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; + struct dentry *mntroot; + int error; - error = -EINVAL; - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err_noserver; - } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err_noserver; - } - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - goto out_err_noserver; - } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - goto out_err_noserver; - } - case 5: - memset(data->context, 0, sizeof(data->context)); - } -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - error = -EPROTONOSUPPORT; - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err_noserver; - } -#endif /* CONFIG_NFS_V3 */ + /* Validate the mount data */ + error = nfs_validate_mount_data(data, &mntfh); + if (error < 0) + return error; - error = -ENOMEM; - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) + /* Get a volume representation */ + server = nfs_create_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); goto out_err_noserver; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - error = -EINVAL; - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - - /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; } + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); - goto out_err; + goto out_err_nosb; } - if (s->s_root) - goto out_share; + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } - s->s_flags = flags; + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_fill_super(s, data); + } - error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } - s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); -out_share: - kfree(server); - return simple_set_mnt(mnt, s); + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + return 0; -out_err: - kfree(server); +out_err_nosb: + nfs_free_server(server); out_err_noserver: return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + return error; } +/* + * Destroy an NFS2/3 superblock + */ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - nfs_put_client(server->nfs_client); - kfree(server); - nfs_release_automount_timer(); + nfs_free_server(server); } -static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); - if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return sb; -} - -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); -} + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; -#ifdef CONFIG_NFS_V4 -static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - int timeo, int retrans, int proto, rpc_authflavor_t flavor) -{ - struct nfs_client *clp; - struct rpc_clnt *clnt = NULL; - int err = -EIO; - - clp = nfs_get_client(server->hostname, &server->addr, 4); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(err); - } + dprintk("--> nfs_xdev_get_sb()\n"); - /* Now create transport and client */ - if (clp->cl_cons_state == NFS_CS_INITING) { - clp->rpc_ops = &nfs_v4_clientops; + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } - err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor); - if (err < 0) - goto client_init_error; + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - err = nfs_idmap_new(clp); - if (err < 0) { - dprintk("%s: failed to create idmapper.\n", - __FUNCTION__); - goto client_init_error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - nfs_mark_client_ready(clp, 0); + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; } - clnt = rpc_clone_client(clp->cl_rpcclient); + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_clone_super(s, data->sb); + } - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - return clnt; + mntroot = nfs_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } - if (clnt->cl_auth->au_flavor != flavor) { - struct rpc_auth *auth; + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; - auth = rpcauth_create(flavor, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return (struct rpc_clnt *)auth; - } - } + dprintk("<-- nfs_xdev_get_sb() = 0\n"); + return 0; - server->nfs_client = clp; - down_write(&clp->cl_sem); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return clnt; +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); + return error; -client_init_error: - nfs_mark_client_ready(clp, err); - nfs_put_client(clp); - return ERR_PTR(err); +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); + return error; } +#ifdef CONFIG_NFS_V4 + /* - * Set up an NFS4 superblock + * Finish setting up a cloned NFS4 superblock */ -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +static void nfs4_clone_super(struct super_block *sb, + const struct super_block *old_sb) { - struct nfs_server *server; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } - - server->client = nfs4_create_client(server, data->timeo, data->retrans, - data->proto, authflavour); - if (IS_ERR(server->client)) { - err = PTR_ERR(server->client); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; sb->s_time_gran = 1; - - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - - out_fail: - return err; + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } -static int nfs4_compare_super(struct super_block *sb, void *data) +/* + * Set up an NFS4 superblock + */ +static void nfs4_fill_super(struct super_block *sb) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + nfs_initialise_sb(sb); } -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) { void *p = NULL; @@ -1157,14 +791,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) return dst; } +/* + * Get the superblock for an NFS4 mountpoint + */ static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; - struct nfs_server *server; - struct super_block *s; struct nfs4_mount_data *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct sockaddr_in addr; + rpc_authflavor_t authflavour; + struct nfs_fh mntfh; + struct dentry *mntroot; + char *mntpath = NULL, *hostname = NULL, ip_addr[16]; void *p; + int error; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); @@ -1175,75 +817,107 @@ static int nfs4_get_sb(struct file_system_type *fs_type, return -EINVAL; } - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return -ENOMEM; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_acl = ERR_PTR(-EINVAL); + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(addr)) + return -EINVAL; + + if (copy_from_user(&addr, data->host_addr, sizeof(addr))) + return -EFAULT; + + if (addr.sin_family != AF_INET || + addr.sin_addr.s_addr == INADDR_ANY + ) { + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); + return -EINVAL; + } + + /* Grab the authentication type */ + authflavour = RPC_AUTH_UNIX; + if (data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + error = -EINVAL; + goto out_err_noserver; + } + + if (copy_from_user(&authflavour, data->auth_flavours, + sizeof(authflavour))) { + error = -EFAULT; + goto out_err_noserver; + } + } p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) goto out_err; - server->hostname = p; + hostname = p; p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); if (IS_ERR(p)) goto out_err; - server->mnt_path = p; + mntpath = p; - p = nfs_copy_user_string(server->ip_addr, &data->client_addr, - sizeof(server->ip_addr) - 1); + dprintk("MNTPATH: %s\n", mntpath); + + p = nfs_copy_user_string(ip_addr, &data->client_addr, + sizeof(ip_addr) - 1); if (IS_ERR(p)) goto out_err; - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - error = -EINVAL; - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - error = -EFAULT; - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - error = -EINVAL; - goto out_free; + /* Get a volume representation */ + server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, + authflavour, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; } - if (s->s_root) { - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); - return simple_set_mnt(mnt, s); - } + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; - s->s_flags = flags; + nfs4_fill_super(s); + } else { + nfs_free_server(server); + } - error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs4_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } + s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + kfree(mntpath); + kfree(hostname); + return 0; + out_err: error = PTR_ERR(p); + goto out_err_noserver; + out_free: - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); + nfs_free_server(server); +out_err_noserver: + kfree(mntpath); + kfree(hostname); return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + goto out_err_noserver; } static void nfs4_kill_super(struct super_block *sb) @@ -1254,133 +928,140 @@ static void nfs4_kill_super(struct super_block *sb) kill_anon_super(sb); nfs4_renewd_prepare_shutdown(server); - - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); - - destroy_nfsv4_state(server); - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); + nfs_free_server(server); } /* - * Constructs the SERVER-side path + * Clone an NFS4 server record on xdev traversal (FSID-change) */ -static inline char *nfs4_dup_path(const struct dentry *dentry) +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { - char *page = (char *) __get_free_page(GFP_USER); - char *path; + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (!IS_ERR(path)) { - int len = PAGE_SIZE + page - path; - char *tmp = path; + dprintk("--> nfs4_xdev_get_sb()\n"); - path = kmalloc(len, GFP_KERNEL); - if (path) - memcpy(path, tmp, len); - else - path = ERR_PTR(-ENOMEM); + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - free_page((unsigned long)page); - return path; -} -static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - const struct dentry *dentry = data->dentry; - struct nfs_client *clp = server->nfs_client; - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - server->mnt_path = nfs4_dup_path(dentry); - if (IS_ERR(server->mnt_path)) { - sb = (struct super_block *)server->mnt_path; - goto err; + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; } - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - nfs4_server_capabilities(server, &server->fh); - - down_write(&clp->cl_sem); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt); -} + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } -static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb = ERR_PTR(-ENOMEM); - int len; - - len = strlen(data->mnt_path) + 1; - server->mnt_path = kmalloc(len, GFP_KERNEL); - if (server->mnt_path == NULL) - goto err; - memcpy(server->mnt_path, data->mnt_path, len); - memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); - - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_clone_super(s, data->sb); + } -static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - int proto, timeo, retrans; - void *err; - - proto = IPPROTO_TCP; - /* Since we are following a referral and there may be alternatives, - set the timeouts and retries to low values */ - timeo = 2; - retrans = 1; - - nfs_put_client(server->nfs_client); - server->nfs_client = NULL; - server->client = nfs4_create_client(server, timeo, retrans, proto, - data->authflavor); - if (IS_ERR((err = server->client))) - goto out_err; + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); - if (!IS_ERR(err)) - return server; -out_err: - return (struct nfs_server *)err; + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs4_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); + return error; } -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +/* + * Create an NFS4 server record on referral traversal + */ +static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt); + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + struct nfs_fh mntfh; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + /* create a new volume representation */ + server = nfs4_create_referral_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_fill_super(s); + } + + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs4_referral_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); + return error; } -#endif +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7084ac9a6455..453d44666ea5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1273,7 +1273,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->hostname, + NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d404ceca9168..6d0be0efd1b5 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -51,7 +51,6 @@ struct nfs_client { unsigned long cl_lease_time; unsigned long cl_last_renewal; struct work_struct cl_renewd; - struct work_struct cl_recoverd; struct rpc_wait_queue cl_rpcwaitq; @@ -74,6 +73,10 @@ struct nfs_client { */ struct nfs_server { struct nfs_client * nfs_client; /* shared client and NFS4 state */ + struct list_head client_link; /* List of other nfs_server structs + * that share the same client + */ + struct list_head master_link; /* link in master servers list */ struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_iostats * io_stats; /* I/O statistics */ @@ -92,20 +95,13 @@ struct nfs_server { unsigned int acdirmin; unsigned int acdirmax; unsigned int namelen; - char * hostname; /* remote hostname */ - struct nfs_fh fh; - struct sockaddr_in addr; + struct nfs_fsid fsid; + __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ + dev_t s_dev; /* superblock dev numbers */ + #ifdef CONFIG_NFS_V4 - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char ip_addr[16]; - char * mnt_path; - struct list_head nfs4_siblings; /* List of other nfs_server structs - * that share the same clientid - */ u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ @@ -113,6 +109,7 @@ struct nfs_server { that are supported on this filesystem */ #endif + void (*destroy)(struct nfs_server *); }; /* Server capabilities */ -- cgit v1.2.3-71-gd317 From ec739ef03dc926d05051c8c5838971445504470a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:15 -0400 Subject: SUNRPC: Create a helper to tell whether a transport is bound Hide the contents and format of xprt->addr by eliminating direct uses of the xprt->addr.sin_port field. This change is required to support alternate RPC host address formats (eg IPv6). Test-plan: Destructive testing (unplugging the network temporarily). Repeated runs of Connectathon locking suite with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++++ net/sunrpc/clnt.c | 10 +++++----- net/sunrpc/pmap_clnt.c | 5 ++++- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 14 ++++++++++---- 5 files changed, 36 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3a0cca255b76..a71106723d71 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -269,6 +269,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_CONNECTED (1) #define XPRT_CONNECTING (2) #define XPRT_CLOSE_WAIT (3) +#define XPRT_BOUND (4) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -312,6 +313,21 @@ static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_CONNECTING, &xprt->state); } +static inline void xprt_set_bound(struct rpc_xprt *xprt) +{ + test_and_set_bit(XPRT_BOUND, &xprt->state); +} + +static inline int xprt_bound(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_BOUND, &xprt->state); +} + +static inline void xprt_clear_bound(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_BOUND, &xprt->state); +} + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3e19d321067a..0b8d03d08561 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -148,7 +148,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; clnt->cl_pmap = &clnt->cl_pmap_default; - clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_prot = xprt->prot; @@ -156,7 +155,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_metrics = rpc_alloc_iostats(clnt); rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); - if (!clnt->cl_port) + if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; clnt->cl_rtt = &clnt->cl_rtt_default; @@ -570,7 +569,7 @@ EXPORT_SYMBOL(rpc_max_payload); void rpc_force_rebind(struct rpc_clnt *clnt) { if (clnt->cl_autobind) - clnt->cl_port = 0; + xprt_clear_bound(clnt->cl_xprt); } EXPORT_SYMBOL(rpc_force_rebind); @@ -782,14 +781,15 @@ static void call_bind(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_connect; - if (!clnt->cl_port) { + if (!xprt_bound(xprt)) { task->tk_action = call_bind_status; - task->tk_timeout = task->tk_xprt->bind_timeout; + task->tk_timeout = xprt->bind_timeout; rpc_getport(task, clnt); } } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 623180f224c9..209ffdfee10b 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -142,15 +142,17 @@ pmap_getport_done(struct rpc_task *task) dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); - xprt->ops->set_port(xprt, 0); if (task->tk_status < 0) { /* Make the calling task exit with an error */ + xprt->ops->set_port(xprt, 0); task->tk_action = rpc_exit_task; } else if (clnt->cl_port == 0) { /* Program not registered */ + xprt->ops->set_port(xprt, 0); rpc_exit(task, -EACCES); } else { xprt->ops->set_port(xprt, clnt->cl_port); + xprt_set_bound(xprt); clnt->cl_port = htons(clnt->cl_port); } spin_lock(&pmap_lock); @@ -218,6 +220,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; xprt->ops->set_port(xprt, RPC_PMAP_PORT); + xprt_set_bound(xprt); if (!privileged) xprt->resvport = 0; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e8c2bc4977f3..e239ef985ef7 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task) dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - if (!xprt->addr.sin_port) { + if (!xprt_bound(xprt)) { task->tk_status = -EIO; return; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 441bd53f5eca..123ac1e5ba15 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1016,7 +1016,7 @@ static void xs_udp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); @@ -1099,7 +1099,7 @@ static void xs_tcp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); @@ -1307,8 +1307,11 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - xprt->prot = IPPROTO_UDP; + if (ntohs(xprt->addr.sin_port) != 0) + xprt_set_bound(xprt); xprt->port = xs_get_random_port(); + + xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ @@ -1348,8 +1351,11 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - xprt->prot = IPPROTO_TCP; + if (ntohs(xprt->addr.sin_port) != 0) + xprt_set_bound(xprt); xprt->port = xs_get_random_port(); + + xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; -- cgit v1.2.3-71-gd317 From 4a68179d38874c37be2802442a71b847f5d1a2a9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:15 -0400 Subject: SUNRPC: Make RPC portmapper use per-transport storage Move connection and bind state that was maintained in the rpc_clnt structure to the rpc_xprt structure. This will allow the creation of a clean API for plugging in different types of bind mechanisms. This brings improvements such as the elimination of a single spin lock to control serialization for all in-kernel RPC binding. A set of per-xprt bitops is used to serialize tasks during RPC binding, just like it now works for making RPC transport connections. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 23 +------ include/linux/sunrpc/xprt.h | 14 ++++ net/sunrpc/clnt.c | 8 +-- net/sunrpc/pmap_clnt.c | 158 +++++++++++++++++++++++++++++--------------- net/sunrpc/xprt.c | 1 + 5 files changed, 123 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 8fe9f35eba31..00e9dbaec9c5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -18,18 +18,6 @@ #include #include -/* - * This defines an RPC port mapping - */ -struct rpc_portmap { - __u32 pm_prog; - __u32 pm_vers; - __u32 pm_prot; - __u16 pm_port; - unsigned char pm_binding : 1; /* doing a getport() */ - struct rpc_wait_queue pm_bindwait; /* waiting on getport() */ -}; - struct rpc_inode; /* @@ -40,7 +28,9 @@ struct rpc_clnt { atomic_t cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ - u32 cl_maxproc; /* max procedure number */ + u32 cl_prog, /* RPC program number */ + cl_vers, /* RPC version number */ + cl_maxproc; /* max procedure number */ char * cl_server; /* server machine name */ char * cl_protname; /* protocol name */ @@ -55,7 +45,6 @@ struct rpc_clnt { cl_dead : 1;/* abandoned */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ - struct rpc_portmap * cl_pmap; /* port mapping */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; @@ -64,14 +53,8 @@ struct rpc_clnt { struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; - struct rpc_portmap cl_pmap_default; char cl_inline_name[32]; }; -#define cl_timeout cl_xprt->timeout -#define cl_prog cl_pmap->pm_prog -#define cl_vers cl_pmap->pm_vers -#define cl_port cl_pmap->pm_port -#define cl_prot cl_pmap->pm_prot /* * General RPC program info diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a71106723d71..4ce82616873d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -138,6 +138,7 @@ struct rpc_xprt { unsigned int tsh_size; /* size of transport specific header */ + struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ @@ -270,6 +271,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_CONNECTING (2) #define XPRT_CLOSE_WAIT (3) #define XPRT_BOUND (4) +#define XPRT_BINDING (5) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -328,6 +330,18 @@ static inline void xprt_clear_bound(struct rpc_xprt *xprt) clear_bit(XPRT_BOUND, &xprt->state); } +static inline void xprt_clear_binding(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_BINDING, &xprt->state); + smp_mb__after_clear_bit(); +} + +static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_BINDING, &xprt->state); +} + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0b8d03d08561..cee504162a3f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -147,13 +147,10 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_pmap = &clnt->cl_pmap_default; clnt->cl_prog = program->number; clnt->cl_vers = version->number; - clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); - rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -243,8 +240,6 @@ rpc_clone_client(struct rpc_clnt *clnt) atomic_set(&new->cl_users, 0); new->cl_parent = clnt; atomic_inc(&clnt->cl_count); - /* Duplicate portmapper */ - rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; @@ -254,8 +249,7 @@ rpc_clone_client(struct rpc_clnt *clnt) rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - new->cl_pmap = &new->cl_pmap_default; - new->cl_metrics = rpc_alloc_iostats(clnt); + new->cl_metrics = rpc_alloc_iostats(clnt); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 209ffdfee10b..59d542436ca9 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -24,11 +24,57 @@ #define PMAP_UNSET 2 #define PMAP_GETPORT 3 +struct portmap_args { + u32 pm_prog; + u32 pm_vers; + u32 pm_prot; + unsigned short pm_port; + struct rpc_task * pm_task; +}; + static struct rpc_procinfo pmap_procedures[]; static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); -static void pmap_getport_done(struct rpc_task *); +static void pmap_getport_done(struct rpc_task *, void *); static struct rpc_program pmap_program; -static DEFINE_SPINLOCK(pmap_lock); + +static void pmap_getport_prepare(struct rpc_task *task, void *calldata) +{ + struct portmap_args *map = calldata; + struct rpc_message msg = { + .rpc_proc = &pmap_procedures[PMAP_GETPORT], + .rpc_argp = map, + .rpc_resp = &map->pm_port, + }; + + rpc_call_setup(task, &msg, 0); +} + +static inline struct portmap_args *pmap_map_alloc(void) +{ + return kmalloc(sizeof(struct portmap_args), GFP_NOFS); +} + +static inline void pmap_map_free(struct portmap_args *map) +{ + kfree(map); +} + +static void pmap_map_release(void *data) +{ + pmap_map_free(data); +} + +static const struct rpc_call_ops pmap_getport_ops = { + .rpc_call_prepare = pmap_getport_prepare, + .rpc_call_done = pmap_getport_done, + .rpc_release = pmap_map_release, +}; + +static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt) +{ + xprt_clear_binding(xprt); + rpc_wake_up(&xprt->binding); +} /* * Obtain the port for a given RPC service on a given host. This one can @@ -37,67 +83,71 @@ static DEFINE_SPINLOCK(pmap_lock); void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { - struct rpc_portmap *map = clnt->cl_pmap; - struct sockaddr_in *sap = &clnt->cl_xprt->addr; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[PMAP_GETPORT], - .rpc_argp = map, - .rpc_resp = &clnt->cl_port, - .rpc_cred = NULL - }; + struct rpc_xprt *xprt = task->tk_xprt; + struct sockaddr_in *sap = &xprt->addr; + struct portmap_args *map; struct rpc_clnt *pmap_clnt; - struct rpc_task *child; + struct rpc_task *child; - dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", + dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n", task->tk_pid, clnt->cl_server, - map->pm_prog, map->pm_vers, map->pm_prot); + clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); - spin_lock(&pmap_lock); - if (map->pm_binding) { - rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); - spin_unlock(&pmap_lock); + if (xprt_test_and_set_binding(xprt)) { + task->tk_status = -EACCES; /* tell caller to check again */ + rpc_sleep_on(&xprt->binding, task, NULL, NULL); return; } - map->pm_binding = 1; - spin_unlock(&pmap_lock); + + /* Someone else may have bound if we slept */ + if (xprt_bound(xprt)) { + task->tk_status = 0; + goto bailout_nofree; + } + + map = pmap_map_alloc(); + if (!map) { + task->tk_status = -ENOMEM; + goto bailout_nofree; + } + map->pm_prog = clnt->cl_prog; + map->pm_vers = clnt->cl_vers; + map->pm_prot = xprt->prot; + map->pm_port = 0; + map->pm_task = task; pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); if (IS_ERR(pmap_clnt)) { task->tk_status = PTR_ERR(pmap_clnt); goto bailout; } - task->tk_status = 0; - /* - * Note: rpc_new_child will release client after a failure. - */ - if (!(child = rpc_new_child(pmap_clnt, task))) + child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); + if (IS_ERR(child)) { + task->tk_status = -EIO; goto bailout; + } + rpc_release_task(child); - /* Setup the call info struct */ - rpc_call_setup(child, &msg, 0); + rpc_sleep_on(&xprt->binding, task, NULL, NULL); - /* ... and run the child task */ task->tk_xprt->stat.bind_count++; - rpc_run_child(task, child, pmap_getport_done); return; bailout: - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); - rpc_exit(task, -EIO); + pmap_map_free(map); +bailout_nofree: + pmap_wake_portmap_waiters(xprt); } #ifdef CONFIG_ROOT_NFS int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) { - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -133,32 +183,32 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) #endif static void -pmap_getport_done(struct rpc_task *task) +pmap_getport_done(struct rpc_task *child, void *data) { - struct rpc_clnt *clnt = task->tk_client; + struct portmap_args *map = data; + struct rpc_task *task = map->pm_task; struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_portmap *map = clnt->cl_pmap; - - dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", - task->tk_pid, task->tk_status, clnt->cl_port); + int status = child->tk_status; - if (task->tk_status < 0) { - /* Make the calling task exit with an error */ + if (status < 0) { + /* Portmapper not available */ xprt->ops->set_port(xprt, 0); - task->tk_action = rpc_exit_task; - } else if (clnt->cl_port == 0) { - /* Program not registered */ + task->tk_status = status; + } else if (map->pm_port == 0) { + /* Requested RPC service wasn't registered */ xprt->ops->set_port(xprt, 0); - rpc_exit(task, -EACCES); + task->tk_status = -EACCES; } else { - xprt->ops->set_port(xprt, clnt->cl_port); + /* Succeeded */ + xprt->ops->set_port(xprt, map->pm_port); xprt_set_bound(xprt); - clnt->cl_port = htons(clnt->cl_port); + task->tk_status = 0; } - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); + + dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n", + child->tk_pid, child->tk_status, map->pm_port); + + pmap_wake_portmap_waiters(xprt); } /* @@ -172,7 +222,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -239,7 +289,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg * XDR encode/decode functions for PMAP */ static int -xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) +xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) { dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e239ef985ef7..b45abd0743cb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -928,6 +928,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); rpc_init_wait_queue(&xprt->resend, "xprt_resend"); -- cgit v1.2.3-71-gd317 From 5b1eacbcd78930d976eb50a93f1779d311b553d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: SUNRPC: Support for RPC child tasks no longer needed The previous patches removed the last user of RPC child tasks, so we can remove support for child tasks from net/sunrpc/sched.c now. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 5 --- net/sunrpc/sched.c | 82 -------------------------------------------- 2 files changed, 87 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 82a91bb22362..f399c138f79d 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -127,7 +127,6 @@ struct rpc_call_ops { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ -#define RPC_TASK_CHILD 0x0008 /* is child of other task */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ @@ -136,7 +135,6 @@ struct rpc_call_ops { #define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) -#define RPC_IS_CHILD(t) ((t)->tk_flags & RPC_TASK_CHILD) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) @@ -253,7 +251,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); -struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); @@ -261,8 +258,6 @@ void rpc_release_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); int rpc_execute(struct rpc_task *); -void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, - rpc_action action); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5c3eee768504..015ffe423a2f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -44,12 +44,6 @@ static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); static void rpc_async_schedule(void *); -/* - * RPC tasks that create another task (e.g. for contacting the portmapper) - * will wait on this queue for their child's completion - */ -static RPC_WAITQ(childq, "childq"); - /* * RPC tasks sit here while waiting for conditions to improve. */ @@ -323,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task) wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } -/* - * Place a newly initialized task on the workqueue. - */ -static inline void -rpc_schedule_run(struct rpc_task *task) -{ - rpc_set_active(task); - rpc_make_runnable(task); -} - /* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. @@ -933,72 +917,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, } EXPORT_SYMBOL(rpc_run_task); -/** - * rpc_find_parent - find the parent of a child task. - * @child: child task - * @parent: parent task - * - * Checks that the parent task is still sleeping on the - * queue 'childq'. If so returns a pointer to the parent. - * Upon failure returns NULL. - * - * Caller must hold childq.lock - */ -static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) -{ - struct rpc_task *task; - struct list_head *le; - - task_for_each(task, le, &childq.tasks[0]) - if (task == parent) - return parent; - - return NULL; -} - -static void rpc_child_exit(struct rpc_task *child, void *calldata) -{ - struct rpc_task *parent; - - spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child, calldata)) != NULL) { - parent->tk_status = child->tk_status; - __rpc_wake_up_task(parent); - } - spin_unlock_bh(&childq.lock); -} - -static const struct rpc_call_ops rpc_child_ops = { - .rpc_call_done = rpc_child_exit, -}; - -/* - * Note: rpc_new_task releases the client after a failure. - */ -struct rpc_task * -rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) -{ - struct rpc_task *task; - - task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); - if (!task) - goto fail; - return task; - -fail: - parent->tk_status = -ENOMEM; - return NULL; -} - -void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) -{ - spin_lock_bh(&childq.lock); - /* N.B. Is it possible for the child to have already finished? */ - __rpc_sleep_on(&childq, task, func, NULL); - rpc_schedule_run(child); - spin_unlock_bh(&childq.lock); -} - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? -- cgit v1.2.3-71-gd317 From bbf7c1dd2ae2b4040b41b1065ee9b1b6905b1605 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: SUNRPC: Introduce transport switch callout for pluggable rpcbind Introduce a clean transport switch API for plugging in different types of rpcbind mechanisms. For instance, rpcbind can cleanly replace the existing portmapper client, or a transport can choose to implement RPC binding any way it likes. Test plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 3 +-- net/sunrpc/pmap_clnt.c | 4 ++-- net/sunrpc/xprtsock.c | 2 ++ 5 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 00e9dbaec9c5..2e68ac0aa022 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -106,7 +106,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); -void rpc_getport(struct rpc_task *, struct rpc_clnt *); +void rpc_getport(struct rpc_task *); int rpc_register(u32, u32, int, unsigned short, int *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4ce82616873d..84122559fa17 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -105,6 +105,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_task *task); void * (*buf_alloc)(struct rpc_task *task, size_t size); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cee504162a3f..d003c2f5688f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -774,7 +774,6 @@ call_encode(struct rpc_task *task) static void call_bind(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind (status %d)\n", @@ -784,7 +783,7 @@ call_bind(struct rpc_task *task) if (!xprt_bound(xprt)) { task->tk_action = call_bind_status; task->tk_timeout = xprt->bind_timeout; - rpc_getport(task, clnt); + xprt->ops->rpcbind(task); } } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 0efcbf1302a2..f7b279a63baa 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -81,13 +81,13 @@ static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt) /** * rpc_getport - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request - * @clnt: controlling rpc_clnt * * This one can be called for an ongoing RPC request, and can be used in * an async (rpciod) context. */ -void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) +void rpc_getport(struct rpc_task *task) { + struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = task->tk_xprt; struct sockaddr_in *sap = &xprt->addr; struct portmap_args *map; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 123ac1e5ba15..4c98b89a5b48 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1262,6 +1262,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1278,6 +1279,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, -- cgit v1.2.3-71-gd317 From ed39440a2573abc926f230267000f21fa5a87822 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:17 -0400 Subject: SUNRPC: create API for getting remote peer address Provide an API for retrieving the remote peer address without allowing direct access to the rpc_xprt struct. Test-plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2e68ac0aa022..65196b03f0ab 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -123,6 +123,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); +size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); /* * Helper function for NFSroot support diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d003c2f5688f..94768cf5fd5b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -533,6 +533,27 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) task->tk_action = rpc_exit_task; } +/** + * rpc_peeraddr - extract remote peer address from clnt's xprt + * @clnt: RPC client structure + * @buf: target buffer + * @size: length of target buffer + * + * Returns the number of bytes that are actually in the stored address. + */ +size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) +{ + size_t bytes; + struct rpc_xprt *xprt = clnt->cl_xprt; + + bytes = sizeof(xprt->addr); + if (bytes > bufsize) + bytes = bufsize; + memcpy(buf, &clnt->cl_xprt->addr, bytes); + return sizeof(xprt->addr); +} +EXPORT_SYMBOL(rpc_peeraddr); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { -- cgit v1.2.3-71-gd317 From 39d7bbcb5ba5e9d8d658b70903dd7939400e57db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: SUNRPC: remove extraneous header inclusions include/linux/sunrpc/clnt.h already includes include/linux/sunrpc/xprt.h. We can remove xprt.h from source files that already include clnt.h. Likewise include/linux/sunrpc/timer.h. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 1 - include/linux/nfs_xdr.h | 1 - net/sunrpc/pmap_clnt.c | 1 - net/sunrpc/sched.c | 1 - net/sunrpc/timer.c | 2 -- 5 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 445abb4d4214..41274874b9a5 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2426b11b6cce..0f33e621892f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1,7 +1,6 @@ #ifndef _LINUX_NFS_XDR_H #define _LINUX_NFS_XDR_H -#include #include /* diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 3eee8e907275..523f0e825dea 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #ifdef RPC_DEBUG diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 015ffe423a2f..ecf366351bf7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -21,7 +21,6 @@ #include #include -#include #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index bcbdf6430d5c..8142fdb8a930 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -19,8 +19,6 @@ #include #include -#include -#include #define RPC_RTO_MAX (60*HZ) #define RPC_RTO_INIT (HZ/5) -- cgit v1.2.3-71-gd317 From edb267a688fcee5335d596752f117a30c7152e44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: SUNRPC: add xprt switch API for printing formatted remote peer addresses Add a new method to the transport switch API to provide a way to convert the opaque contents of xprt->addr to a human-readable string. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 11 +++++++ net/sunrpc/xprtsock.c | 79 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 82 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 84122559fa17..8372ab8fc9b5 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -51,6 +51,14 @@ struct rpc_timeout { unsigned char to_exponential; }; +enum rpc_display_format_t { + RPC_DISPLAY_ADDR = 0, + RPC_DISPLAY_PORT, + RPC_DISPLAY_PROTO, + RPC_DISPLAY_ALL, + RPC_DISPLAY_MAX, +}; + struct rpc_task; struct rpc_xprt; struct seq_file; @@ -103,6 +111,7 @@ struct rpc_rqst { struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); + char * (*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); @@ -207,6 +216,8 @@ struct rpc_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); + + char * address_strings[RPC_DISPLAY_MAX]; }; #define XPRT_LAST_FRAG (1 << 0) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4c98b89a5b48..cb8e6c34e12f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif +static void xs_format_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) { + snprintf(buf, 20, "%u.%u.%u.%u", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + if (xprt->prot == IPPROTO_UDP) + xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; + else + xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) { + snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; +} + +static void xs_free_peer_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) @@ -490,6 +531,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xprt_disconnect(xprt); xs_close(xprt); + xs_free_peer_addresses(xprt); kfree(xprt->slot); } @@ -964,6 +1006,19 @@ static unsigned short xs_get_random_port(void) return rand + xprt_min_resvport; } +/** + * xs_print_peer_address - format an IPv4 address for printing + * @xprt: generic transport + * @format: flags field indicating which parts of the address to render + */ +static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) +{ + if (xprt->address_strings[format] != NULL) + return xprt->address_strings[format]; + else + return "unprintable"; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -1019,8 +1074,6 @@ static void xs_udp_connect_worker(void *args) if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); - /* Start by resetting any existing state */ xs_close(xprt); @@ -1034,6 +1087,9 @@ static void xs_udp_connect_worker(void *args) goto out; } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1102,8 +1158,6 @@ static void xs_tcp_connect_worker(void *args) if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); - if (!xprt->sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { @@ -1119,6 +1173,9 @@ static void xs_tcp_connect_worker(void *args) /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1260,6 +1317,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .rpcbind = rpc_getport, @@ -1277,6 +1335,7 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, .rpcbind = rpc_getport, @@ -1301,8 +1360,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - dprintk("RPC: setting up udp-ipv4 transport...\n"); - xprt->max_reqs = xprt_udp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); @@ -1332,6 +1389,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } @@ -1345,8 +1406,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - dprintk("RPC: setting up tcp-ipv4 transport...\n"); - xprt->max_reqs = xprt_tcp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); @@ -1375,5 +1434,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } -- cgit v1.2.3-71-gd317 From f425eba437f0051bde979ea2eef8bc875a77cd00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: SUNRPC: Create API for displaying remote peer address Provide an API for formatting the remote peer address for printing without exposing its internal structure. The address could be dynamic, so we support a function call to get the address rather than reading it straight out of a structure. Test-plan: Destructive testing (unplugging the network temporarily). Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 65196b03f0ab..b7d47f018353 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); +char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); /* * Helper function for NFSroot support diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 94768cf5fd5b..e5b19e348d88 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -554,6 +554,19 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) } EXPORT_SYMBOL(rpc_peeraddr); +/** + * rpc_peeraddr2str - return remote peer address in printable format + * @clnt: RPC client structure + * @format: address format + * + */ +char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) +{ + struct rpc_xprt *xprt = clnt->cl_xprt; + return xprt->ops->print_addr(xprt, format); +} +EXPORT_SYMBOL(rpc_peeraddr2str); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { -- cgit v1.2.3-71-gd317 From c4efcb1d3e0bc76aeb9ca6301d19a5079893c6c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: SUNRPC: Use "sockaddr_storage" for storing RPC client's remote peer address IPv6 addresses are big (128 bytes). Now that no RPC client consumers treat the addr field in rpc_xprt structs as an opaque, and access it only via the API calls, we can safely widen the field in the rpc_xprt struct to accomodate larger addresses. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/clnt.c | 2 +- net/sunrpc/xprt.c | 3 ++- net/sunrpc/xprtsock.c | 15 ++++++++++----- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8372ab8fc9b5..fc05cfbd5805 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -134,7 +134,8 @@ struct rpc_xprt { struct sock * inet; /* INET layer */ struct rpc_timeout timeout; /* timeout parms */ - struct sockaddr_in addr; /* server address */ + struct sockaddr_storage addr; /* server address */ + size_t addrlen; /* size of server address */ int prot; /* IP protocol */ unsigned long cong; /* current congestion */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e5b19e348d88..ff1e90fd81ab 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -550,7 +550,7 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) if (bytes > bufsize) bytes = bufsize; memcpy(buf, &clnt->cl_xprt->addr, bytes); - return sizeof(xprt->addr); + return xprt->addrlen; } EXPORT_SYMBOL(rpc_peeraddr); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b45abd0743cb..4987517cc74b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -896,7 +896,8 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) return ERR_PTR(-ENOMEM); - xprt->addr = *ap; + memcpy(&xprt->addr, ap, sizeof(*ap)); + xprt->addrlen = sizeof(*ap); switch (proto) { case IPPROTO_UDP: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cb8e6c34e12f..17179aa4c207 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -341,7 +341,7 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), xdr, req->rq_bytes_sent); + xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -1027,8 +1027,11 @@ static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_forma */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { + struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - xprt->addr.sin_port = htons(port); + + sap->sin_port = htons(port); } static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) @@ -1209,7 +1212,7 @@ static void xs_tcp_connect_worker(void *args) xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); + xprt->addrlen, O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { @@ -1359,6 +1362,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_udp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); @@ -1366,7 +1370,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - if (ntohs(xprt->addr.sin_port) != 0) + if (ntohs(addr->sin_port != 0)) xprt_set_bound(xprt); xprt->port = xs_get_random_port(); @@ -1405,6 +1409,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_tcp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); @@ -1412,7 +1417,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - if (ntohs(xprt->addr.sin_port) != 0) + if (ntohs(addr->sin_port) != 0) xprt_set_bound(xprt); xprt->port = xs_get_random_port(); -- cgit v1.2.3-71-gd317 From 6ca948238724c945bd353f51d54ae7d285f3889f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: SUNRPC: Clean-up after previous patches. Remove some unused macros related to accessing an RPC peer address Test plan: Compile kernel with CONFIG_NFS option enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 1 - include/linux/nfs_fs.h | 1 - include/linux/sunrpc/clnt.h | 3 --- 3 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 38b0e8a1aec0..a516a01561b8 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -26,7 +26,6 @@ #define NLM_HOST_REBIND (60 * HZ) #define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) #define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) -#define NLM_HOST_ADDR(sv) (&(sv)->s_nlmclnt->cl_xprt->addr) static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; static unsigned long next_gc; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 51e9bd90dedc..3b5b04193fee 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -216,7 +216,6 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) -#define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b7d47f018353..a26d69583c7a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -89,9 +89,6 @@ struct rpc_procinfo { char * p_name; /* name of procedure */ }; -#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) -#define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr) - #ifdef __KERNEL__ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, -- cgit v1.2.3-71-gd317 From c2866763b4029411d166040306691773c12d4caf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:20 -0400 Subject: SUNRPC: use sockaddr + size when creating remote transport endpoints Prepare for more generic transport endpoint handling needed by transports that might use different forms of addressing, such as IPv6. Introduce a single function call to replace the two-call xprt_create_proto/rpc_create_client API. Define a new rpc_create_args structure that allows callers to pass in remote endpoint addresses of varying length. Test-plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 22 +++++++++++++ include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 61 ++++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 75 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a26d69583c7a..7817ba82f1b2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -97,6 +97,28 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); + +struct rpc_create_args { + int protocol; + struct sockaddr *address; + size_t addrsize; + struct rpc_timeout *timeout; + char *servername; + struct rpc_program *program; + u32 version; + rpc_authflavor_t authflavor; + unsigned long flags; +}; + +/* Values for "flags" field */ +#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) +#define RPC_CLNT_CREATE_INTR (1UL << 1) +#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) +#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) +#define RPC_CLNT_CREATE_NOPING (1UL << 5) + +struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fc05cfbd5805..bc80fcfdd892 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -237,6 +237,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ +struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ff1e90fd81ab..dbb93bdf6cc9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -192,6 +192,67 @@ out_no_xprt: return ERR_PTR(err); } +/* + * rpc_create - create an RPC client and transport with one call + * @args: rpc_clnt create argument structure + * + * Creates and initializes an RPC transport and an RPC client. + * + * It can ping the server in order to determine if it is up, and to see if + * it supports this program and version. RPC_CLNT_CREATE_NOPING disables + * this behavior so asynchronous tasks can also use rpc_create. + */ +struct rpc_clnt *rpc_create(struct rpc_create_args *args) +{ + struct rpc_xprt *xprt; + struct rpc_clnt *clnt; + + xprt = xprt_create_transport(args->protocol, args->address, + args->addrsize, args->timeout); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; + + /* + * By default, kernel RPC client connects from a reserved port. + * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, + * but it is always enabled for rpciod, which handles the connect + * operation. + */ + xprt->resvport = 1; + if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) + xprt->resvport = 0; + + dprintk("RPC: creating %s client for %s (xprt %p)\n", + args->program->name, args->servername, xprt); + + clnt = rpc_new_client(xprt, args->servername, args->program, + args->version, args->authflavor); + if (IS_ERR(clnt)) + return clnt; + + if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { + int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } + } + + clnt->cl_softrtry = 1; + if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + clnt->cl_softrtry = 0; + + if (args->flags & RPC_CLNT_CREATE_INTR) + clnt->cl_intr = 1; + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; + if (args->flags & RPC_CLNT_CREATE_ONESHOT) + clnt->cl_oneshot = 1; + + return clnt; +} +EXPORT_SYMBOL(rpc_create); + /** * Create an RPC client * @xprt - pointer to xprt struct diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4987517cc74b..17f56cfe2412 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -887,6 +887,81 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i to->to_exponential = 0; } +/** + * xprt_create_transport - create an RPC transport + * @proto: requested transport protocol + * @ap: remote peer address + * @size: length of address + * @to: timeout parameters + * + */ +struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) +{ + int result; + struct rpc_xprt *xprt; + struct rpc_rqst *req; + + if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { + dprintk("RPC: xprt_create_transport: no memory\n"); + return ERR_PTR(-ENOMEM); + } + if (size <= sizeof(xprt->addr)) { + memcpy(&xprt->addr, ap, size); + xprt->addrlen = size; + } else { + kfree(xprt); + dprintk("RPC: xprt_create_transport: address too large\n"); + return ERR_PTR(-EBADF); + } + + switch (proto) { + case IPPROTO_UDP: + result = xs_setup_udp(xprt, to); + break; + case IPPROTO_TCP: + result = xs_setup_tcp(xprt, to); + break; + default: + printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", + proto); + return ERR_PTR(-EIO); + } + if (result) { + kfree(xprt); + dprintk("RPC: xprt_create_transport: failed, %d\n", result); + return ERR_PTR(result); + } + + spin_lock_init(&xprt->transport_lock); + spin_lock_init(&xprt->reserve_lock); + + INIT_LIST_HEAD(&xprt->free); + INIT_LIST_HEAD(&xprt->recv); + INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; + xprt->cwnd = RPC_INITCWND; + + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); + rpc_init_wait_queue(&xprt->pending, "xprt_pending"); + rpc_init_wait_queue(&xprt->sending, "xprt_sending"); + rpc_init_wait_queue(&xprt->resend, "xprt_resend"); + rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); + + /* initialize free list */ + for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) + list_add(&req->rq_list, &xprt->free); + + xprt_init_xid(xprt); + + dprintk("RPC: created transport %p with %u slots\n", xprt, + xprt->max_reqs); + + return xprt; +} + static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) { int result; -- cgit v1.2.3-71-gd317 From ff9aa5e56df60cc8565a93cc868fe25ae3f20e49 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:21 -0400 Subject: SUNRPC: Eliminate xprt_create_proto and rpc_create_client The two function call API for creating a new RPC client is now obsolete. Remove it. Also, remove an unnecessary check to see whether the caller is capable of using privileged network services. The kernel RPC client always uses a privileged ephemeral port by default; callers are responsible for checking the authority of users to make use of any RPC service, or for specifying that a nonprivileged port is acceptable. Test plan: Repeated runs of Connectathon locking suite. Check network trace to ensure correctness of NLM requests and replies. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 7 ---- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/clnt.c | 42 +----------------------- net/sunrpc/sunrpc_syms.c | 3 -- net/sunrpc/xprt.c | 79 --------------------------------------------- net/sunrpc/xprtsock.c | 2 -- 6 files changed, 1 insertion(+), 133 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7817ba82f1b2..f6d1d646ce05 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -91,13 +91,6 @@ struct rpc_procinfo { #ifdef __KERNEL__ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, - u32 version, rpc_authflavor_t authflavor); -struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, - u32 version, rpc_authflavor_t authflavor); - struct rpc_create_args { int protocol; struct sockaddr *address; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc80fcfdd892..de4efea7c856 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -231,7 +231,6 @@ struct rpc_xprt { /* * Transport operations used by ULPs */ -struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to); void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); /* diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dbb93bdf6cc9..428704dd5b3e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) } } -/* - * Create an RPC client - * FIXME: This should also take a flags argument (as in task->tk_flags). - * It's called (among others) from pmap_create_client, which may in - * turn be called by an async task. In this case, rpciod should not be - * made to sleep too long. - */ -struct rpc_clnt * -rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *program, u32 vers, - rpc_authflavor_t flavor) +static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) { struct rpc_version *version; struct rpc_clnt *clnt = NULL; @@ -253,36 +243,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) } EXPORT_SYMBOL(rpc_create); -/** - * Create an RPC client - * @xprt - pointer to xprt struct - * @servname - name of server - * @info - rpc_program - * @version - rpc_program version - * @authflavor - rpc_auth flavour to use - * - * Creates an RPC client structure, then pings the server in order to - * determine if it is up, and if it supports this program and version. - * - * This function should never be called by asynchronous tasks such as - * the portmapper. - */ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) -{ - struct rpc_clnt *clnt; - int err; - - clnt = rpc_new_client(xprt, servname, info, version, authflavor); - if (IS_ERR(clnt)) - return clnt; - err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); - if (err == 0) - return clnt; - rpc_shutdown_client(clnt); - return ERR_PTR(err); -} - /* * This function clones the RPC client structure. It allows us to share the * same transport while varying parameters such as the authentication diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f38f939ce95f..26c0531d7e25 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -36,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status); EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ -EXPORT_SYMBOL(rpc_create_client); -EXPORT_SYMBOL(rpc_new_client); EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); @@ -57,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall); EXPORT_SYMBOL(rpc_mkpipe); /* Client transport */ -EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_set_timeout); /* Client credential cache */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 17f56cfe2412..e4f64fb58ff2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -962,85 +962,6 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si return xprt; } -static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) -{ - int result; - struct rpc_xprt *xprt; - struct rpc_rqst *req; - - if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) - return ERR_PTR(-ENOMEM); - - memcpy(&xprt->addr, ap, sizeof(*ap)); - xprt->addrlen = sizeof(*ap); - - switch (proto) { - case IPPROTO_UDP: - result = xs_setup_udp(xprt, to); - break; - case IPPROTO_TCP: - result = xs_setup_tcp(xprt, to); - break; - default: - printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - proto); - result = -EIO; - break; - } - if (result) { - kfree(xprt); - return ERR_PTR(result); - } - - spin_lock_init(&xprt->transport_lock); - spin_lock_init(&xprt->reserve_lock); - - INIT_LIST_HEAD(&xprt->free); - INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); - init_timer(&xprt->timer); - xprt->timer.function = xprt_init_autodisconnect; - xprt->timer.data = (unsigned long) xprt; - xprt->last_used = jiffies; - xprt->cwnd = RPC_INITCWND; - - rpc_init_wait_queue(&xprt->binding, "xprt_binding"); - rpc_init_wait_queue(&xprt->pending, "xprt_pending"); - rpc_init_wait_queue(&xprt->sending, "xprt_sending"); - rpc_init_wait_queue(&xprt->resend, "xprt_resend"); - rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); - - /* initialize free list */ - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) - list_add(&req->rq_list, &xprt->free); - - xprt_init_xid(xprt); - - dprintk("RPC: created transport %p with %u slots\n", xprt, - xprt->max_reqs); - - return xprt; -} - -/** - * xprt_create_proto - create an RPC client transport - * @proto: requested transport protocol - * @sap: remote peer's address - * @to: timeout parameters for new transport - * - */ -struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) -{ - struct rpc_xprt *xprt; - - xprt = xprt_setup(proto, sap, to); - if (IS_ERR(xprt)) - dprintk("RPC: xprt_create_proto failed\n"); - else - dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); - return xprt; -} - /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 17179aa4c207..0b84fab68d7e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1376,7 +1376,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -1423,7 +1422,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); -- cgit v1.2.3-71-gd317 From 4f390c152bc87165da4b1f5b7d870b46fb106d4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:22 -0400 Subject: NFS: Fix double d_drop in nfs_instantiate() error path If the LOOKUP or GETATTR in nfs_instantiate fail, nfs_instantiate will do a d_drop before returning. But some callers already do a d_drop in the case of an error return. Make certain we do only one d_drop in all error paths. This issue was introduced because over time, the symlink proc API diverged slightly from the create/mkdir/mknod proc API. To prevent other coding mistakes of this type, change the symlink proc API to be more like create/mkdir/mknod and move the nfs_instantiate call into the symlink proc routines so it is used in exactly the same way for create, mkdir, mknod, and symlink. Test plan: Connectathon, all versions of NFS. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 16 ++++------------ fs/nfs/nfs3proc.c | 26 ++++++++++++++++---------- fs/nfs/nfs4proc.c | 31 ++++++++++++++++--------------- fs/nfs/proc.c | 29 +++++++++++++++++++++-------- include/linux/nfs_xdr.h | 5 ++--- 5 files changed, 59 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 084e8cb41c84..affd3ae52e55 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1147,23 +1147,20 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct inode *dir = dentry->d_parent->d_inode; error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) - goto out_err; + return error; } if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) - goto out_err; + return error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); error = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_err; + return error; d_instantiate(dentry, inode); return 0; -out_err: - d_drop(dentry); - return error; } /* @@ -1448,8 +1445,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct iattr attr; - struct nfs_fattr sym_attr; - struct nfs_fh sym_fh; struct qstr qsymname; int error; @@ -1473,12 +1468,9 @@ dentry->d_parent->d_name.name, dentry->d_name.name); lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, - &attr, &sym_fh, &sym_attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr); nfs_end_data_update(dir); if (!error) - error = nfs_instantiate(dentry, &sym_fh, &sym_attr); - else d_drop(dentry); unlock_kernel(); return error; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9e8258ece6fd..d85ac427c326 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, - struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, + struct iattr *sattr) { - struct nfs_fattr dir_attr; + struct nfs_fh fhandle; + struct nfs_fattr fattr, dir_attr; struct nfs3_symlinkargs arg = { .fromfh = NFS_FH(dir), - .fromname = name->name, - .fromlen = name->len, + .fromname = dentry->d_name.name, + .fromlen = dentry->d_name.len, .topath = path->name, .tolen = path->len, .sattr = sattr }; struct nfs3_diropres res = { .dir_attr = &dir_attr, - .fh = fhandle, - .fattr = fattr + .fh = &fhandle, + .fattr = &fattr }; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], @@ -571,11 +571,17 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, if (path->len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", name->name, path->name); + + dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, + path->name); nfs_fattr_init(&dir_attr); - nfs_fattr_init(fattr); + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); +out: dprintk("NFS reply symlink: %d\n", status); return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a825547e8214..2d18eac6bee5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2084,24 +2084,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n return err; } -static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, - struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct qstr *path, struct iattr *sattr) { struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fattr dir_fattr; + struct nfs_fh fhandle; + struct nfs_fattr fattr, dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, - .name = name, + .name = &dentry->d_name, .attrs = sattr, .ftype = NF4LNK, .bitmask = server->attr_bitmask, }; struct nfs4_create_res res = { .server = server, - .fh = fhandle, - .fattr = fattr, + .fh = &fhandle, + .fattr = &fattr, .dir_fattr = &dir_fattr, }; struct rpc_message msg = { @@ -2113,27 +2113,28 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, if (path->len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; + arg.u.symlink = path; - nfs_fattr_init(fattr); + nfs_fattr_init(&fattr); nfs_fattr_init(&dir_fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) + if (!status) { update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); + nfs_post_op_update_inode(dir, res.dir_fattr); + status = nfs_instantiate(dentry, &fhandle, &fattr); + } return status; } -static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, - struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct qstr *path, struct iattr *sattr) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, name, path, sattr, - fhandle, fattr), + _nfs4_proc_symlink(dir, dentry, path, sattr), &exception); } while (exception.retry); return err; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 5a8b9407ee9a..0b507bf0f330 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -425,14 +425,15 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, - struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, + struct iattr *sattr) { + struct nfs_fh fhandle; + struct nfs_fattr fattr; struct nfs_symlinkargs arg = { .fromfh = NFS_FH(dir), - .fromname = name->name, - .fromlen = name->len, + .fromname = dentry->d_name.name, + .fromlen = dentry->d_name.len, .topath = path->name, .tolen = path->len, .sattr = sattr @@ -445,11 +446,23 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, if (path->len > NFS2_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", name->name, path->name); - nfs_fattr_init(fattr); - fhandle->size = 0; + + dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, + path->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); + + /* + * V2 SYMLINK requests don't return any attributes. Setting the + * filehandle size to zero indicates to nfs_instantiate that it + * should fill in the data with a LOOKUP call on the wire. + */ + if (status == 0) { + nfs_fattr_init(&fattr); + fhandle.size = 0; + status = nfs_instantiate(dentry, &fhandle, &fattr); + } + dprintk("NFS reply symlink: %d\n", status); return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0f33e621892f..ddf5d75e97a2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -793,9 +793,8 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); - int (*symlink) (struct inode *, struct qstr *, struct qstr *, - struct iattr *, struct nfs_fh *, - struct nfs_fattr *); + int (*symlink) (struct inode *, struct dentry *, struct qstr *, + struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, -- cgit v1.2.3-71-gd317 From 94a6d75320b3681e6e728b70e18bd186cb55e682 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:23 -0400 Subject: NFS: Use cached page as buffer for NFS symlink requests Now that we have a copy of the symlink path in the page cache, we can pass a struct page down to the XDR routines instead of a string buffer. Test plan: Connectathon, all NFS versions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 +------- fs/nfs/nfs2xdr.c | 21 ++++++++++++++++++--- fs/nfs/nfs3proc.c | 14 +++++++------- fs/nfs/nfs3xdr.c | 7 +++++-- fs/nfs/nfs4proc.c | 12 +++++++----- fs/nfs/nfs4xdr.c | 8 ++++---- fs/nfs/proc.c | 14 +++++++------- include/linux/nfs_xdr.h | 17 ++++++++++------- 8 files changed, 59 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b483e5d206cb..51328ae640dd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1464,10 +1464,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); - struct qstr qsymname = { - .name = symname, - .len = pathlen, - }; int error; dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, @@ -1493,10 +1489,8 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr, KM_USER0); - /* XXX: eventually this will pass in {page, pathlen}, - * instead of qsymname; need XDR changes for that */ nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); nfs_end_data_update(dir); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 67391eef6b93..b49501fc0a79 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -51,7 +51,7 @@ #define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) #define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) #define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) -#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz) +#define NFS_symlinkargs_sz (NFS_diropargs_sz+1+NFS_sattr_sz) #define NFS_readdirargs_sz (NFS_fhandle_sz+2) #define NFS_attrstat_sz (1+NFS_fattr_sz) @@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args) static int nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) { + struct xdr_buf *sndbuf = &req->rq_snd_buf; + size_t pad; + p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_array(p, args->topath, args->tolen); + *p++ = htonl(args->pathlen); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen); + + /* + * xdr_encode_pages may have added a few bytes to ensure the + * pathname ends on a 4-byte boundary. Start encoding the + * attributes after the pad bytes. + */ + pad = sndbuf->tail->iov_len; + if (pad > 0) + p++; p = xdr_encode_sattr(p, args->sattr); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad; return 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d85ac427c326..f8688eaa0001 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -544,8 +544,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, - struct iattr *sattr) +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, + unsigned int len, struct iattr *sattr) { struct nfs_fh fhandle; struct nfs_fattr fattr, dir_attr; @@ -553,8 +553,8 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, .fromfh = NFS_FH(dir), .fromname = dentry->d_name.name, .fromlen = dentry->d_name.len, - .topath = path->name, - .tolen = path->len, + .pages = &page, + .pathlen = len, .sattr = sattr }; struct nfs3_diropres res = { @@ -569,11 +569,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, }; int status; - if (path->len > NFS3_MAXPATHLEN) + if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, - path->name); + dprintk("NFS call symlink %s\n", dentry->d_name.name); + nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 0250269e9753..16556fa4effb 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -56,7 +56,7 @@ #define NFS3_writeargs_sz (NFS3_fh_sz+5) #define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) #define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) -#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz) +#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz) #define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) #define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) #define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) @@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); p = xdr_encode_sattr(p, args->sattr); - p = xdr_encode_array(p, args->topath, args->tolen); + *p++ = htonl(args->pathlen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Copy the page */ + xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2d18eac6bee5..7f60beb40df3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2085,7 +2085,7 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct qstr *path, struct iattr *sattr) + struct page *page, unsigned int len, struct iattr *sattr) { struct nfs_server *server = NFS_SERVER(dir); struct nfs_fh fhandle; @@ -2111,10 +2111,11 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, }; int status; - if (path->len > NFS4_MAXPATHLEN) + if (len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; - arg.u.symlink = path; + arg.u.symlink.pages = &page; + arg.u.symlink.len = len; nfs_fattr_init(&fattr); nfs_fattr_init(&dir_fattr); @@ -2128,13 +2129,14 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, } static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct qstr *path, struct iattr *sattr) + struct page *page, unsigned int len, struct iattr *sattr) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, dentry, path, sattr), + _nfs4_proc_symlink(dir, dentry, page, + len, sattr), &exception); } while (exception.retry); return err; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 99926067eca4..3dd413f52da1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -128,7 +128,7 @@ static int nfs4_stat_to_errno(int); #define decode_link_maxsz (op_decode_hdr_maxsz + 5) #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ - nfs4_path_maxsz + \ + 1 + \ nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ @@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c switch (create->ftype) { case NF4LNK: - RESERVE_SPACE(4 + create->u.symlink->len); - WRITE32(create->u.symlink->len); - WRITEMEM(create->u.symlink->name, create->u.symlink->len); + RESERVE_SPACE(4); + WRITE32(create->u.symlink.len); + xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); break; case NF4BLK: case NF4CHR: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0b507bf0f330..630e50647bbb 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -425,8 +425,8 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, - struct iattr *sattr) +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, + unsigned int len, struct iattr *sattr) { struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -434,8 +434,8 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, .fromfh = NFS_FH(dir), .fromname = dentry->d_name.name, .fromlen = dentry->d_name.len, - .topath = path->name, - .tolen = path->len, + .pages = &page, + .pathlen = len, .sattr = sattr }; struct rpc_message msg = { @@ -444,11 +444,11 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, }; int status; - if (path->len > NFS2_MAXPATHLEN) + if (len > NFS2_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, - path->name); + dprintk("NFS call symlink %s\n", dentry->d_name.name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ddf5d75e97a2..dc5397d9d23c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -358,8 +358,8 @@ struct nfs_symlinkargs { struct nfs_fh * fromfh; const char * fromname; unsigned int fromlen; - const char * topath; - unsigned int tolen; + struct page ** pages; + unsigned int pathlen; struct iattr * sattr; }; @@ -434,8 +434,8 @@ struct nfs3_symlinkargs { struct nfs_fh * fromfh; const char * fromname; unsigned int fromlen; - const char * topath; - unsigned int tolen; + struct page ** pages; + unsigned int pathlen; struct iattr * sattr; }; @@ -533,7 +533,10 @@ struct nfs4_accessres { struct nfs4_create_arg { u32 ftype; union { - struct qstr * symlink; /* NF4LNK */ + struct { + struct page ** pages; + unsigned int len; + } symlink; /* NF4LNK */ struct { u32 specdata1; u32 specdata2; @@ -793,8 +796,8 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct qstr *, - struct iattr *); + int (*symlink) (struct inode *, struct dentry *, struct page *, + unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, -- cgit v1.2.3-71-gd317 From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:24 -0400 Subject: Add a real API for dealing with blk_congestion_wait() Signed-off-by: Trond Myklebust --- block/ll_rw_blk.c | 12 ++++++++++++ fs/nfs/write.c | 1 + include/linux/blkdev.h | 1 + include/linux/writeback.h | 1 + mm/page-writeback.c | 9 +++++++++ 5 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index ddd9253f9d55..dcbd6ff1fa33 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2734,6 +2734,18 @@ long blk_congestion_wait(int rw, long timeout) EXPORT_SYMBOL(blk_congestion_wait); +/** + * blk_congestion_end - wake up sleepers on a congestion queue + * @rw: READ or WRITE + */ +void blk_congestion_end(int rw) +{ + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + if (waitqueue_active(wqh)) + wake_up(wqh); +} + /* * Has to be called with the request spinlock acquired */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 453d44666ea5..38ba5c09af08 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); + writeback_congestion_end(); return err; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aafe82788b4e..96c9040c00a8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -746,6 +746,7 @@ extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); extern long blk_congestion_wait(int rw, long timeout); +extern void blk_congestion_end(int rw); extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); extern int blkdev_issue_flush(struct block_device *, sector_t *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9e38b566d0e7..0422036af4eb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -85,6 +85,7 @@ int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(void); +void writeback_congestion_end(void); /* These are exported to sysctl. */ extern int dirty_background_ratio; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e630188ccc40..77a0bc4e261a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -802,6 +802,15 @@ int test_set_page_writeback(struct page *page) } EXPORT_SYMBOL(test_set_page_writeback); +/* + * Wakes up tasks that are being throttled due to writeback congestion + */ +void writeback_congestion_end(void) +{ + blk_congestion_end(WRITE); +} +EXPORT_SYMBOL(writeback_congestion_end); + /* * Return true if any of the pages in the mapping are marged with the * passed tag. -- cgit v1.2.3-71-gd317 From 5dd3177ae5012c1e2ad7a9ffdbd0e0d0de2f60e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 01:03:05 -0400 Subject: NFSv4: Fix a use-after-free issue with the nfs server. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 36 +++++++++++++++++++++--------------- fs/nfs/nfs4renewd.c | 1 + fs/nfs/super.c | 8 +++++--- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 28 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 12941a8a6d75..f1ff2aec2ca5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -164,6 +164,26 @@ error_0: return NULL; } +static void nfs4_shutdown_client(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + while (!list_empty(&clp->cl_unused)) { + struct nfs4_state_owner *sp; + + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); +#endif +} + /* * Destroy a shared client record */ @@ -171,21 +191,7 @@ static void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); -#ifdef CONFIG_NFS_V4 - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) { - while (!list_empty(&clp->cl_unused)) { - struct nfs4_state_owner *sp; - - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); - nfs_idmap_delete(clp); - } -#endif + nfs4_shutdown_client(clp); /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index f2c893690ac4..7b6df1852e75 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -121,6 +121,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) __FUNCTION__, (timeout + HZ - 1) / HZ); cancel_delayed_work(&clp->cl_renewd); schedule_delayed_work(&clp->cl_renewd, timeout); + set_bit(NFS_CS_RENEWD, &clp->cl_res_state); spin_unlock(&clp->cl_lock); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 97cfb143e09f..665949d27798 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -883,13 +883,15 @@ static int nfs4_get_sb(struct file_system_type *fs_type, goto out_free; } + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } + if (!s->s_root) { /* initial superblock/root creation */ s->s_flags = flags; - nfs4_fill_super(s); - } else { - nfs_free_server(server); } mntroot = nfs4_get_root(s, &mntfh); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6d0be0efd1b5..7ccfc7ef0a83 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -19,6 +19,7 @@ struct nfs_client { #define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ +#define NFS_CS_RENEWD 3 /* - renewd started */ struct sockaddr_in cl_addr; /* server identifier */ char * cl_hostname; /* hostname of server */ struct list_head cl_share_link; /* link in global client list */ -- cgit v1.2.3-71-gd317 From 158998b6fe36f6acef087f574c96d44713499cc9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 01:03:17 -0400 Subject: SUNRPC: Make rpc_mkpipe() take the parent dentry as an argument Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 6 +----- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- net/sunrpc/auth_gss/auth_gss.c | 7 ++----- net/sunrpc/rpc_pipe.c | 38 +++++++++++++++++++++++--------------- 4 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index f96dfac7dc9a..82ad7110a1c0 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -84,7 +84,6 @@ struct idmap_hashtable { }; struct idmap { - char idmap_path[48]; struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; @@ -119,10 +118,7 @@ nfs_idmap_new(struct nfs_client *clp) if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return -ENOMEM; - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", clp->cl_rpcclient->cl_pathname); - - idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a481472c9484..a2eb9b4a9de3 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -43,7 +43,7 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ef1cf5b476c8..6eed3e166ba3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -88,7 +88,6 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - char path[48]; spinlock_t lock; }; @@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (err) goto err_put_mech; - snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", - clnt->cl_pathname, - gss_auth->mech->gm_name); - gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { err = PTR_ERR(gss_auth->dentry); goto err_put_mech; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c21dc07f2a8c..11ec12a09d70 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -621,17 +621,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) } static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +rpc_lookup_create(struct dentry *parent, const char *name, int len) { + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dir = nd->dentry->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); + dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; if (dentry->d_inode) { @@ -642,7 +638,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd) return dentry; out_err: mutex_unlock(&dir->i_mutex); - rpc_release_path(nd); + return dentry; +} + +static struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) +{ + struct dentry *dentry; + int error; + + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + if (IS_ERR(dentry)) + rpc_release_path(nd); return dentry; } @@ -701,17 +710,16 @@ rpc_rmdir(struct dentry *dentry) } struct dentry * -rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) +rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) { - struct nameidata nd; struct dentry *dentry; struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_negative(path, &nd); + dentry = rpc_lookup_create(parent, name, strlen(name)); if (IS_ERR(dentry)) return dentry; - dir = nd.dentry->d_inode; + dir = parent->d_inode; inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; @@ -726,13 +734,13 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) dget(dentry); out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); - printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", - __FILE__, __FUNCTION__, path, -ENOMEM); + printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", + __FILE__, __FUNCTION__, parent->d_name.name, name, + -ENOMEM); goto out; } -- cgit v1.2.3-71-gd317 From 6b6ca86b77b62b798cf9ca2599036420abce7796 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Sep 2006 12:55:57 -0400 Subject: SUNRPC: Add refcounting to the struct rpc_xprt In a subsequent patch, this will allow the portmapper to take a reference to the rpc_xprt for which it is updating the port number, fixing an Oops. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++++- net/sunrpc/clnt.c | 8 +++----- net/sunrpc/xprt.c | 28 +++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index de4efea7c856..bdeba8538c71 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,7 @@ struct rpc_xprt_ops { }; struct rpc_xprt { + struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ @@ -248,7 +250,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); -int xprt_destroy(struct rpc_xprt *xprt); +struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); +void xprt_put(struct rpc_xprt *xprt); static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ceadb728f0da..084a0ad5c64e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -177,7 +177,7 @@ out_no_path: kfree(clnt->cl_server); kfree(clnt); out_err: - xprt_destroy(xprt); + xprt_put(xprt); out_no_xprt: return ERR_PTR(err); } @@ -261,6 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt) atomic_set(&new->cl_users, 0); new->cl_parent = clnt; atomic_inc(&clnt->cl_count); + new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; @@ -337,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } - if (clnt->cl_xprt) { - xprt_destroy(clnt->cl_xprt); - clnt->cl_xprt = NULL; - } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; + xprt_put(clnt->cl_xprt); kfree(clnt); return 0; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a85f82baefc1..1f786f68729d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -926,6 +926,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si return ERR_PTR(result); } + kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -958,16 +959,37 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si /** * xprt_destroy - destroy an RPC transport, killing off all requests. - * @xprt: transport to destroy + * @kref: kref for the transport to destroy * */ -int xprt_destroy(struct rpc_xprt *xprt) +static void xprt_destroy(struct kref *kref) { + struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); + dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); xprt->ops->destroy(xprt); kfree(xprt); +} - return 0; +/** + * xprt_put - release a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +void xprt_put(struct rpc_xprt *xprt) +{ + kref_put(&xprt->kref, xprt_destroy); +} + +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + kref_get(&xprt->kref); + return xprt; } -- cgit v1.2.3-71-gd317 From 4c8bd7eeee4c8f157fb61fb64b57500990b42e0e Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 22 Sep 2006 22:31:36 -0700 Subject: [KERNEL] Do not truncate to 'int' in ALIGN() macro. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 851aa1bcfc1a..2b2ae4fdce8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -31,7 +31,7 @@ extern const char linux_banner[]; #define STACK_MAGIC 0xdeadbeef #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) +#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) -- cgit v1.2.3-71-gd317 From 42431acbac43eb47c774c29d370f5c59136805bf Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 24 Sep 2006 10:44:09 +0100 Subject: [MMC] MMC_CAP_BYTEBLOCK flag for non-log2 block sizes capable hosts Some MMC hosts can only handle log2 block sizes. Unfortunately, the MMC password support needs to be able to send non-log2 block sizes. Provide a capability so that the MMC password support can decide whether it should use this support or not. The unfortunate side effect of this host limitation is that any MMC card protected by a password which is not a log2 block size can not be accessed on a host which only allows a log2 block size. This change just adds the flag. The MMC password support code needs updating to use it (if and when it is finally submitted.) Signed-off-by: Russell King --- drivers/mmc/at91_mci.c | 1 + drivers/mmc/imxmmc.c | 2 +- drivers/mmc/omap.c | 7 ++++--- drivers/mmc/sdhci.c | 2 +- drivers/mmc/wbsd.c | 2 +- include/linux/mmc/host.h | 1 + 6 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/at91_mci.c b/drivers/mmc/at91_mci.c index 6b7638b84290..d34b7d9d92ed 100644 --- a/drivers/mmc/at91_mci.c +++ b/drivers/mmc/at91_mci.c @@ -822,6 +822,7 @@ static int at91_mci_probe(struct platform_device *pdev) mmc->f_min = 375000; mmc->f_max = 25000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps = MMC_CAP_BYTEBLOCK; host = mmc_priv(mmc); host->mmc = mmc; diff --git a/drivers/mmc/imxmmc.c b/drivers/mmc/imxmmc.c index fb6565b98f32..1b79dd271aae 100644 --- a/drivers/mmc/imxmmc.c +++ b/drivers/mmc/imxmmc.c @@ -956,7 +956,7 @@ static int imxmci_probe(struct platform_device *pdev) mmc->f_min = 150000; mmc->f_max = CLK_RATE/2; mmc->ocr_avail = MMC_VDD_32_33; - mmc->caps |= MMC_CAP_4_BIT_DATA; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_BYTEBLOCK; /* MMC core transfer sizes tunable parameters */ mmc->max_hw_segs = 64; diff --git a/drivers/mmc/omap.c b/drivers/mmc/omap.c index ddf06b32c159..52c9e52e6b78 100644 --- a/drivers/mmc/omap.c +++ b/drivers/mmc/omap.c @@ -1034,13 +1034,14 @@ static int __init mmc_omap_probe(struct platform_device *pdev) host->irq = pdev->resource[1].start; host->base = (void __iomem*)IO_ADDRESS(r->start); - if (minfo->wire4) - mmc->caps |= MMC_CAP_4_BIT_DATA; - mmc->ops = &mmc_omap_ops; mmc->f_min = 400000; mmc->f_max = 24000000; mmc->ocr_avail = MMC_VDD_32_33|MMC_VDD_33_34; + mmc->caps = MMC_CAP_BYTEBLOCK; + + if (minfo->wire4) + mmc->caps |= MMC_CAP_4_BIT_DATA; /* Use scatterlist DMA to reduce per-transfer costs. * NOTE max_seg_size assumption that small blocks aren't diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c index dea4edd1c434..fdfc3838dd79 100644 --- a/drivers/mmc/sdhci.c +++ b/drivers/mmc/sdhci.c @@ -1262,7 +1262,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) mmc->ops = &sdhci_ops; mmc->f_min = host->max_clk / 256; mmc->f_max = host->max_clk; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_BYTEBLOCK; mmc->ocr_avail = 0; if (caps & SDHCI_CAN_VDD_330) diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c index 4a6617d9a49f..6435a6822ad3 100644 --- a/drivers/mmc/wbsd.c +++ b/drivers/mmc/wbsd.c @@ -1323,7 +1323,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev) mmc->f_min = 375000; mmc->f_max = 24000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_BYTEBLOCK; spin_lock_init(&host->lock); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index b282ec9bba08..587264a58d56 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -86,6 +86,7 @@ struct mmc_host { #define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ #define MMC_CAP_MULTIWRITE (1 << 1) /* Can accurately report bytes sent to card on error */ +#define MMC_CAP_BYTEBLOCK (1 << 2) /* Can do non-log2 block sizes */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v1.2.3-71-gd317 From e18fa700c9a31360bc8f193aa543b7ef7b39a06b Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 24 Sep 2006 11:13:19 -0400 Subject: Move several *_SUPER_MAGIC symbols to include/linux/magic.h. Signed-off-by: Jeff Garzik --- fs/affs/affs.h | 1 - fs/affs/super.c | 1 + fs/autofs/autofs_i.h | 2 -- fs/autofs/inode.c | 1 + fs/autofs4/autofs_i.h | 2 -- fs/autofs4/inode.c | 1 + fs/hpfs/hpfs_fn.h | 1 - fs/hpfs/super.c | 1 + fs/openpromfs/inode.c | 2 +- include/linux/Kbuild | 4 +--- include/linux/adfs_fs.h | 2 +- include/linux/affs_fs.h | 7 ------- include/linux/coda_psdev.h | 4 ++-- include/linux/efs_fs_sb.h | 3 +-- include/linux/ext2_fs.h | 6 +----- include/linux/ext3_fs.h | 6 +----- include/linux/hpfs_fs.h | 8 -------- include/linux/iso_fs.h | 6 +++--- include/linux/jffs2.h | 4 ++-- include/linux/magic.h | 37 +++++++++++++++++++++++++++++++++++++ include/linux/minix_fs.h | 6 ++---- include/linux/msdos_fs.h | 4 ++-- include/linux/ncp_fs.h | 5 +---- include/linux/nfs_fs.h | 7 ++----- include/linux/openprom_fs.h | 10 ---------- include/linux/proc_fs.h | 3 +-- include/linux/qnx4_fs.h | 2 +- include/linux/reiserfs_fs.h | 10 ++-------- include/linux/smb.h | 3 +-- include/linux/usbdevice_fs.h | 3 +-- 30 files changed, 67 insertions(+), 85 deletions(-) delete mode 100644 include/linux/affs_fs.h delete mode 100644 include/linux/hpfs_fs.h create mode 100644 include/linux/magic.h delete mode 100644 include/linux/openprom_fs.h (limited to 'include/linux') diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0ddd4cc0d1a0..1dc8438ef389 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -1,7 +1,6 @@ #include #include #include -#include #include /* AmigaOS allows file names with up to 30 characters length. diff --git a/fs/affs/super.c b/fs/affs/super.c index 5200f4938df0..17352011ab67 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "affs.h" extern struct timezone sys_tz; diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index a62327f1bdff..c7700d9b3f96 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -37,8 +37,6 @@ #define DPRINTK(D) ((void)0) #endif -#define AUTOFS_SUPER_MAGIC 0x0187 - /* * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the * kernel will keep the negative response cached for up to the time given diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 65e5ed42190e..af2efbbb5d76 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "autofs_i.h" #include diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d6603d02304c..480ab178cba5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -40,8 +40,6 @@ #define DPRINTK(fmt,args...) do {} while(0) #endif -#define AUTOFS_SUPER_MAGIC 0x0187 - /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index fde78b110ddd..11a6a9ae51b7 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "autofs_i.h" #include diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index f687d54ed442..32ab51e42b96 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f798480a363f..8fe51c343786 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -11,6 +11,7 @@ #include #include #include +#include /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 93a56bd4a2b7..592a6402e851 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -8,10 +8,10 @@ #include #include #include -#include #include #include #include +#include #include #include diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 7d076d97b2f7..67383605f2e5 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -12,7 +12,6 @@ header-y += netfilter_bridge/ header-y += netfilter_ipv4/ header-y += netfilter_ipv6/ -header-y += affs_fs.h header-y += affs_hardblocks.h header-y += aio_abi.h header-y += a.out.h @@ -67,7 +66,6 @@ header-y += genetlink.h header-y += gen_stats.h header-y += gigaset_dev.h header-y += hdsmart.h -header-y += hpfs_fs.h header-y += hysdn_if.h header-y += i2c-dev.h header-y += i8k.h @@ -103,6 +101,7 @@ header-y += ixjuser.h header-y += jffs2.h header-y += keyctl.h header-y += limits.h +header-y += magic.h header-y += major.h header-y += matroxfb.h header-y += meye.h @@ -116,7 +115,6 @@ header-y += netrom.h header-y += nfs2.h header-y += nfs4_mount.h header-y += nfs_mount.h -header-y += openprom_fs.h header-y += param.h header-y += pci_ids.h header-y += pci_regs.h diff --git a/include/linux/adfs_fs.h b/include/linux/adfs_fs.h index 4a5d50c2bdbf..ef788c2085a1 100644 --- a/include/linux/adfs_fs.h +++ b/include/linux/adfs_fs.h @@ -2,6 +2,7 @@ #define _ADFS_FS_H #include +#include /* * Disc Record at disc address 0xc00 @@ -38,7 +39,6 @@ struct adfs_discrecord { #define ADFS_DR_OFFSET (0x1c0) #define ADFS_DR_SIZE 60 #define ADFS_DR_SIZE_BITS (ADFS_DR_SIZE << 3) -#define ADFS_SUPER_MAGIC 0xadf5 #ifdef __KERNEL__ #include diff --git a/include/linux/affs_fs.h b/include/linux/affs_fs.h deleted file mode 100644 index c57b5ee87d55..000000000000 --- a/include/linux/affs_fs.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _AFFS_FS_H -#define _AFFS_FS_H -/* - * The affs filesystem constants/structures - */ -#define AFFS_SUPER_MAGIC 0xadff -#endif diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 98f6c52c152b..b541bb3d1f4b 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -1,11 +1,11 @@ #ifndef __CODA_PSDEV_H #define __CODA_PSDEV_H +#include + #define CODA_PSDEV_MAJOR 67 #define MAX_CODADEVS 5 /* how many do we allow */ -#define CODA_SUPER_MAGIC 0x73757245 - struct kstatfs; struct coda_sb_info diff --git a/include/linux/efs_fs_sb.h b/include/linux/efs_fs_sb.h index c76088baef28..ff1945e37790 100644 --- a/include/linux/efs_fs_sb.h +++ b/include/linux/efs_fs_sb.h @@ -9,8 +9,7 @@ #ifndef __EFS_FS_SB_H__ #define __EFS_FS_SB_H__ -/* statfs() magic number for EFS */ -#define EFS_SUPER_MAGIC 0x414A53 +#include /* EFS superblock magic numbers */ #define EFS_MAGIC 0x072959 diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index facf34e98954..33a1aa107329 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -17,6 +17,7 @@ #define _LINUX_EXT2_FS_H #include +#include /* * The second extended filesystem constants/structures @@ -63,11 +64,6 @@ /* First non-reserved inode for old ext2 filesystems */ #define EXT2_GOOD_OLD_FIRST_INO 11 -/* - * The second extended file system magic number - */ -#define EXT2_SUPER_MAGIC 0xEF53 - #ifdef __KERNEL__ #include static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 9f9cce7bd86d..0eed918b3816 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -17,6 +17,7 @@ #define _LINUX_EXT3_FS_H #include +#include /* * The second extended filesystem constants/structures @@ -66,11 +67,6 @@ /* First non-reserved inode for old ext3 filesystems */ #define EXT3_GOOD_OLD_FIRST_INO 11 -/* - * The second extended file system magic number - */ -#define EXT3_SUPER_MAGIC 0xEF53 - /* * Maximal count of links to a file */ diff --git a/include/linux/hpfs_fs.h b/include/linux/hpfs_fs.h deleted file mode 100644 index a5028dd94d31..000000000000 --- a/include/linux/hpfs_fs.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _LINUX_HPFS_FS_H -#define _LINUX_HPFS_FS_H - -/* HPFS magic number (word 0 of block 16) */ - -#define HPFS_SUPER_MAGIC 0xf995e849 - -#endif diff --git a/include/linux/iso_fs.h b/include/linux/iso_fs.h index 47967878bfef..4688ac4284e2 100644 --- a/include/linux/iso_fs.h +++ b/include/linux/iso_fs.h @@ -2,6 +2,8 @@ #define _ISOFS_FS_H #include +#include + /* * The isofs filesystem constants/structures */ @@ -160,6 +162,4 @@ struct iso_directory_record { #define ISOFS_BUFFER_SIZE(INODE) ((INODE)->i_sb->s_blocksize) #define ISOFS_BUFFER_BITS(INODE) ((INODE)->i_sb->s_blocksize_bits) -#define ISOFS_SUPER_MAGIC 0x9660 - -#endif +#endif /* _ISOFS_FS_H */ diff --git a/include/linux/jffs2.h b/include/linux/jffs2.h index c9c760700bc3..840631fa5ff1 100644 --- a/include/linux/jffs2.h +++ b/include/linux/jffs2.h @@ -15,12 +15,12 @@ #ifndef __LINUX_JFFS2_H__ #define __LINUX_JFFS2_H__ +#include + /* You must include something which defines the C99 uintXX_t types. We don't do it from here because this file is used in too many different environments. */ -#define JFFS2_SUPER_MAGIC 0x72b6 - /* Values we may expect to find in the 'magic' field */ #define JFFS2_OLD_MAGIC_BITMASK 0x1984 #define JFFS2_MAGIC_BITMASK 0x1985 diff --git a/include/linux/magic.h b/include/linux/magic.h new file mode 100644 index 000000000000..22036dd2ba36 --- /dev/null +++ b/include/linux/magic.h @@ -0,0 +1,37 @@ +#ifndef __LINUX_MAGIC_H__ +#define __LINUX_MAGIC_H__ + +#define ADFS_SUPER_MAGIC 0xadf5 +#define AFFS_SUPER_MAGIC 0xadff +#define AUTOFS_SUPER_MAGIC 0x0187 +#define CODA_SUPER_MAGIC 0x73757245 +#define EFS_SUPER_MAGIC 0x414A53 +#define EXT2_SUPER_MAGIC 0xEF53 +#define EXT3_SUPER_MAGIC 0xEF53 +#define HPFS_SUPER_MAGIC 0xf995e849 +#define ISOFS_SUPER_MAGIC 0x9660 +#define JFFS2_SUPER_MAGIC 0x72b6 + +#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ +#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ +#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ +#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ + +#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ +#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ +#define NFS_SUPER_MAGIC 0x6969 +#define OPENPROM_SUPER_MAGIC 0x9fa1 +#define PROC_SUPER_MAGIC 0x9fa0 +#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ + +#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ + /* used by file system utilities that + look at the superblock, etc. */ +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" +#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + +#define SMB_SUPER_MAGIC 0x517B +#define USBDEVICE_SUPER_MAGIC 0x9fa2 + +#endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/minix_fs.h b/include/linux/minix_fs.h index 1ecc3cc8cef5..916e8f72c63d 100644 --- a/include/linux/minix_fs.h +++ b/include/linux/minix_fs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_MINIX_FS_H #define _LINUX_MINIX_FS_H +#include + /* * The minix filesystem constants/structures */ @@ -19,10 +21,6 @@ #define MINIX_I_MAP_SLOTS 8 #define MINIX_Z_MAP_SLOTS 64 -#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ -#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ -#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ -#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ #define MINIX_VALID_FS 0x0001 /* Clean fs. */ #define MINIX_ERROR_FS 0x0002 /* fs has errors. */ diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index d9035c73e5d1..bae62d62dc3e 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_MSDOS_FS_H #define _LINUX_MSDOS_FS_H +#include + /* * The MS-DOS filesystem constants/structures */ @@ -18,8 +20,6 @@ #define CT_LE_L(v) cpu_to_le32(v) -#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ - #define MSDOS_ROOT_INO 1 /* == MINIX_ROOT_INO */ #define MSDOS_DIR_BITS 5 /* log2(sizeof(struct msdos_dir_entry)) */ diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index b208f0cd556b..02e352be717e 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -185,10 +186,6 @@ struct ncp_entry_info { __u8 file_handle[6]; }; -/* Guess, what 0x564c is :-) */ -#define NCP_SUPER_MAGIC 0x564c - - static inline struct ncp_server *NCP_SBP(struct super_block *sb) { return sb->s_fs_info; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3b5b04193fee..36f5bcf513b0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -9,6 +9,8 @@ #ifndef _LINUX_NFS_FS_H #define _LINUX_NFS_FS_H +#include + /* * Enable debugging support for nfs client. * Requires RPC_DEBUG. @@ -21,11 +23,6 @@ #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) -/* - * superblock magic number for NFS - */ -#define NFS_SUPER_MAGIC 0x6969 - /* * When flushing a cluster of dirty pages, there can be different * strategies: diff --git a/include/linux/openprom_fs.h b/include/linux/openprom_fs.h deleted file mode 100644 index a837aab8217e..000000000000 --- a/include/linux/openprom_fs.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_OPENPROM_FS_H -#define _LINUX_OPENPROM_FS_H - -/* - * The openprom filesystem constants/structures - */ - -#define OPENPROM_SUPER_MAGIC 0x9fa1 - -#endif /* _LINUX_OPENPROM_FS_H */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 17e75783e3a5..3435ca38dd14 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -24,8 +25,6 @@ enum { PROC_ROOT_INO = 1, }; -#define PROC_SUPER_MAGIC 0x9fa0 - /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 27f49c85d5d6..0c7ac444fd35 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -11,6 +11,7 @@ #define _LINUX_QNX4_FS_H #include +#include #define QNX4_ROOT_INO 1 @@ -25,7 +26,6 @@ #define QNX4_I_MAP_SLOTS 8 #define QNX4_Z_MAP_SLOTS 64 -#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define QNX4_VALID_FS 0x0001 /* Clean fs. */ #define QNX4_ERROR_FS 0x0002 /* fs has errors. */ #define QNX4_BLOCK_SIZE 0x200 /* blocksize of 512 bytes */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index daa2d83cefe8..28493ffaafe7 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -12,6 +12,8 @@ #define _LINUX_REISER_FS_H #include +#include + #ifdef __KERNEL__ #include #include @@ -227,14 +229,6 @@ struct reiserfs_super_block { ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) - /* used by gcc */ -#define REISERFS_SUPER_MAGIC 0x52654973 - /* used by file system utilities that - look at the superblock, etc. */ -#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" -#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" -#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" - int is_reiserfs_3_5(struct reiserfs_super_block *rs); int is_reiserfs_3_6(struct reiserfs_super_block *rs); int is_reiserfs_jr(struct reiserfs_super_block *rs); diff --git a/include/linux/smb.h b/include/linux/smb.h index b0162208c963..6df3b1501559 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -10,6 +10,7 @@ #define _LINUX_SMB_H #include +#include enum smb_protocol { SMB_PROTOCOL_NONE, @@ -101,8 +102,6 @@ enum smb_conn_state { CONN_RETRYING /* Currently trying to reconnect */ }; -#define SMB_SUPER_MAGIC 0x517B - #define SMB_HEADER_LEN 37 /* includes everything up to, but not * including smb_bcc */ diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h index 7b7aadb69092..617d8a1c59ae 100644 --- a/include/linux/usbdevice_fs.h +++ b/include/linux/usbdevice_fs.h @@ -32,11 +32,10 @@ #define _LINUX_USBDEVICE_FS_H #include +#include /* --------------------------------------------------------------------- */ -#define USBDEVICE_SUPER_MAGIC 0x9fa2 - /* usbdevfs ioctl codes */ struct usbdevfs_ctrltransfer { -- cgit v1.2.3-71-gd317 From 00e4d116a7ef94eb910be037912b0b2fc09f608b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 22 Sep 2006 09:33:58 +0100 Subject: [DCCP]: Allow default/fallback service code. This has been discussed on dccp@vger and removes the necessity for applications to supply service codes in each and every case. If an application does not want to provide a service code, that's fine, it will be given 0. Otherwise, service codes can be set via socket options as before. This patch has been tested using various client/server configurations (including listening on multiple service codes). Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/networking/dccp.txt | 8 +++++--- include/linux/dccp.h | 6 +----- net/dccp/ipv4.c | 3 --- net/dccp/proto.c | 11 +---------- 4 files changed, 7 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index c45daabd3bfe..74563b38ffd9 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -1,7 +1,6 @@ DCCP protocol ============ -Last updated: 10 November 2005 Contents ======== @@ -42,8 +41,11 @@ Socket options DCCP_SOCKOPT_PACKET_SIZE is used for CCID3 to set default packet size for calculations. -DCCP_SOCKOPT_SERVICE sets the service. This is compulsory as per the -specification. If you don't set it you will get EPROTO. +DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of +service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, +the socket will fall back to 0 (which means that no meaningful service code +is present). Connecting sockets set at most one service option; for +listening sockets, multiple service codes can be specified. Notes ===== diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 2d7671c92c0b..29f9291e45c0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -404,6 +404,7 @@ struct dccp_service_list { }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) +#define DCCP_SERVICE_CODE_IS_ABSENT 0 static inline int dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) @@ -484,11 +485,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk) return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; } -static inline int dccp_service_not_initialized(const struct sock *sk) -{ - return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9a1a76a7dc41..66be29b6f508 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -56,9 +56,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 962df0ea31aa..72cbdcfc2c65 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -217,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; + dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; return 0; @@ -267,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - /* - * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) - * before calling listen() - */ - if (dccp_service_not_initialized(sk)) - return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -540,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len, int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); - if (dccp_service_not_initialized(sk)) - goto out; - if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; -- cgit v1.2.3-71-gd317 From 349457ccf2592c14bdf13b6706170ae2e94931b1 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 8 Sep 2006 14:22:21 -0700 Subject: [PATCH] Allow file systems to manually d_move() inside of ->rename() Some file systems want to manually d_move() the dentries involved in a rename. We can do this by making use of the FS_ODD_RENAME flag if we just have nfs_rename() unconditionally do the d_move(). While there, we rename the flag to be more descriptive. OCFS2 uses this to protect that part of the rename operation with a cluster lock. Signed-off-by: Mark Fasheh Cc: Trond Myklebust Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- fs/namei.c | 6 +++--- fs/nfs/dir.c | 3 +-- fs/nfs/super.c | 10 +++++----- include/linux/fs.h | 7 ++++--- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 432d6bc6fab0..6b591c01b09f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, dput(new_dentry); } if (!error) - d_move(old_dentry,new_dentry); + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) + d_move(old_dentry,new_dentry); return error; } @@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, else error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (!error) { - /* The following d_move() should become unconditional */ - if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry, new_dentry); } if (target) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3419c2da9ba9..7432f1a43f3d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1669,8 +1669,7 @@ out: if (rehash) d_rehash(rehash); if (!error) { - if (!S_ISDIR(old_inode->i_mode)) - d_move(old_dentry, new_dentry); + d_move(old_dentry, new_dentry); nfs_renew_times(new_dentry); nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b99113b0f65f..e8d40030cab4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -71,7 +71,7 @@ static struct file_system_type nfs_fs_type = { .name = "nfs", .get_sb = nfs_get_sb, .kill_sb = nfs_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; struct file_system_type nfs_xdev_fs_type = { @@ -79,7 +79,7 @@ struct file_system_type nfs_xdev_fs_type = { .name = "nfs", .get_sb = nfs_xdev_get_sb, .kill_sb = nfs_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; static struct super_operations nfs_sops = { @@ -107,7 +107,7 @@ static struct file_system_type nfs4_fs_type = { .name = "nfs4", .get_sb = nfs4_get_sb, .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; struct file_system_type nfs4_xdev_fs_type = { @@ -115,7 +115,7 @@ struct file_system_type nfs4_xdev_fs_type = { .name = "nfs4", .get_sb = nfs4_xdev_get_sb, .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; struct file_system_type nfs4_referral_fs_type = { @@ -123,7 +123,7 @@ struct file_system_type nfs4_referral_fs_type = { .name = "nfs4", .get_sb = nfs4_referral_get_sb, .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; static struct super_operations nfs4_sops = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 555bc195c420..1d3e601ece73 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -92,9 +92,10 @@ extern int dir_notify_enable; #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ -#define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ +#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() + * during rename() internally. + */ + /* * These are the fs-independent mount-flags: up to 32 flags are supported */ -- cgit v1.2.3-71-gd317 From b83eff641ed39bd631535b9a8971e161b056f541 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Fri, 22 Sep 2006 14:25:36 +1200 Subject: [DCCP]: Introduce constants for CCID numbers Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 29f9291e45c0..d6f4ec467a4b 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -169,6 +169,12 @@ enum { DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* DCCP CCIDS */ +enum { + DCCPC_CCID2 = 2, + DCCPC_CCID3 = 3, +}; + /* DCCP features */ enum { DCCPF_RESERVED = 0, @@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 #define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 -- cgit v1.2.3-71-gd317 From 9e72cbf353e259bd30ab472d72d7bdb9be23fb12 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 24 Sep 2006 22:06:48 +0100 Subject: Remove dead netfilter_logging.h from include/linux/Kbuild Signed-off-by: David Woodhouse --- include/linux/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 19a3664f1dfe..1df2ac30a4d2 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -266,7 +266,6 @@ unifdef-y += netfilter_decnet.h unifdef-y += netfilter.h unifdef-y += netfilter_ipv4.h unifdef-y += netfilter_ipv6.h -unifdef-y += netfilter_logging.h unifdef-y += net.h unifdef-y += netlink.h unifdef-y += nfs3.h -- cgit v1.2.3-71-gd317 From 3e597c6045502dd0fa98a61aa95ba178f8a2cc03 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2006 23:42:20 +0100 Subject: [PATCH] fix iptables __user misannotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/netfilter/x_tables.h | 4 ++-- net/ipv4/netfilter/ip_tables.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 739a98eebe2c..04319a76103a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -390,13 +390,13 @@ extern int xt_compat_match_offset(struct xt_match *match); extern void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, int *size); extern int xt_compat_match_to_user(struct xt_entry_match *m, - void * __user *dstptr, int *size); + void __user **dstptr, int *size); extern int xt_compat_target_offset(struct xt_target *target); extern void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, int *size); extern int xt_compat_target_to_user(struct xt_entry_target *t, - void * __user *dstptr, int *size); + void __user **dstptr, int *size); #endif /* CONFIG_COMPAT */ #endif /* __KERNEL__ */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 800067d69a9a..78a44b01c035 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1364,15 +1364,15 @@ struct compat_ipt_replace { }; static inline int compat_copy_match_to_user(struct ipt_entry_match *m, - void * __user *dstptr, compat_uint_t *size) + void __user **dstptr, compat_uint_t *size) { return xt_compat_match_to_user(m, dstptr, size); } static int compat_copy_entry_to_user(struct ipt_entry *e, - void * __user *dstptr, compat_uint_t *size) + void __user **dstptr, compat_uint_t *size) { - struct ipt_entry_target __user *t; + struct ipt_entry_target *t; struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; -- cgit v1.2.3-71-gd317 From 0d6c7ae22d4fadbc83677ea1a6144eea8911e319 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sun, 24 Sep 2006 19:28:47 -0700 Subject: [NETFILTER]: Add dscp,DSCP headers to header-y This patch adds xt_dscp.h and xt_DSCP.h to the kernel headers which are exported via 'make headers_install'. These are necessary for userspace to add rules using dscp match and DSCP target. Signed-off-by: Yasuyuki Kozakai Signed-off-by: David S. Miller --- include/linux/netfilter/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9a285cecf249..312bd2ffee33 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,8 @@ header-y += xt_connmark.h header-y += xt_CONNMARK.h header-y += xt_conntrack.h header-y += xt_dccp.h +header-y += xt_dscp.h +header-y += xt_DSCP.h header-y += xt_esp.h header-y += xt_helper.h header-y += xt_length.h -- cgit v1.2.3-71-gd317 From a6d967a485c67ec8a1276261f39d81ace6a3e308 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 25 Sep 2006 15:33:09 -0400 Subject: [libata] No need for all those arch libata-portmap.h headers They all contain the same thing. Instead, have a single generic one in include/asm-generic, and permit an arch to override as needed. Signed-off-by: Jeff Garzik --- include/asm-alpha/libata-portmap.h | 1 - include/asm-frv/libata-portmap.h | 1 - include/asm-i386/libata-portmap.h | 1 - include/asm-ia64/libata-portmap.h | 1 - include/asm-powerpc/libata-portmap.h | 1 - include/asm-sparc/libata-portmap.h | 1 - include/asm-sparc64/libata-portmap.h | 1 - include/asm-x86_64/libata-portmap.h | 1 - include/linux/libata.h | 8 ++++++++ 9 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 include/asm-alpha/libata-portmap.h delete mode 100644 include/asm-frv/libata-portmap.h delete mode 100644 include/asm-i386/libata-portmap.h delete mode 100644 include/asm-ia64/libata-portmap.h delete mode 100644 include/asm-powerpc/libata-portmap.h delete mode 100644 include/asm-sparc/libata-portmap.h delete mode 100644 include/asm-sparc64/libata-portmap.h delete mode 100644 include/asm-x86_64/libata-portmap.h (limited to 'include/linux') diff --git a/include/asm-alpha/libata-portmap.h b/include/asm-alpha/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-alpha/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-frv/libata-portmap.h b/include/asm-frv/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-frv/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-i386/libata-portmap.h b/include/asm-i386/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-i386/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ia64/libata-portmap.h b/include/asm-ia64/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-ia64/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-powerpc/libata-portmap.h b/include/asm-powerpc/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-powerpc/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-sparc/libata-portmap.h b/include/asm-sparc/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-sparc/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-sparc64/libata-portmap.h b/include/asm-sparc64/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-sparc64/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-x86_64/libata-portmap.h b/include/asm-x86_64/libata-portmap.h deleted file mode 100644 index 75484ef0c743..000000000000 --- a/include/asm-x86_64/libata-portmap.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/linux/libata.h b/include/linux/libata.h index 1ef3d3901b47..d6a3d4b345fc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -36,7 +36,15 @@ #include #include +/* + * Define if arch has non-standard setup. This is a _PCI_ standard + * not a legacy or ISA standard. + */ +#ifdef CONFIG_ATA_NONSTANDARD #include +#else +#include +#endif /* * compile-time options: to be removed as soon as all the drivers are -- cgit v1.2.3-71-gd317 From baef186519c69b11cf7e48c26e75feb1e6173baa Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 8 Sep 2006 16:04:05 -0400 Subject: [PATCH] WE-21 support (core API) This is version 21 of the Wireless Extensions. Changelog : o finishes migrating the ESSID API (remove the +1) o netdev->get_wireless_stats is no more o long/short retry This is a redacted version of a patch originally submitted by Jean Tourrilhes. I removed most of the additions, in order to minimize future support requirements for nl80211 (or other WE successor). CC: Jean Tourrilhes Signed-off-by: John W. Linville --- include/linux/netdevice.h | 1 - include/linux/wireless.h | 24 ++++++++++++++--- net/core/net-sysfs.c | 5 +--- net/core/wireless.c | 67 +++++++++++++++++++++++++++++------------------ 4 files changed, 62 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43289127b458..ac4f50213bc3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -334,7 +334,6 @@ struct net_device struct net_device_stats* (*get_stats)(struct net_device *dev); - struct iw_statistics* (*get_wireless_stats)(struct net_device *dev); /* List of functions to handle Wireless Extensions (instead of ioctl). * See for details. Jean II */ diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 13588564b42b..a50a0130fd9e 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1,7 +1,7 @@ /* * This file define a set of standard wireless extensions * - * Version : 20 17.2.06 + * Version : 21 14.3.06 * * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved. @@ -69,9 +69,14 @@ /***************************** INCLUDES *****************************/ +/* This header is used in user-space, therefore need to be sanitised + * for that purpose. Those includes are usually not compatible with glibc. + * To know which includes to use in user-space, check iwlib.h. */ +#ifdef __KERNEL__ #include /* for "caddr_t" et al */ #include /* for "struct sockaddr" et al */ #include /* for IFNAMSIZ and co... */ +#endif /* __KERNEL__ */ /***************************** VERSION *****************************/ /* @@ -80,7 +85,7 @@ * (there is some stuff that will be added in the future...) * I just plan to increment with each new version. */ -#define WIRELESS_EXT 20 +#define WIRELESS_EXT 21 /* * Changes : @@ -208,6 +213,14 @@ * V19 to V20 * ---------- * - RtNetlink requests support (SET/GET) + * + * V20 to V21 + * ---------- + * - Remove (struct net_device *)->get_wireless_stats() + * - Change length in ESSID and NICK to strlen() instead of strlen()+1 + * - Add IW_RETRY_SHORT/IW_RETRY_LONG retry modifiers + * - Power/Retry relative values no longer * 100000 + * - Add explicit flag to tell stats are in 802.11k RCPI : IW_QUAL_RCPI */ /**************************** CONSTANTS ****************************/ @@ -448,6 +461,7 @@ #define IW_QUAL_QUAL_INVALID 0x10 /* Driver doesn't provide value */ #define IW_QUAL_LEVEL_INVALID 0x20 #define IW_QUAL_NOISE_INVALID 0x40 +#define IW_QUAL_RCPI 0x80 /* Level + Noise are 802.11k RCPI */ #define IW_QUAL_ALL_INVALID 0x70 /* Frequency flags */ @@ -500,10 +514,12 @@ #define IW_RETRY_TYPE 0xF000 /* Type of parameter */ #define IW_RETRY_LIMIT 0x1000 /* Maximum number of retries*/ #define IW_RETRY_LIFETIME 0x2000 /* Maximum duration of retries in us */ -#define IW_RETRY_MODIFIER 0x000F /* Modify a parameter */ +#define IW_RETRY_MODIFIER 0x00FF /* Modify a parameter */ #define IW_RETRY_MIN 0x0001 /* Value is a minimum */ #define IW_RETRY_MAX 0x0002 /* Value is a maximum */ #define IW_RETRY_RELATIVE 0x0004 /* Value is not in seconds/ms/us */ +#define IW_RETRY_SHORT 0x0010 /* Value is for short packets */ +#define IW_RETRY_LONG 0x0020 /* Value is for long packets */ /* Scanning request flags */ #define IW_SCAN_DEFAULT 0x0000 /* Default scan of the driver */ @@ -1017,7 +1033,7 @@ struct iw_range /* Note : this frequency list doesn't need to fit channel numbers, * because each entry contain its channel index */ - __u32 enc_capa; /* IW_ENC_CAPA_* bit field */ + __u32 enc_capa; /* IW_ENC_CAPA_* bit field */ }; /* diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 13472762b18b..f47f319bb7dc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -344,8 +344,6 @@ static ssize_t wireless_show(struct class_device *cd, char *buf, if(dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats) iw = dev->wireless_handlers->get_wireless_stats(dev); - else if (dev->get_wireless_stats) - iw = dev->get_wireless_stats(dev); if (iw != NULL) ret = (*format)(iw, buf); } @@ -465,8 +463,7 @@ int netdev_register_sysfs(struct net_device *net) *groups++ = &netstat_group; #ifdef WIRELESS_EXT - if (net->get_wireless_stats - || (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)) + if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif diff --git a/net/core/wireless.c b/net/core/wireless.c index 3168fca312f7..ffff0da46c6e 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -68,6 +68,14 @@ * * v8 - 17.02.06 - Jean II * o RtNetlink requests support (SET/GET) + * + * v8b - 03.08.06 - Herbert Xu + * o Fix Wireless Event locking issues. + * + * v9 - 14.3.06 - Jean II + * o Change length in ESSID and NICK to strlen() instead of strlen()+1 + * o Make standard_ioctl_num and standard_event_num unsigned + * o Remove (struct net_device *)->get_wireless_stats() */ /***************************** INCLUDES *****************************/ @@ -234,24 +242,24 @@ static const struct iw_ioctl_description standard_ioctl[] = { [SIOCSIWESSID - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_EVENT, }, [SIOCGIWESSID - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_DUMP, }, [SIOCSIWNICKN - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, }, [SIOCGIWNICKN - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, }, [SIOCSIWRATE - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_PARAM, @@ -338,8 +346,8 @@ static const struct iw_ioctl_description standard_ioctl[] = { .max_tokens = sizeof(struct iw_pmksa), }, }; -static const int standard_ioctl_num = (sizeof(standard_ioctl) / - sizeof(struct iw_ioctl_description)); +static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) / + sizeof(struct iw_ioctl_description)); /* * Meta-data about all the additional standard Wireless Extension events @@ -389,8 +397,8 @@ static const struct iw_ioctl_description standard_event[] = { .max_tokens = sizeof(struct iw_pmkid_cand), }, }; -static const int standard_event_num = (sizeof(standard_event) / - sizeof(struct iw_ioctl_description)); +static const unsigned standard_event_num = (sizeof(standard_event) / + sizeof(struct iw_ioctl_description)); /* Size (in bytes) of the various private data types */ static const char iw_priv_type_size[] = { @@ -465,17 +473,6 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) (dev->wireless_handlers->get_wireless_stats != NULL)) return dev->wireless_handlers->get_wireless_stats(dev); - /* Old location, field to be removed in next WE */ - if(dev->get_wireless_stats) { - static int printed_message; - - if (!printed_message++) - printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n", - dev->name); - - return dev->get_wireless_stats(dev); - } - /* Not found */ return (struct iw_statistics *) NULL; } @@ -1843,8 +1840,33 @@ int wireless_rtnetlink_set(struct net_device * dev, */ #ifdef WE_EVENT_RTNETLINK +/* ---------------------------------------------------------------- */ +/* + * Locking... + * ---------- + * + * Thanks to Herbert Xu for fixing + * the locking issue in here and implementing this code ! + * + * The issue : wireless_send_event() is often called in interrupt context, + * while the Netlink layer can never be called in interrupt context. + * The fully formed RtNetlink events are queued, and then a tasklet is run + * to feed those to Netlink. + * The skb_queue is interrupt safe, and its lock is not held while calling + * Netlink, so there is no possibility of dealock. + * Jean II + */ + static struct sk_buff_head wireless_nlevent_queue; +static int __init wireless_nlevent_init(void) +{ + skb_queue_head_init(&wireless_nlevent_queue); + return 0; +} + +subsys_initcall(wireless_nlevent_init); + static void wireless_nlevent_process(unsigned long data) { struct sk_buff *skb; @@ -1921,13 +1943,6 @@ static inline void rtmsg_iwinfo(struct net_device * dev, tasklet_schedule(&wireless_nlevent_tasklet); } -static int __init wireless_nlevent_init(void) -{ - skb_queue_head_init(&wireless_nlevent_queue); - return 0; -} - -subsys_initcall(wireless_nlevent_init); #endif /* WE_EVENT_RTNETLINK */ /* ---------------------------------------------------------------- */ -- cgit v1.2.3-71-gd317 From 0b680e753724d31a9c45f059d1aad29df54584a1 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 22 Sep 2006 21:54:10 -0700 Subject: [PATCH] bonding: Add priv_flag to avoid event mishandling Add priv_flag to specifically identify bonding-involved devices. Needed because IFF_MASTER is an unreliable identifier (vlan interfaces above bonding will inherit IFF_MASTER). Misidentification of devices would cause notifier events for other devices to be erroneously processed by bonding, causing various havoc. Bug discovered by Martin Papik ; this patch is modified from his original. Signed-off-by: Martin Papik Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- drivers/net/bonding/bond_main.c | 7 ++++++- include/linux/if.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d2f460b0dbab..9e5a533a1622 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1371,6 +1371,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } new_slave->dev = slave_dev; + slave_dev->priv_flags |= IFF_BONDING; if ((bond->params.mode == BOND_MODE_TLB) || (bond->params.mode == BOND_MODE_ALB)) { @@ -1784,7 +1785,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) dev_set_mac_address(slave_dev, &addr); slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | - IFF_SLAVE_INACTIVE); + IFF_SLAVE_INACTIVE | IFF_BONDING); kfree(slave); @@ -3216,6 +3217,9 @@ static int bond_netdev_event(struct notifier_block *this, unsigned long event, v (event_dev ? event_dev->name : "None"), event); + if (!(event_dev->priv_flags & IFF_BONDING)) + return NOTIFY_DONE; + if (event_dev->flags & IFF_MASTER) { dprintk("IFF_MASTER\n"); return bond_master_netdev_event(event, event_dev); @@ -4185,6 +4189,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) /* Initialize the device options */ bond_dev->tx_queue_len = 0; bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; + bond_dev->priv_flags |= IFF_BONDING; /* At first, we block adding VLANs. That's the only way to * prevent problems that occur when adding VLANs over an diff --git a/include/linux/if.h b/include/linux/if.h index cd080d765324..a023ec1274fe 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -59,6 +59,7 @@ #define IFF_SLAVE_INACTIVE 0x4 /* bonding slave not the curr. active */ #define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ #define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */ +#define IFF_BONDING 0x20 /* bonding master or slave */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 -- cgit v1.2.3-71-gd317 From f5b2b966f032f22d3a289045a5afd4afa09f09c6 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 22 Sep 2006 21:54:53 -0700 Subject: [PATCH] bonding: Validate probe replies in ARP monitor Add logic to check ARP request / reply packets used for ARP monitor link integrity checking. The current method simply examines the slave device to see if it has sent and received traffic; this can be fooled by extraneous traffic. For example, if multiple hosts running bonding are behind a common switch, the probe traffic from the multiple instances of bonding will update the tx/rx times on each other's slave devices. Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- Documentation/networking/bonding.txt | 59 ++++++++++++ drivers/net/bonding/bond_main.c | 182 +++++++++++++++++++++++++++++++++-- drivers/net/bonding/bond_sysfs.c | 54 +++++++++++ drivers/net/bonding/bonding.h | 32 +++++- include/linux/if.h | 1 + include/linux/netdevice.h | 7 +- 6 files changed, 325 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index afac780445cd..dc942eaf490f 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -192,6 +192,17 @@ or, for backwards compatibility, the option value. E.g., arp_interval Specifies the ARP link monitoring frequency in milliseconds. + + The ARP monitor works by periodically checking the slave + devices to determine whether they have sent or received + traffic recently (the precise criteria depends upon the + bonding mode, and the state of the slave). Regular traffic is + generated via ARP probes issued for the addresses specified by + the arp_ip_target option. + + This behavior can be modified by the arp_validate option, + below. + If ARP monitoring is used in an etherchannel compatible mode (modes 0 and 2), the switch should be configured in a mode that evenly distributes packets across all links. If the @@ -213,6 +224,54 @@ arp_ip_target maximum number of targets that can be specified is 16. The default value is no IP addresses. +arp_validate + + Specifies whether or not ARP probes and replies should be + validated in the active-backup mode. This causes the ARP + monitor to examine the incoming ARP requests and replies, and + only consider a slave to be up if it is receiving the + appropriate ARP traffic. + + Possible values are: + + none or 0 + + No validation is performed. This is the default. + + active or 1 + + Validation is performed only for the active slave. + + backup or 2 + + Validation is performed only for backup slaves. + + all or 3 + + Validation is performed for all slaves. + + For the active slave, the validation checks ARP replies to + confirm that they were generated by an arp_ip_target. Since + backup slaves do not typically receive these replies, the + validation performed for backup slaves is on the ARP request + sent out via the active slave. It is possible that some + switch or network configurations may result in situations + wherein the backup slaves do not receive the ARP requests; in + such a situation, validation of backup slaves must be + disabled. + + This option is useful in network configurations in which + multiple bonding hosts are concurrently issuing ARPs to one or + more targets beyond a common switch. Should the link between + the switch and target fail (but not the switch itself), the + probe traffic generated by the multiple bonding instances will + fool the standard ARP monitor into considering the links as + still up. Use of the arp_validate option can resolve this, as + the ARP monitor will only consider ARP requests and replies + associated with its own instance of bonding. + + This option was added in bonding version 3.1.0. + downdelay Specifies the time, in milliseconds, to wait before disabling diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bafe62f7c9b7..fd521b05db83 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -96,6 +96,7 @@ static char *lacp_rate = NULL; static char *xmit_hash_policy = NULL; static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; +static char *arp_validate = NULL; struct bond_params bonding_defaults; module_param(max_bonds, int, 0); @@ -127,6 +128,8 @@ module_param(arp_interval, int, 0); MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); module_param_array(arp_ip_target, charp, NULL, 0); MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); +module_param(arp_validate, charp, 0); +MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); /*----------------------------- Global variables ----------------------------*/ @@ -170,6 +173,14 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = { { NULL, -1}, }; +struct bond_parm_tbl arp_validate_tbl[] = { +{ "none", BOND_ARP_VALIDATE_NONE}, +{ "active", BOND_ARP_VALIDATE_ACTIVE}, +{ "backup", BOND_ARP_VALIDATE_BACKUP}, +{ "all", BOND_ARP_VALIDATE_ALL}, +{ NULL, -1}, +}; + /*-------------------------- Forward declarations ---------------------------*/ static void bond_send_gratuitous_arp(struct bonding *bond); @@ -1424,6 +1435,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_compute_features(bond); + new_slave->last_arp_rx = jiffies; + if (bond->params.miimon && !bond->params.use_carrier) { link_reporting = bond_check_dev_link(bond, slave_dev, 1); @@ -1785,7 +1798,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) dev_set_mac_address(slave_dev, &addr); slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | - IFF_SLAVE_INACTIVE | IFF_BONDING); + IFF_SLAVE_INACTIVE | IFF_BONDING | + IFF_SLAVE_NEEDARP); kfree(slave); @@ -2298,6 +2312,25 @@ static int bond_has_ip(struct bonding *bond) return 0; } +static int bond_has_this_ip(struct bonding *bond, u32 ip) +{ + struct vlan_entry *vlan, *vlan_next; + + if (ip == bond->master_ip) + return 1; + + if (list_empty(&bond->vlan_list)) + return 0; + + list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, + vlan_list) { + if (ip == vlan->vlan_ip) + return 1; + } + + return 0; +} + /* * We go to the (large) trouble of VLAN tagging ARP frames because * switches in VLAN mode (especially if ports are configured as @@ -2436,6 +2469,93 @@ static void bond_send_gratuitous_arp(struct bonding *bond) } } +static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip) +{ + int i; + u32 *targets = bond->params.arp_targets; + + targets = bond->params.arp_targets; + for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { + dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] " + "%u.%u.%u.%u bhti(tip) %d\n", + NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]), + bond_has_this_ip(bond, tip)); + if (sip == targets[i]) { + if (bond_has_this_ip(bond, tip)) + slave->last_arp_rx = jiffies; + return; + } + } +} + +static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +{ + struct arphdr *arp; + struct slave *slave; + struct bonding *bond; + unsigned char *arp_ptr; + u32 sip, tip; + + if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) + goto out; + + bond = dev->priv; + read_lock(&bond->lock); + + dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", + bond->dev->name, skb->dev ? skb->dev->name : "NULL", + orig_dev ? orig_dev->name : "NULL"); + + slave = bond_get_slave_by_dev(bond, orig_dev); + if (!slave || !slave_do_arp_validate(bond, slave)) + goto out_unlock; + + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ + if (!pskb_may_pull(skb, (sizeof(struct arphdr) + + (2 * dev->addr_len) + + (2 * sizeof(u32))))) + goto out_unlock; + + arp = skb->nh.arph; + if (arp->ar_hln != dev->addr_len || + skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK || + arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4) + goto out_unlock; + + arp_ptr = (unsigned char *)(arp + 1); + arp_ptr += dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4 + dev->addr_len; + memcpy(&tip, arp_ptr, 4); + + dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u" + " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name, + slave->state, bond->params.arp_validate, + slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip)); + + /* + * Backup slaves won't see the ARP reply, but do come through + * here for each ARP probe (so we swap the sip/tip to validate + * the probe). In a "redundant switch, common router" type of + * configuration, the ARP probe will (hopefully) travel from + * the active, through one switch, the router, then the other + * switch before reaching the backup. + */ + if (slave->state == BOND_STATE_ACTIVE) + bond_validate_arp(bond, slave, sip, tip); + else + bond_validate_arp(bond, slave, tip, sip); + +out_unlock: + read_unlock(&bond->lock); +out: + dev_kfree_skb(skb); + return NET_RX_SUCCESS; +} + /* * this function is called regularly to monitor each slave's link * ensuring that traffic is being sent and received when arp monitoring @@ -2600,7 +2720,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) */ bond_for_each_slave(bond, slave, i) { if (slave->link != BOND_LINK_UP) { - if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) { + if ((jiffies - slave_last_rx(bond, slave)) <= + delta_in_ticks) { slave->link = BOND_LINK_UP; @@ -2645,7 +2766,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) if ((slave != bond->curr_active_slave) && (!bond->current_arp_slave) && - (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) && + (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) && bond_has_ip(bond))) { /* a backup slave has gone down; three times * the delta allows the current slave to be @@ -2692,7 +2813,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) * if it is up and needs to take over as the curr_active_slave */ if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || - (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && + (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) && bond_has_ip(bond))) && ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { @@ -3315,6 +3436,21 @@ static void bond_unregister_lacpdu(struct bonding *bond) dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); } +void bond_register_arp(struct bonding *bond) +{ + struct packet_type *pt = &bond->arp_mon_pt; + + pt->type = htons(ETH_P_ARP); + pt->dev = NULL; /*bond->dev;XXX*/ + pt->func = bond_arp_rcv; + dev_add_pack(pt); +} + +void bond_unregister_arp(struct bonding *bond) +{ + dev_remove_pack(&bond->arp_mon_pt); +} + /*---------------------------- Hashing Policies -----------------------------*/ /* @@ -3401,6 +3537,9 @@ static int bond_open(struct net_device *bond_dev) } else { arp_timer->function = (void *)&bond_loadbalance_arp_mon; } + if (bond->params.arp_validate) + bond_register_arp(bond); + add_timer(arp_timer); } @@ -3428,6 +3567,9 @@ static int bond_close(struct net_device *bond_dev) bond_unregister_lacpdu(bond); } + if (bond->params.arp_validate) + bond_unregister_arp(bond); + write_lock_bh(&bond->lock); @@ -4281,6 +4423,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { + int arp_validate_value; + /* * Convert string parameters. */ @@ -4484,6 +4628,29 @@ static int bond_check_params(struct bond_params *params) arp_interval = 0; } + if (arp_validate) { + if (bond_mode != BOND_MODE_ACTIVEBACKUP) { + printk(KERN_ERR DRV_NAME + ": arp_validate only supported in active-backup mode\n"); + return -EINVAL; + } + if (!arp_interval) { + printk(KERN_ERR DRV_NAME + ": arp_validate requires arp_interval\n"); + return -EINVAL; + } + + arp_validate_value = bond_parse_parm(arp_validate, + arp_validate_tbl); + if (arp_validate_value == -1) { + printk(KERN_ERR DRV_NAME + ": Error: invalid arp_validate \"%s\"\n", + arp_validate == NULL ? "NULL" : arp_validate); + return -EINVAL; + } + } else + arp_validate_value = 0; + if (miimon) { printk(KERN_INFO DRV_NAME ": MII link monitoring set to %d ms\n", @@ -4492,8 +4659,10 @@ static int bond_check_params(struct bond_params *params) int i; printk(KERN_INFO DRV_NAME - ": ARP monitoring set to %d ms with %d target(s):", - arp_interval, arp_ip_count); + ": ARP monitoring set to %d ms, validate %s, with %d target(s):", + arp_interval, + arp_validate_tbl[arp_validate_value].modename, + arp_ip_count); for (i = 0; i < arp_ip_count; i++) printk (" %s", arp_ip_target[i]); @@ -4527,6 +4696,7 @@ static int bond_check_params(struct bond_params *params) params->xmit_policy = xmit_hashtype; params->miimon = miimon; params->arp_interval = arp_interval; + params->arp_validate = arp_validate_value; params->updelay = updelay; params->downdelay = downdelay; params->use_carrier = use_carrier; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 15b6a29bb4d4..ced9ed8f995a 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -51,6 +51,7 @@ extern struct bond_params bonding_defaults; extern struct bond_parm_tbl bond_mode_tbl[]; extern struct bond_parm_tbl bond_lacp_tbl[]; extern struct bond_parm_tbl xmit_hashtype_tbl[]; +extern struct bond_parm_tbl arp_validate_tbl[]; static int expected_refcount = -1; static struct class *netdev_class; @@ -502,6 +503,53 @@ out: } static CLASS_DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, bonding_show_xmit_hash, bonding_store_xmit_hash); +/* + * Show and set arp_validate. + */ +static ssize_t bonding_show_arp_validate(struct class_device *cd, char *buf) +{ + struct bonding *bond = to_bond(cd); + + return sprintf(buf, "%s %d\n", + arp_validate_tbl[bond->params.arp_validate].modename, + bond->params.arp_validate) + 1; +} + +static ssize_t bonding_store_arp_validate(struct class_device *cd, const char *buf, size_t count) +{ + int new_value; + struct bonding *bond = to_bond(cd); + + new_value = bond_parse_parm((char *)buf, arp_validate_tbl); + if (new_value < 0) { + printk(KERN_ERR DRV_NAME + ": %s: Ignoring invalid arp_validate value %s\n", + bond->dev->name, buf); + return -EINVAL; + } + if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) { + printk(KERN_ERR DRV_NAME + ": %s: arp_validate only supported in active-backup mode.\n", + bond->dev->name); + return -EINVAL; + } + printk(KERN_INFO DRV_NAME ": %s: setting arp_validate to %s (%d).\n", + bond->dev->name, arp_validate_tbl[new_value].modename, + new_value); + + if (!bond->params.arp_validate && new_value) { + bond_register_arp(bond); + } else if (bond->params.arp_validate && !new_value) { + bond_unregister_arp(bond); + } + + bond->params.arp_validate = new_value; + + return count; +} + +static CLASS_DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate); + /* * Show and set the arp timer interval. There are two tricky bits * here. First, if ARP monitoring is activated, then we must disable @@ -914,6 +962,11 @@ static ssize_t bonding_store_miimon(struct class_device *cd, const char *buf, si "ARP monitoring. Disabling ARP monitoring...\n", bond->dev->name); bond->params.arp_interval = 0; + if (bond->params.arp_validate) { + bond_unregister_arp(bond); + bond->params.arp_validate = + BOND_ARP_VALIDATE_NONE; + } /* Kill ARP timer, else it brings bond's link down */ if (bond->mii_timer.function) { printk(KERN_INFO DRV_NAME @@ -1273,6 +1326,7 @@ static CLASS_DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, N static struct attribute *per_bond_attrs[] = { &class_device_attr_slaves.attr, &class_device_attr_mode.attr, + &class_device_attr_arp_validate.attr, &class_device_attr_arp_interval.attr, &class_device_attr_arp_ip_target.attr, &class_device_attr_downdelay.attr, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 17caafe58247..db16fee40a5f 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -22,8 +22,8 @@ #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.0.3" -#define DRV_RELDATE "March 23, 2006" +#define DRV_VERSION "3.1.0-test" +#define DRV_RELDATE "September 9, 2006" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -126,6 +126,7 @@ struct bond_params { int xmit_policy; int miimon; int arp_interval; + int arp_validate; int use_carrier; int updelay; int downdelay; @@ -151,6 +152,7 @@ struct slave { struct slave *prev; int delay; u32 jiffies; + u32 last_arp_rx; s8 link; /* one of BOND_LINK_XXXX */ s8 state; /* one of BOND_STATE_XXXX */ u32 original_flags; @@ -198,6 +200,7 @@ struct bonding { struct bond_params params; struct list_head vlan_list; struct vlan_group *vlgrp; + struct packet_type arp_mon_pt; }; /** @@ -228,6 +231,25 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) return (struct bonding *)slave->dev->master->priv; } +#define BOND_ARP_VALIDATE_NONE 0 +#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE) +#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP) +#define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \ + BOND_ARP_VALIDATE_BACKUP) + +extern inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave) +{ + return bond->params.arp_validate & (1 << slave->state); +} + +extern inline u32 slave_last_rx(struct bonding *bond, struct slave *slave) +{ + if (slave_do_arp_validate(bond, slave)) + return slave->last_arp_rx; + + return slave->dev->last_rx; +} + static inline void bond_set_slave_inactive_flags(struct slave *slave) { struct bonding *bond = slave->dev->master->priv; @@ -235,12 +257,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave) bond->params.mode != BOND_MODE_ALB) slave->state = BOND_STATE_BACKUP; slave->dev->priv_flags |= IFF_SLAVE_INACTIVE; + if (slave_do_arp_validate(bond, slave)) + slave->dev->priv_flags |= IFF_SLAVE_NEEDARP; } static inline void bond_set_slave_active_flags(struct slave *slave) { slave->state = BOND_STATE_ACTIVE; - slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE; + slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP); } static inline void bond_set_master_3ad_flags(struct bonding *bond) @@ -284,6 +308,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl); const char *bond_mode_name(int mode); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); +void bond_register_arp(struct bonding *); +void bond_unregister_arp(struct bonding *); #endif /* _LINUX_BONDING_H */ diff --git a/include/linux/if.h b/include/linux/if.h index a023ec1274fe..8018c2e22c0c 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -60,6 +60,7 @@ #define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ #define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */ #define IFF_BONDING 0x20 /* bonding master or slave */ +#define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43289127b458..afd80eff2725 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1016,7 +1016,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) } /* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast. + * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and + * ARP on active-backup slaves with arp_validate enabled. */ static inline int skb_bond_should_drop(struct sk_buff *skb) { @@ -1025,6 +1026,10 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) if (master && (dev->priv_flags & IFF_SLAVE_INACTIVE)) { + if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && + skb->protocol == __constant_htons(ETH_P_ARP)) + return 0; + if (master->priv_flags & IFF_MASTER_ALB) { if (skb->pkt_type != PACKET_BROADCAST && skb->pkt_type != PACKET_MULTICAST) -- cgit v1.2.3-71-gd317 From ddd5d35a8f7c1924049e8b6877b3177c1787e6a3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 10 Aug 2006 01:18:18 -0400 Subject: class_device_create(): make fmt argument 'const char *' Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 3 ++- include/linux/device.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/class.c b/drivers/base/class.c index 46336f1b93b6..75057aaab809 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -679,7 +679,8 @@ int class_device_register(struct class_device *class_dev) struct class_device *class_device_create(struct class *cls, struct class_device *parent, dev_t devt, - struct device *device, char *fmt, ...) + struct device *device, + const char *fmt, ...) { va_list args; struct class_device *class_dev = NULL; diff --git a/include/linux/device.h b/include/linux/device.h index 1e5f30da98bc..1fec28546525 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -277,7 +277,7 @@ extern struct class_device *class_device_create(struct class *cls, struct class_device *parent, dev_t devt, struct device *device, - char *fmt, ...) + const char *fmt, ...) __attribute__((format(printf,5,6))); extern void class_device_destroy(struct class *cls, dev_t devt); -- cgit v1.2.3-71-gd317 From 5cbe5f8a5897470698222ac9924429056e57d84c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Aug 2006 01:19:19 -0400 Subject: device_create(): make fmt argument 'const char *' Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- include/linux/device.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 04d089fd4f76..5d4b7e04471e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -583,7 +583,7 @@ static void device_create_release(struct device *dev) * been created with a call to class_create(). */ struct device *device_create(struct class *class, struct device *parent, - dev_t devt, char *fmt, ...) + dev_t devt, const char *fmt, ...) { va_list args; struct device *dev = NULL; diff --git a/include/linux/device.h b/include/linux/device.h index 1fec28546525..8d92013f9ce1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -384,7 +384,7 @@ extern void device_reprobe(struct device *dev); * Easy functions for dynamically creating devices on the fly */ extern struct device *device_create(struct class *cls, struct device *parent, - dev_t devt, char *fmt, ...) + dev_t devt, const char *fmt, ...) __attribute__((format(printf,4,5))); extern void device_destroy(struct class *cls, dev_t devt); -- cgit v1.2.3-71-gd317 From ab7d7371acc68fa9130b079a9ba879191202035f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Sandonis Date: Wed, 13 Sep 2006 15:34:05 +0200 Subject: Driver core: add const to class_create Adds const to class_create second parameter, because: struct class { const char * name; /*...*/ } Signed-off-by: Miguel Ojeda Sandonis Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 2 +- include/linux/device.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/class.c b/drivers/base/class.c index 75057aaab809..e078bc21d52e 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -197,7 +197,7 @@ static int class_device_create_uevent(struct class_device *class_dev, * Note, the pointer created here is to be destroyed when finished by * making a call to class_destroy(). */ -struct class *class_create(struct module *owner, char *name) +struct class *class_create(struct module *owner, const char *name) { struct class *cls; int retval; diff --git a/include/linux/device.h b/include/linux/device.h index 8d92013f9ce1..8a648cd94fa9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -271,7 +271,7 @@ struct class_interface { extern int class_interface_register(struct class_interface *); extern void class_interface_unregister(struct class_interface *); -extern struct class *class_create(struct module *owner, char *name); +extern struct class *class_create(struct module *owner, const char *name); extern void class_destroy(struct class *cls); extern struct class_device *class_device_create(struct class *cls, struct class_device *parent, -- cgit v1.2.3-71-gd317 From 7c8265f51073bc8632a99de78d5fd19117ed78b7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Jun 2006 14:50:29 -0700 Subject: Suspend infrastructure cleanup and extension Allow devices to participate in the suspend process more intimately, in particular, allow the final phase (with interrupts disabled) to also be open to normal devices, not just system devices. Also, allow classes to participate in device suspend. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/resume.c | 28 ++++++++-- drivers/base/power/suspend.c | 122 ++++++++++++++++++++++++++++++++----------- include/linux/device.h | 11 +++- include/linux/pm.h | 1 + kernel/power/main.c | 4 ++ 5 files changed, 130 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c index 826093ef4c7e..48e3d49d7b65 100644 --- a/drivers/base/power/resume.c +++ b/drivers/base/power/resume.c @@ -38,13 +38,35 @@ int resume_device(struct device * dev) dev_dbg(dev,"resuming\n"); error = dev->bus->resume(dev); } + if (dev->class && dev->class->resume) { + dev_dbg(dev,"class resume\n"); + error = dev->class->resume(dev); + } up(&dev->sem); TRACE_RESUME(error); return error; } +static int resume_device_early(struct device * dev) +{ + int error = 0; + TRACE_DEVICE(dev); + TRACE_RESUME(0); + if (dev->bus && dev->bus->resume_early) { + dev_dbg(dev,"EARLY resume\n"); + error = dev->bus->resume_early(dev); + } + TRACE_RESUME(error); + return error; +} + +/* + * Resume the devices that have either not gone through + * the late suspend, or that did go through it but also + * went through the early resume + */ void dpm_resume(void) { down(&dpm_list_sem); @@ -99,10 +121,8 @@ void dpm_power_up(void) struct list_head * entry = dpm_off_irq.next; struct device * dev = to_device(entry); - get_device(dev); - list_move_tail(entry, &dpm_active); - resume_device(dev); - put_device(dev); + list_move_tail(entry, &dpm_off); + resume_device_early(dev); } } diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index 69509e02f703..10e8032aee1a 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -65,7 +65,19 @@ int suspend_device(struct device * dev, pm_message_t state) dev->power.prev_state = dev->power.power_state; - if (dev->bus && dev->bus->suspend && !dev->power.power_state.event) { + if (dev->class && dev->class->suspend && !dev->power.power_state.event) { + dev_dbg(dev, "class %s%s\n", + suspend_verb(state.event), + ((state.event == PM_EVENT_SUSPEND) + && device_may_wakeup(dev)) + ? ", may wakeup" + : "" + ); + error = dev->class->suspend(dev, state); + suspend_report_result(dev->class->suspend, error); + } + + if (!error && dev->bus && dev->bus->suspend && !dev->power.power_state.event) { dev_dbg(dev, "%s%s\n", suspend_verb(state.event), ((state.event == PM_EVENT_SUSPEND) @@ -81,15 +93,74 @@ int suspend_device(struct device * dev, pm_message_t state) } +/* + * This is called with interrupts off, only a single CPU + * running. We can't do down() on a semaphore (and we don't + * need the protection) + */ +static int suspend_device_late(struct device *dev, pm_message_t state) +{ + int error = 0; + + if (dev->power.power_state.event) { + dev_dbg(dev, "PM: suspend_late %d-->%d\n", + dev->power.power_state.event, state.event); + } + + if (dev->bus && dev->bus->suspend_late && !dev->power.power_state.event) { + dev_dbg(dev, "LATE %s%s\n", + suspend_verb(state.event), + ((state.event == PM_EVENT_SUSPEND) + && device_may_wakeup(dev)) + ? ", may wakeup" + : "" + ); + error = dev->bus->suspend_late(dev, state); + suspend_report_result(dev->bus->suspend_late, error); + } + return error; +} + +/** + * device_prepare_suspend - save state and prepare to suspend + * + * NOTE! Devices cannot detach at this point - not only do we + * hold the device list semaphores over the whole prepare, but + * the whole point is to do non-invasive preparatory work, not + * the actual suspend. + */ +int device_prepare_suspend(pm_message_t state) +{ + int error = 0; + struct device * dev; + + down(&dpm_sem); + down(&dpm_list_sem); + list_for_each_entry_reverse(dev, &dpm_active, power.entry) { + if (!dev->bus || !dev->bus->suspend_prepare) + continue; + error = dev->bus->suspend_prepare(dev, state); + if (error) + break; + } + up(&dpm_list_sem); + up(&dpm_sem); + return error; +} + /** * device_suspend - Save state and stop all devices in system. * @state: Power state to put each device in. * * Walk the dpm_active list, call ->suspend() for each device, and move - * it to dpm_off. - * Check the return value for each. If it returns 0, then we move the - * the device to the dpm_off list. If it returns -EAGAIN, we move it to - * the dpm_off_irq list. If we get a different error, try and back out. + * it to the dpm_off list. + * + * (For historical reasons, if it returns -EAGAIN, that used to mean + * that the device would be called again with interrupts disabled. + * These days, we use the "suspend_late()" callback for that, so we + * print a warning and consider it an error). + * + * If we get a different error, try and back out. * * If we hit a failure with any of the devices, call device_resume() * above to bring the suspended devices back to life. @@ -115,39 +186,27 @@ int device_suspend(pm_message_t state) /* Check if the device got removed */ if (!list_empty(&dev->power.entry)) { - /* Move it to the dpm_off or dpm_off_irq list */ + /* Move it to the dpm_off list */ if (!error) list_move(&dev->power.entry, &dpm_off); - else if (error == -EAGAIN) { - list_move(&dev->power.entry, &dpm_off_irq); - error = 0; - } } if (error) printk(KERN_ERR "Could not suspend device %s: " - "error %d\n", kobject_name(&dev->kobj), error); + "error %d%s\n", + kobject_name(&dev->kobj), error, + error == -EAGAIN ? " (please convert to suspend_late)" : ""); put_device(dev); } up(&dpm_list_sem); - if (error) { - /* we failed... before resuming, bring back devices from - * dpm_off_irq list back to main dpm_off list, we do want - * to call resume() on them, in case they partially suspended - * despite returning -EAGAIN - */ - while (!list_empty(&dpm_off_irq)) { - struct list_head * entry = dpm_off_irq.next; - list_move(entry, &dpm_off); - } + if (error) dpm_resume(); - } + up(&dpm_sem); return error; } EXPORT_SYMBOL_GPL(device_suspend); - /** * device_power_down - Shut down special devices. * @state: Power state to enter. @@ -162,14 +221,17 @@ int device_power_down(pm_message_t state) int error = 0; struct device * dev; - list_for_each_entry_reverse(dev, &dpm_off_irq, power.entry) { - if ((error = suspend_device(dev, state))) - break; + while (!list_empty(&dpm_off)) { + struct list_head * entry = dpm_off.prev; + + dev = to_device(entry); + error = suspend_device_late(dev, state); + if (error) + goto Error; + list_move(&dev->power.entry, &dpm_off_irq); } - if (error) - goto Error; - if ((error = sysdev_suspend(state))) - goto Error; + + error = sysdev_suspend(state); Done: return error; Error: diff --git a/include/linux/device.h b/include/linux/device.h index 8a648cd94fa9..b40be6fca6fa 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -51,8 +51,12 @@ struct bus_type { int (*probe)(struct device * dev); int (*remove)(struct device * dev); void (*shutdown)(struct device * dev); - int (*suspend)(struct device * dev, pm_message_t state); - int (*resume)(struct device * dev); + + int (*suspend_prepare)(struct device * dev, pm_message_t state); + int (*suspend)(struct device * dev, pm_message_t state); + int (*suspend_late)(struct device * dev, pm_message_t state); + int (*resume_early)(struct device * dev); + int (*resume)(struct device * dev); }; extern int bus_register(struct bus_type * bus); @@ -154,6 +158,9 @@ struct class { void (*release)(struct class_device *dev); void (*class_release)(struct class *class); + + int (*suspend)(struct device *, pm_message_t state); + int (*resume)(struct device *); }; extern int class_register(struct class *); diff --git a/include/linux/pm.h b/include/linux/pm.h index 658c1b93d5bb..096fb6f754cf 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -190,6 +190,7 @@ extern void device_resume(void); extern suspend_disk_method_t pm_disk_mode; extern int device_suspend(pm_message_t state); +extern int device_prepare_suspend(pm_message_t state); #define device_set_wakeup_enable(dev,val) \ ((dev)->power.should_wakeup = !!(val)) diff --git a/kernel/power/main.c b/kernel/power/main.c index 6d295c776794..0c3ed6ac938e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -57,6 +57,10 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; + error = device_prepare_suspend(PMSG_SUSPEND); + if (error) + return error; + pm_prepare_console(); disable_nonboot_cpus(); -- cgit v1.2.3-71-gd317 From cbd69dbbf1adfce6e048f15afc8629901ca9dae5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Jun 2006 14:50:29 -0700 Subject: Suspend changes for PCI core Changes the PCI core to use the new suspend infrastructure changes. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 41 ++++++++++++++++++++++++++++++++++++++++- include/linux/pci.h | 3 +++ 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 474e9cd0e9e4..9e7d6ceb3805 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -264,6 +264,19 @@ static int pci_device_remove(struct device * dev) return 0; } +static int pci_device_suspend_prepare(struct device * dev, pm_message_t state) +{ + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + int i = 0; + + if (drv && drv->suspend_prepare) { + i = drv->suspend_prepare(pci_dev, state); + suspend_report_result(drv->suspend_prepare, i); + } + return i; +} + static int pci_device_suspend(struct device * dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); @@ -279,6 +292,18 @@ static int pci_device_suspend(struct device * dev, pm_message_t state) return i; } +static int pci_device_suspend_late(struct device * dev, pm_message_t state) +{ + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + int i = 0; + + if (drv && drv->suspend_late) { + i = drv->suspend_late(pci_dev, state); + suspend_report_result(drv->suspend_late, i); + } + return i; +} /* * Default resume method for devices that have no driver provided resume, @@ -313,6 +338,17 @@ static int pci_device_resume(struct device * dev) return error; } +static int pci_device_resume_early(struct device * dev) +{ + int error = 0; + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + + if (drv && drv->resume_early) + error = drv->resume_early(pci_dev); + return error; +} + static void pci_device_shutdown(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); @@ -508,9 +544,12 @@ struct bus_type pci_bus_type = { .uevent = pci_uevent, .probe = pci_device_probe, .remove = pci_device_remove, + .suspend_prepare= pci_device_suspend_prepare, .suspend = pci_device_suspend, - .shutdown = pci_device_shutdown, + .suspend_late = pci_device_suspend_late, + .resume_early = pci_device_resume_early, .resume = pci_device_resume, + .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 8565b81d7fbc..4b2e629467c7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -345,7 +345,10 @@ struct pci_driver { const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */ int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ + int (*suspend_prepare) (struct pci_dev *dev, pm_message_t state); int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ + int (*suspend_late) (struct pci_dev *dev, pm_message_t state); + int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */ void (*shutdown) (struct pci_dev *dev); -- cgit v1.2.3-71-gd317 From 82bb67f2c1f9ef438c56ac24e7dca027fe7289b5 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 14 Aug 2006 23:11:04 -0700 Subject: PM: define PM_EVENT_PRETHAW This adds a new pm_message_t event type to use when preparing to restore a swsusp snapshot. Devices that have been initialized by Linux after resume (rather than left in power-up-reset state) may need to be reset; this new event type give drivers the chance to do that. The drivers that will care about this are those which understand more hardware states than just "on" and "reset", relying on hardware state during resume() methods to be either the state left by the preceding suspend(), or a power-lost reset. The best current example of this class of drivers are USB host controller drivers, which currently do not work through swsusp when they're statically linked. When the swsusp freeze/thaw mechanism kicks in, a troublesome third state could exist: one state set up by a different kernel instance, before a snapshot image is resumed. This mechanism lets drivers prevent that state. Signed-off-by: David Brownell Cc: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 62 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 096fb6f754cf..6b27e07aef19 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -142,29 +142,61 @@ typedef struct pm_message { } pm_message_t; /* - * There are 4 important states driver can be in: - * ON -- driver is working - * FREEZE -- stop operations and apply whatever policy is applicable to a - * suspended driver of that class, freeze queues for block like IDE - * does, drop packets for ethernet, etc... stop DMA engine too etc... - * so a consistent image can be saved; but do not power any hardware - * down. - * SUSPEND - like FREEZE, but hardware is doing as much powersaving as - * possible. Roughly pci D3. + * Several driver power state transitions are externally visible, affecting + * the state of pending I/O queues and (for drivers that touch hardware) + * interrupts, wakeups, DMA, and other hardware state. There may also be + * internal transitions to various low power modes, which are transparent + * to the rest of the driver stack (such as a driver that's ON gating off + * clocks which are not in active use). * - * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 - * (SUSPEND). We'll need to fix the drivers. So yes, putting 3 to all different - * defines is intentional, and will go away as soon as drivers are fixed. Also - * note that typedef is neccessary, we'll probably want to switch to - * typedef struct pm_message_t { int event; int flags; } pm_message_t - * or something similar soon. + * One transition is triggered by resume(), after a suspend() call; the + * message is implicit: + * + * ON Driver starts working again, responding to hardware events + * and software requests. The hardware may have gone through + * a power-off reset, or it may have maintained state from the + * previous suspend() which the driver will rely on while + * resuming. On most platforms, there are no restrictions on + * availability of resources like clocks during resume(). + * + * Other transitions are triggered by messages sent using suspend(). All + * these transitions quiesce the driver, so that I/O queues are inactive. + * That commonly entails turning off IRQs and DMA; there may be rules + * about how to quiesce that are specific to the bus or the device's type. + * (For example, network drivers mark the link state.) Other details may + * differ according to the message: + * + * SUSPEND Quiesce, enter a low power device state appropriate for + * the upcoming system state (such as PCI_D3hot), and enable + * wakeup events as appropriate. + * + * FREEZE Quiesce operations so that a consistent image can be saved; + * but do NOT otherwise enter a low power device state, and do + * NOT emit system wakeup events. + * + * PRETHAW Quiesce as if for FREEZE; additionally, prepare for restoring + * the system from a snapshot taken after an earlier FREEZE. + * Some drivers will need to reset their hardware state instead + * of preserving it, to ensure that it's never mistaken for the + * state which that earlier snapshot had set up. + * + * A minimally power-aware driver treats all messages as SUSPEND, fully + * reinitializes its device during resume() -- whether or not it was reset + * during the suspend/resume cycle -- and can't issue wakeup events. + * + * More power-aware drivers may also use low power states at runtime as + * well as during system sleep states like PM_SUSPEND_STANDBY. They may + * be able to use wakeup events to exit from runtime low-power states, + * or from system low-power states such as standby or suspend-to-RAM. */ #define PM_EVENT_ON 0 #define PM_EVENT_FREEZE 1 #define PM_EVENT_SUSPEND 2 +#define PM_EVENT_PRETHAW 3 #define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, }) #define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) #define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) -- cgit v1.2.3-71-gd317 From 1d3a82af45428c5e8deaa119cdeb79611ae46371 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 30 Aug 2006 14:09:47 -0700 Subject: PM: no suspend_prepare() phase Remove the new suspend_prepare() phase. It doesn't seem very usable, has never been tested, doesn't address fault cleanup, and would need a sibling resume_complete(); plus there are no real use cases. It could be restored later if those issues get resolved. Signed-off-by: David Brownell Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/suspend.c | 27 --------------------------- drivers/pci/pci-driver.c | 14 -------------- include/linux/device.h | 1 - include/linux/pci.h | 1 - kernel/power/main.c | 4 ---- 5 files changed, 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index e86db83746ac..6453c25103d2 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -117,33 +117,6 @@ static int suspend_device_late(struct device *dev, pm_message_t state) return error; } -/** - * device_prepare_suspend - save state and prepare to suspend - * - * NOTE! Devices cannot detach at this point - not only do we - * hold the device list semaphores over the whole prepare, but - * the whole point is to do non-invasive preparatory work, not - * the actual suspend. - */ -int device_prepare_suspend(pm_message_t state) -{ - int error = 0; - struct device * dev; - - down(&dpm_sem); - down(&dpm_list_sem); - list_for_each_entry_reverse(dev, &dpm_active, power.entry) { - if (!dev->bus || !dev->bus->suspend_prepare) - continue; - error = dev->bus->suspend_prepare(dev, state); - if (error) - break; - } - up(&dpm_list_sem); - up(&dpm_sem); - return error; -} - /** * device_suspend - Save state and stop all devices in system. * @state: Power state to put each device in. diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 9e7d6ceb3805..8948ac9ab681 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -264,19 +264,6 @@ static int pci_device_remove(struct device * dev) return 0; } -static int pci_device_suspend_prepare(struct device * dev, pm_message_t state) -{ - struct pci_dev * pci_dev = to_pci_dev(dev); - struct pci_driver * drv = pci_dev->driver; - int i = 0; - - if (drv && drv->suspend_prepare) { - i = drv->suspend_prepare(pci_dev, state); - suspend_report_result(drv->suspend_prepare, i); - } - return i; -} - static int pci_device_suspend(struct device * dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); @@ -544,7 +531,6 @@ struct bus_type pci_bus_type = { .uevent = pci_uevent, .probe = pci_device_probe, .remove = pci_device_remove, - .suspend_prepare= pci_device_suspend_prepare, .suspend = pci_device_suspend, .suspend_late = pci_device_suspend_late, .resume_early = pci_device_resume_early, diff --git a/include/linux/device.h b/include/linux/device.h index b40be6fca6fa..b3646462d6dc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -52,7 +52,6 @@ struct bus_type { int (*remove)(struct device * dev); void (*shutdown)(struct device * dev); - int (*suspend_prepare)(struct device * dev, pm_message_t state); int (*suspend)(struct device * dev, pm_message_t state); int (*suspend_late)(struct device * dev, pm_message_t state); int (*resume_early)(struct device * dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4b2e629467c7..9514bbfe96e2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -345,7 +345,6 @@ struct pci_driver { const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */ int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ - int (*suspend_prepare) (struct pci_dev *dev, pm_message_t state); int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*suspend_late) (struct pci_dev *dev, pm_message_t state); int (*resume_early) (struct pci_dev *dev); diff --git a/kernel/power/main.c b/kernel/power/main.c index 0c3ed6ac938e..6d295c776794 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -57,10 +57,6 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; - error = device_prepare_suspend(PMSG_SUSPEND); - if (error) - return error; - pm_prepare_console(); disable_nonboot_cpus(); -- cgit v1.2.3-71-gd317 From 386415d88b1ae50304f9c61aa3e0db082fa90428 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 3 Sep 2006 13:16:45 -0700 Subject: PM: platform_bus and late_suspend/early_resume Teach platform_bus about the new suspend_late/resume_early PM calls, issued with IRQs off. Do we really need sysdev and friends any more, or can janitors start switching its users over to platform_device so we can do a minor code-ectomy? Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 30 ++++++++++++++++++++++++++++-- include/linux/platform_device.h | 2 ++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 2b8755db76c6..940ce41f1887 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -505,12 +505,36 @@ static int platform_match(struct device * dev, struct device_driver * drv) return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0); } -static int platform_suspend(struct device * dev, pm_message_t state) +static int platform_suspend(struct device *dev, pm_message_t mesg) { int ret = 0; if (dev->driver && dev->driver->suspend) - ret = dev->driver->suspend(dev, state); + ret = dev->driver->suspend(dev, mesg); + + return ret; +} + +static int platform_suspend_late(struct device *dev, pm_message_t mesg) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + int ret = 0; + + if (dev->driver && drv->suspend_late) + ret = drv->suspend_late(pdev, mesg); + + return ret; +} + +static int platform_resume_early(struct device *dev) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + int ret = 0; + + if (dev->driver && drv->resume_early) + ret = drv->resume_early(pdev); return ret; } @@ -531,6 +555,8 @@ struct bus_type platform_bus_type = { .match = platform_match, .uevent = platform_uevent, .suspend = platform_suspend, + .suspend_late = platform_suspend_late, + .resume_early = platform_resume_early, .resume = platform_resume, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 782090c68932..29cd6dee13db 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -49,6 +49,8 @@ struct platform_driver { int (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); + int (*suspend_late)(struct platform_device *, pm_message_t state); + int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); struct device_driver driver; }; -- cgit v1.2.3-71-gd317 From de0ff00d723fd821d372496e2c084805644aa5e1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Jun 2006 00:06:09 -0700 Subject: Driver core: add groups support to struct device This is needed for the network class devices in order to be able to convert over to use struct device. Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/device.h | 1 + 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 5d4b7e04471e..641c0c42bb48 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -197,6 +197,35 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, return count; } +static int device_add_groups(struct device *dev) +{ + int i; + int error = 0; + + if (dev->groups) { + for (i = 0; dev->groups[i]; i++) { + error = sysfs_create_group(&dev->kobj, dev->groups[i]); + if (error) { + while (--i >= 0) + sysfs_remove_group(&dev->kobj, dev->groups[i]); + goto out; + } + } + } +out: + return error; +} + +static void device_remove_groups(struct device *dev) +{ + int i; + if (dev->groups) { + for (i = 0; dev->groups[i]; i++) { + sysfs_remove_group(&dev->kobj, dev->groups[i]); + } + } +} + static ssize_t show_dev(struct device *dev, struct device_attribute *attr, char *buf) { @@ -350,6 +379,8 @@ int device_add(struct device *dev) sysfs_create_link(&dev->parent->kobj, &dev->kobj, class_name); } + if ((error = device_add_groups(dev))) + goto GroupError; if ((error = device_pm_add(dev))) goto PMError; if ((error = bus_add_device(dev))) @@ -376,6 +407,8 @@ int device_add(struct device *dev) BusError: device_pm_remove(dev); PMError: + device_remove_groups(dev); + GroupError: if (dev->devt_attr) { device_remove_file(dev, dev->devt_attr); kfree(dev->devt_attr); @@ -470,6 +503,7 @@ void device_del(struct device * dev) up(&dev->class->sem); } device_remove_file(dev, &dev->uevent_attr); + device_remove_groups(dev); /* Notify the platform of the removal, in case they * need to do anything... diff --git a/include/linux/device.h b/include/linux/device.h index b3646462d6dc..994d3ebd53f4 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -344,6 +344,7 @@ struct device { struct list_head node; struct class *class; /* optional*/ dev_t devt; /* dev_t, creates the sysfs "dev" */ + struct attribute_group **groups; /* optional groups */ void (*release)(struct device * dev); }; -- cgit v1.2.3-71-gd317 From 2620efef7029bb040430f50f0fc148f2d5e002ad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 28 Jun 2006 16:19:58 -0700 Subject: Driver core: add ability for classes to handle devices properly This adds two new callbacks to the class structure: int (*dev_uevent)(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size); void (*dev_release)(struct device *dev); And one pointer: struct device_attribute * dev_attrs; which all corrispond with the same thing as the "normal" class devices do, yet this is for when a struct device is bound to a class. Someday soon, struct class_device will go away, and then the other fields in this structure can be removed too. But this is necessary in order to get the transition to work properly. Tested out on a network core patch that converted it to use struct device instead of struct class_device. Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 4 ++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 5c91d0d81e78..f1228f25efe0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -94,6 +94,8 @@ static void device_release(struct kobject * kobj) if (dev->release) dev->release(dev); + else if (dev->class && dev->class->dev_release) + dev->class->dev_release(dev); else { printk(KERN_ERR "Device '%s' does not have a release() function, " "it is broken and must be fixed.\n", @@ -183,6 +185,15 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, char **envp, } } + if (dev->class && dev->class->dev_uevent) { + /* have the class specific function add its stuff */ + retval = dev->class->dev_uevent(dev, envp, num_envp, buffer, buffer_size); + if (retval) { + pr_debug("%s - dev_uevent() returned %d\n", + __FUNCTION__, retval); + } + } + return retval; } @@ -228,6 +239,43 @@ static void device_remove_groups(struct device *dev) } } +static int device_add_attrs(struct device *dev) +{ + struct class *class = dev->class; + int error = 0; + int i; + + if (!class) + return 0; + + if (class->dev_attrs) { + for (i = 0; attr_name(class->dev_attrs[i]); i++) { + error = device_create_file(dev, &class->dev_attrs[i]); + if (error) + break; + } + } + if (error) + while (--i >= 0) + device_remove_file(dev, &class->dev_attrs[i]); + return error; +} + +static void device_remove_attrs(struct device *dev) +{ + struct class *class = dev->class; + int i; + + if (!class) + return; + + if (class->dev_attrs) { + for (i = 0; attr_name(class->dev_attrs[i]); i++) + device_remove_file(dev, &class->dev_attrs[i]); + } +} + + static ssize_t show_dev(struct device *dev, struct device_attribute *attr, char *buf) { @@ -382,6 +430,8 @@ int device_add(struct device *dev) } } + if ((error = device_add_attrs(dev))) + goto AttrsError; if ((error = device_add_groups(dev))) goto GroupError; if ((error = device_pm_add(dev))) @@ -412,6 +462,8 @@ int device_add(struct device *dev) PMError: device_remove_groups(dev); GroupError: + device_remove_attrs(dev); + AttrsError: if (dev->devt_attr) { device_remove_file(dev, dev->devt_attr); kfree(dev->devt_attr); @@ -509,6 +561,7 @@ void device_del(struct device * dev) } device_remove_file(dev, &dev->uevent_attr); device_remove_groups(dev); + device_remove_attrs(dev); /* Notify the platform of the removal, in case they * need to do anything... diff --git a/include/linux/device.h b/include/linux/device.h index 994d3ebd53f4..3122bd25ce42 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -151,12 +151,16 @@ struct class { struct class_attribute * class_attrs; struct class_device_attribute * class_dev_attrs; + struct device_attribute * dev_attrs; int (*uevent)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size); + int (*dev_uevent)(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size); void (*release)(struct class_device *dev); void (*class_release)(struct class *class); + void (*dev_release)(struct device *dev); int (*suspend)(struct device *, pm_message_t state); int (*resume)(struct device *); -- cgit v1.2.3-71-gd317 From a2de48cace5d0993da6cfa28b276ae724dc3569b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 Jul 2006 14:31:12 -0700 Subject: Driver core: add device_rename function The network layer needs this to convert to using struct device instead of a struct class_device. Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 1 + 2 files changed, 56 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index f1228f25efe0..0db47561331e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -736,3 +736,58 @@ void device_destroy(struct class *class, dev_t devt) device_unregister(dev); } EXPORT_SYMBOL_GPL(device_destroy); + +/** + * device_rename - renames a device + * @dev: the pointer to the struct device to be renamed + * @new_name: the new name of the device + */ +int device_rename(struct device *dev, char *new_name) +{ + char *old_class_name = NULL; + char *new_class_name = NULL; + char *old_symlink_name = NULL; + int error; + + dev = get_device(dev); + if (!dev) + return -EINVAL; + + pr_debug("DEVICE: renaming '%s' to '%s'\n", dev->bus_id, new_name); + + if ((dev->class) && (dev->parent)) + old_class_name = make_class_name(dev->class->name, &dev->kobj); + + if (dev->class) { + old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); + if (!old_symlink_name) + return -ENOMEM; + strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE); + } + + strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); + + error = kobject_rename(&dev->kobj, new_name); + + if (old_class_name) { + new_class_name = make_class_name(dev->class->name, &dev->kobj); + if (new_class_name) { + sysfs_create_link(&dev->parent->kobj, &dev->kobj, + new_class_name); + sysfs_remove_link(&dev->parent->kobj, old_class_name); + } + } + if (dev->class) { + sysfs_remove_link(&dev->class->subsys.kset.kobj, + old_symlink_name); + sysfs_create_link(&dev->class->subsys.kset.kobj, &dev->kobj, + dev->bus_id); + } + put_device(dev); + + kfree(old_class_name); + kfree(new_class_name); + kfree(old_symlink_name); + + return error; +} diff --git a/include/linux/device.h b/include/linux/device.h index 3122bd25ce42..3400e09bf458 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -380,6 +380,7 @@ extern int device_add(struct device * dev); extern void device_del(struct device * dev); extern int device_for_each_child(struct device *, void *, int (*fn)(struct device *, void *)); +extern int device_rename(struct device *dev, char *new_name); /* * Manual binding of a device to driver. See drivers/base/bus.c -- cgit v1.2.3-71-gd317 From c205ef4880273d2de4ee5388d4e52227ff688cc4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 7 Aug 2006 22:19:37 -0700 Subject: Driver core: create devices/virtual/ tree This change creates a devices/virtual/CLASS_NAME tree for struct devices that belong to a class, yet do not have a "real" struct device for a parent. It automatically creates the directories on the fly as needed. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 17 +++++++++++++++++ drivers/base/core.c | 7 +++++++ include/linux/device.h | 5 ++++- 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/class.c b/drivers/base/class.c index e078bc21d52e..cbdf47c0c60d 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -19,6 +19,8 @@ #include #include "base.h" +extern struct subsystem devices_subsys; + #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) #define to_class(obj) container_of(obj, struct class, subsys.kset.kobj) @@ -878,7 +880,22 @@ void class_interface_unregister(struct class_interface *class_intf) class_put(parent); } +int virtual_device_parent(struct device *dev) +{ + if (!dev->class) + return -ENODEV; + + if (!dev->class->virtual_dir) { + static struct kobject *virtual_dir = NULL; + if (!virtual_dir) + virtual_dir = kobject_add_dir(&devices_subsys.kset.kobj, "virtual"); + dev->class->virtual_dir = kobject_add_dir(virtual_dir, dev->class->name); + } + + dev->kobj.parent = dev->class->virtual_dir; + return 0; +} int __init classes_init(void) { diff --git a/drivers/base/core.c b/drivers/base/core.c index 0db47561331e..e21a65fc043e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -378,6 +378,13 @@ int device_add(struct device *dev) if (!dev || !strlen(dev->bus_id)) goto Error; + /* if this is a class device, and has no parent, create one */ + if ((dev->class) && (dev->parent == NULL)) { + error = virtual_device_parent(dev); + if (error) + goto Error; + } + parent = get_device(dev->parent); pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id); diff --git a/include/linux/device.h b/include/linux/device.h index 3400e09bf458..bbb0d6b5d232 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -149,6 +149,8 @@ struct class { struct list_head interfaces; struct semaphore sem; /* locks both the children and interfaces lists */ + struct kobject *virtual_dir; + struct class_attribute * class_attrs; struct class_device_attribute * class_dev_attrs; struct device_attribute * dev_attrs; @@ -291,7 +293,6 @@ extern struct class_device *class_device_create(struct class *cls, __attribute__((format(printf,5,6))); extern void class_device_destroy(struct class *cls, dev_t devt); - /* interface for exporting device attributes */ struct device_attribute { struct attribute attr; @@ -400,6 +401,8 @@ extern struct device *device_create(struct class *cls, struct device *parent, __attribute__((format(printf,4,5))); extern void device_destroy(struct class *cls, dev_t devt); +extern int virtual_device_parent(struct device *dev); + /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't -- cgit v1.2.3-71-gd317 From c47ed219ba81632595e9f02e27318151fec16c9e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Sep 2006 15:34:05 +0200 Subject: Class: add support for class interfaces for devices When moving class_device usage over to device, we need to handle class_interfaces properly with devices. This patch adds that support. Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 10 ++++++++++ drivers/base/core.c | 14 +++++++++++++- include/linux/device.h | 2 ++ 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/class.c b/drivers/base/class.c index cbdf47c0c60d..b06b0e2b9c62 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -842,6 +842,7 @@ int class_interface_register(struct class_interface *class_intf) { struct class *parent; struct class_device *class_dev; + struct device *dev; if (!class_intf || !class_intf->class) return -ENODEV; @@ -856,6 +857,10 @@ int class_interface_register(struct class_interface *class_intf) list_for_each_entry(class_dev, &parent->children, node) class_intf->add(class_dev, class_intf); } + if (class_intf->add_dev) { + list_for_each_entry(dev, &parent->devices, node) + class_intf->add_dev(dev, class_intf); + } up(&parent->sem); return 0; @@ -865,6 +870,7 @@ void class_interface_unregister(struct class_interface *class_intf) { struct class * parent = class_intf->class; struct class_device *class_dev; + struct device *dev; if (!parent) return; @@ -875,6 +881,10 @@ void class_interface_unregister(struct class_interface *class_intf) list_for_each_entry(class_dev, &parent->children, node) class_intf->remove(class_dev, class_intf); } + if (class_intf->remove_dev) { + list_for_each_entry(dev, &parent->devices, node) + class_intf->remove_dev(dev, class_intf); + } up(&parent->sem); class_put(parent); diff --git a/drivers/base/core.c b/drivers/base/core.c index e21a65fc043e..1d3d3582fcca 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -372,6 +372,7 @@ int device_add(struct device *dev) { struct device *parent = NULL; char *class_name = NULL; + struct class_interface *class_intf; int error = -EINVAL; dev = get_device(dev); @@ -451,9 +452,14 @@ int device_add(struct device *dev) klist_add_tail(&dev->knode_parent, &parent->klist_children); if (dev->class) { - /* tie the class to the device */ down(&dev->class->sem); + /* tie the class to the device */ list_add_tail(&dev->node, &dev->class->devices); + + /* notify any interfaces that the device is here */ + list_for_each_entry(class_intf, &dev->class->interfaces, node) + if (class_intf->add_dev) + class_intf->add_dev(dev, class_intf); up(&dev->class->sem); } @@ -548,6 +554,7 @@ void device_del(struct device * dev) { struct device * parent = dev->parent; char *class_name = NULL; + struct class_interface *class_intf; if (parent) klist_del(&dev->knode_parent); @@ -563,6 +570,11 @@ void device_del(struct device * dev) } kfree(class_name); down(&dev->class->sem); + /* notify any interfaces that the device is now gone */ + list_for_each_entry(class_intf, &dev->class->interfaces, node) + if (class_intf->remove_dev) + class_intf->remove_dev(dev, class_intf); + /* remove the device from the class list */ list_del_init(&dev->node); up(&dev->class->sem); } diff --git a/include/linux/device.h b/include/linux/device.h index bbb0d6b5d232..e0fae0e76fa9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -278,6 +278,8 @@ struct class_interface { int (*add) (struct class_device *, struct class_interface *); void (*remove) (struct class_device *, struct class_interface *); + int (*add_dev) (struct device *, struct class_interface *); + void (*remove_dev) (struct device *, struct class_interface *); }; extern int class_interface_register(struct class_interface *); -- cgit v1.2.3-71-gd317 From 2589f1887b0bf9f08ec3d7f3c5705ccb7c628076 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Sep 2006 09:39:19 -0700 Subject: Driver core: add ability for devices to create and remove bin files Makes it easier for devices to create and remove binary attribute files so they don't have to call directly into sysfs. This is needed to help with the conversion from struct class_device to struct device. Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 26 ++++++++++++++++++++++++++ include/linux/device.h | 4 ++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 1d3d3582fcca..bc9f35c81691 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -319,6 +319,32 @@ void device_remove_file(struct device * dev, struct device_attribute * attr) } } +/** + * device_create_bin_file - create sysfs binary attribute file for device. + * @dev: device. + * @attr: device binary attribute descriptor. + */ +int device_create_bin_file(struct device *dev, struct bin_attribute *attr) +{ + int error = -EINVAL; + if (dev) + error = sysfs_create_bin_file(&dev->kobj, attr); + return error; +} +EXPORT_SYMBOL_GPL(device_create_bin_file); + +/** + * device_remove_bin_file - remove sysfs binary attribute file + * @dev: device. + * @attr: device binary attribute descriptor. + */ +void device_remove_bin_file(struct device *dev, struct bin_attribute *attr) +{ + if (dev) + sysfs_remove_bin_file(&dev->kobj, attr); +} +EXPORT_SYMBOL_GPL(device_remove_bin_file); + static void klist_children_get(struct klist_node *n) { struct device *dev = container_of(n, struct device, knode_parent); diff --git a/include/linux/device.h b/include/linux/device.h index e0fae0e76fa9..7d447d7271c5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -309,6 +309,10 @@ struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store) extern int device_create_file(struct device *device, struct device_attribute * entry); extern void device_remove_file(struct device * dev, struct device_attribute * attr); +extern int __must_check device_create_bin_file(struct device *dev, + struct bin_attribute *attr); +extern void device_remove_bin_file(struct device *dev, + struct bin_attribute *attr); struct device { struct klist klist_children; struct klist_node knode_parent; /* node in sibling list */ -- cgit v1.2.3-71-gd317 From 995982ca79d9262869513948ec7c540f32035491 Mon Sep 17 00:00:00 2001 From: "Randy.Dunlap" Date: Mon, 10 Jul 2006 23:05:25 -0700 Subject: sysfs_remove_bin_file: no return value, dump_stack on error Make sysfs_remove_bin_file() void. If it detects an error, printk the file name and call dump_stack(). sysfs_hash_and_remove() now returns an error code indicating its success or failure so that sysfs_remove_bin_file() can know success/failure. Convert the only driver that checked the return value of sysfs_remove_bin_file(). Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/acpiphp_ibm.c | 4 +--- fs/sysfs/bin.c | 13 ++++++++----- fs/sysfs/inode.c | 11 ++++++++--- fs/sysfs/sysfs.h | 2 +- include/linux/sysfs.h | 2 +- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 317457dd4014..d0a07d9ab30c 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -487,9 +487,7 @@ static void __exit ibm_acpiphp_exit(void) if (ACPI_FAILURE(status)) err("%s: Notification handler removal failed\n", __FUNCTION__); /* remove the /sys entries */ - if (sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr)) - err("%s: removal of sysfs file apci_table failed\n", - __FUNCTION__); + sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr); } module_init(ibm_acpiphp_init); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index c16a93c353c0..98022e41cda1 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -176,7 +177,6 @@ const struct file_operations bin_fops = { * sysfs_create_bin_file - create binary file for object. * @kobj: object. * @attr: attribute descriptor. - * */ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) @@ -191,13 +191,16 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) * sysfs_remove_bin_file - remove binary file for object. * @kobj: object. * @attr: attribute descriptor. - * */ -int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) +void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - sysfs_hash_and_remove(kobj->dentry,attr->attr.name); - return 0; + if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { + printk(KERN_ERR "%s: " + "bad dentry or inode or no such file: \"%s\"\n", + __FUNCTION__, attr->attr.name); + dump_stack(); + } } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 9889e54e1f13..fd7cd5f843d2 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "sysfs.h" extern struct super_block * sysfs_sb; @@ -234,17 +235,18 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) } } -void sysfs_hash_and_remove(struct dentry * dir, const char * name) +int sysfs_hash_and_remove(struct dentry * dir, const char * name) { struct sysfs_dirent * sd; struct sysfs_dirent * parent_sd; + int found = 0; if (!dir) - return; + return -ENOENT; if (dir->d_inode == NULL) /* no inode means this hasn't been made visible yet */ - return; + return -ENOENT; parent_sd = dir->d_fsdata; mutex_lock(&dir->d_inode->i_mutex); @@ -255,8 +257,11 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) list_del_init(&sd->s_sibling); sysfs_drop_dentry(sd, dir); sysfs_put(sd); + found = 1; break; } } mutex_unlock(&dir->d_inode->i_mutex); + + return found ? 0 : -ENOENT; } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3651ffb5ec09..6f3d6bd52887 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -10,7 +10,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, umode_t, int); extern int sysfs_add_file(struct dentry *, const struct attribute *, int); -extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); +extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 1ea5d3cda6ae..95f6db54d8d2 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -114,7 +114,7 @@ extern void sysfs_remove_link(struct kobject *, const char * name); int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr); -int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr); +void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); int sysfs_create_group(struct kobject *, const struct attribute_group *); void sysfs_remove_group(struct kobject *, const struct attribute_group *); -- cgit v1.2.3-71-gd317 From 4a7fb6363f2d1a6c09a10253937f672f3c7929e1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 14 Aug 2006 22:43:17 -0700 Subject: add __must_check to device management code We're getting a lot of crashes in the sysfs/kobject/device/bus/class code and they're very hard to diagnose. I'm suspecting that in some cases this is because drivers aren't checking return values and aren't handling errors correctly. So the code blithely blunders on and crashes later in very obscure ways. There's just no reason to ignore errors which can and do occur. So the patch sprinkles __must_check all over these APIs. Causes 1,513 new warnings. Heh. Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 52 ++++++++++++++++++++++++++----------------------- include/linux/kobject.h | 16 ++++++++------- include/linux/pci.h | 34 +++++++++++++++++--------------- include/linux/sysfs.h | 19 ++++++++++-------- 4 files changed, 66 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 7d447d7271c5..ad4db72f5733 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -58,7 +59,7 @@ struct bus_type { int (*resume)(struct device * dev); }; -extern int bus_register(struct bus_type * bus); +extern int __must_check bus_register(struct bus_type * bus); extern void bus_unregister(struct bus_type * bus); extern void bus_rescan_devices(struct bus_type * bus); @@ -70,9 +71,9 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data, struct device * bus_find_device(struct bus_type *bus, struct device *start, void *data, int (*match)(struct device *, void *)); -int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, - void * data, int (*fn)(struct device_driver *, void *)); - +int __must_check bus_for_each_drv(struct bus_type *bus, + struct device_driver *start, void *data, + int (*fn)(struct device_driver *, void *)); /* driverfs interface for exporting bus attributes */ @@ -85,7 +86,8 @@ struct bus_attribute { #define BUS_ATTR(_name,_mode,_show,_store) \ struct bus_attribute bus_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int bus_create_file(struct bus_type *, struct bus_attribute *); +extern int __must_check bus_create_file(struct bus_type *, + struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); struct device_driver { @@ -107,14 +109,13 @@ struct device_driver { }; -extern int driver_register(struct device_driver * drv); +extern int __must_check driver_register(struct device_driver * drv); extern void driver_unregister(struct device_driver * drv); extern struct device_driver * get_driver(struct device_driver * drv); extern void put_driver(struct device_driver * drv); extern struct device_driver *driver_find(const char *name, struct bus_type *bus); - /* driverfs interface for exporting driver attributes */ struct driver_attribute { @@ -126,16 +127,17 @@ struct driver_attribute { #define DRIVER_ATTR(_name,_mode,_show,_store) \ struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int driver_create_file(struct device_driver *, struct driver_attribute *); +extern int __must_check driver_create_file(struct device_driver *, + struct driver_attribute *); extern void driver_remove_file(struct device_driver *, struct driver_attribute *); -extern int driver_for_each_device(struct device_driver * drv, struct device * start, - void * data, int (*fn)(struct device *, void *)); +extern int __must_check driver_for_each_device(struct device_driver * drv, + struct device *start, void *data, + int (*fn)(struct device *, void *)); struct device * driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *, void *)); - /* * device classes */ @@ -168,7 +170,7 @@ struct class { int (*resume)(struct device *); }; -extern int class_register(struct class *); +extern int __must_check class_register(struct class *); extern void class_unregister(struct class *); @@ -181,7 +183,8 @@ struct class_attribute { #define CLASS_ATTR(_name,_mode,_show,_store) \ struct class_attribute class_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int class_create_file(struct class *, const struct class_attribute *); +extern int __must_check class_create_file(struct class *, + const struct class_attribute *); extern void class_remove_file(struct class *, const struct class_attribute *); struct class_device_attribute { @@ -194,7 +197,7 @@ struct class_device_attribute { struct class_device_attribute class_device_attr_##_name = \ __ATTR(_name,_mode,_show,_store) -extern int class_device_create_file(struct class_device *, +extern int __must_check class_device_create_file(struct class_device *, const struct class_device_attribute *); /** @@ -254,10 +257,10 @@ class_set_devdata (struct class_device *dev, void *data) } -extern int class_device_register(struct class_device *); +extern int __must_check class_device_register(struct class_device *); extern void class_device_unregister(struct class_device *); extern void class_device_initialize(struct class_device *); -extern int class_device_add(struct class_device *); +extern int __must_check class_device_add(struct class_device *); extern void class_device_del(struct class_device *); extern int class_device_rename(struct class_device *, char *); @@ -267,7 +270,7 @@ extern void class_device_put(struct class_device *); extern void class_device_remove_file(struct class_device *, const struct class_device_attribute *); -extern int class_device_create_bin_file(struct class_device *, +extern int __must_check class_device_create_bin_file(struct class_device *, struct bin_attribute *); extern void class_device_remove_bin_file(struct class_device *, struct bin_attribute *); @@ -282,7 +285,7 @@ struct class_interface { void (*remove_dev) (struct device *, struct class_interface *); }; -extern int class_interface_register(struct class_interface *); +extern int __must_check class_interface_register(struct class_interface *); extern void class_interface_unregister(struct class_interface *); extern struct class *class_create(struct module *owner, const char *name); @@ -307,7 +310,8 @@ struct device_attribute { #define DEVICE_ATTR(_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int device_create_file(struct device *device, struct device_attribute * entry); +extern int __must_check device_create_file(struct device *device, + struct device_attribute * entry); extern void device_remove_file(struct device * dev, struct device_attribute * attr); extern int __must_check device_create_bin_file(struct device *dev, struct bin_attribute *attr); @@ -380,12 +384,12 @@ static inline int device_is_registered(struct device *dev) /* * High level routines for use by the bus drivers */ -extern int device_register(struct device * dev); +extern int __must_check device_register(struct device * dev); extern void device_unregister(struct device * dev); extern void device_initialize(struct device * dev); -extern int device_add(struct device * dev); +extern int __must_check device_add(struct device * dev); extern void device_del(struct device * dev); -extern int device_for_each_child(struct device *, void *, +extern int __must_check device_for_each_child(struct device *, void *, int (*fn)(struct device *, void *)); extern int device_rename(struct device *dev, char *new_name); @@ -395,7 +399,7 @@ extern int device_rename(struct device *dev, char *new_name); */ extern void device_bind_driver(struct device * dev); extern void device_release_driver(struct device * dev); -extern int device_attach(struct device * dev); +extern int __must_check device_attach(struct device * dev); extern void driver_attach(struct device_driver * drv); extern void device_reprobe(struct device *dev); @@ -433,7 +437,7 @@ extern void device_shutdown(void); /* drivers/base/firmware.c */ -extern int firmware_register(struct subsystem *); +extern int __must_check firmware_register(struct subsystem *); extern void firmware_unregister(struct subsystem *); /* debugging and troubleshooting/diagnostic helpers. */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2d229327959e..bcd9cd173c2c 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -71,12 +72,12 @@ static inline const char * kobject_name(const struct kobject * kobj) extern void kobject_init(struct kobject *); extern void kobject_cleanup(struct kobject *); -extern int kobject_add(struct kobject *); +extern int __must_check kobject_add(struct kobject *); extern void kobject_del(struct kobject *); -extern int kobject_rename(struct kobject *, const char *new_name); +extern int __must_check kobject_rename(struct kobject *, const char *new_name); -extern int kobject_register(struct kobject *); +extern int __must_check kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); extern struct kobject * kobject_get(struct kobject *); @@ -128,8 +129,8 @@ struct kset { extern void kset_init(struct kset * k); -extern int kset_add(struct kset * k); -extern int kset_register(struct kset * k); +extern int __must_check kset_add(struct kset * k); +extern int __must_check kset_register(struct kset * k); extern void kset_unregister(struct kset * k); static inline struct kset * to_kset(struct kobject * kobj) @@ -239,7 +240,7 @@ extern struct subsystem hypervisor_subsys; (obj)->subsys.kset.kobj.kset = &(_subsys).kset extern void subsystem_init(struct subsystem *); -extern int subsystem_register(struct subsystem *); +extern int __must_check subsystem_register(struct subsystem *); extern void subsystem_unregister(struct subsystem *); static inline struct subsystem * subsys_get(struct subsystem * s) @@ -258,7 +259,8 @@ struct subsys_attribute { ssize_t (*store)(struct subsystem *, const char *, size_t); }; -extern int subsys_create_file(struct subsystem * , struct subsys_attribute *); +extern int __must_check subsys_create_file(struct subsystem * , + struct subsys_attribute *); #if defined(CONFIG_HOTPLUG) void kobject_uevent(struct kobject *kobj, enum kobject_action action); diff --git a/include/linux/pci.h b/include/linux/pci.h index 9514bbfe96e2..3ec72551ac31 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -403,7 +404,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */ extern struct list_head pci_devices; /* list of all devices */ void pcibios_fixup_bus(struct pci_bus *); -int pcibios_enable_device(struct pci_dev *, int mask); +int __must_check pcibios_enable_device(struct pci_dev *, int mask); char *pcibios_setup (char *str); /* Used only when drivers/pci/setup.c is used */ @@ -490,19 +491,19 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val); } -int pci_enable_device(struct pci_dev *dev); -int pci_enable_device_bars(struct pci_dev *dev, int mask); +int __must_check pci_enable_device(struct pci_dev *dev); +int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask); void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); #define HAVE_PCI_SET_MWI -int pci_set_mwi(struct pci_dev *dev); +int __must_check pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); -int pci_assign_resource(struct pci_dev *dev, int i); -int pci_assign_resource_fixed(struct pci_dev *dev, int i); +int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i); void pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ @@ -528,23 +529,24 @@ void pdev_sort_resources(struct pci_dev *, struct resource_list *); void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 -int pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); -int pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); /* drivers/pci/bus.c */ -int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, - resource_size_t size, resource_size_t align, - resource_size_t min, unsigned int type_mask, - void (*alignf)(void *, struct resource *, - resource_size_t, resource_size_t), - void *alignf_data); +int __must_check pci_bus_alloc_resource(struct pci_bus *bus, + struct resource *res, resource_size_t size, + resource_size_t align, resource_size_t min, + unsigned int type_mask, + void (*alignf)(void *, struct resource *, + resource_size_t, resource_size_t), + void *alignf_data); void pci_enable_bridges(struct pci_bus *bus); /* Proper probing supporting hot-pluggable devices */ -int __pci_register_driver(struct pci_driver *, struct module *); -static inline int pci_register_driver(struct pci_driver *driver) +int __must_check __pci_register_driver(struct pci_driver *, struct module *); +static inline int __must_check pci_register_driver(struct pci_driver *driver) { return __pci_register_driver(driver, THIS_MODULE); } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 95f6db54d8d2..02ad790921fd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -10,6 +10,7 @@ #ifndef _SYSFS_H_ #define _SYSFS_H_ +#include #include struct kobject; @@ -86,37 +87,39 @@ struct sysfs_dirent { #ifdef CONFIG_SYSFS -extern int +extern int __must_check sysfs_create_dir(struct kobject *); extern void sysfs_remove_dir(struct kobject *); -extern int +extern int __must_check sysfs_rename_dir(struct kobject *, const char *new_name); -extern int +extern int __must_check sysfs_create_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_update_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode); extern void sysfs_remove_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name); extern void sysfs_remove_link(struct kobject *, const char * name); -int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr); +int __must_check sysfs_create_bin_file(struct kobject *kobj, + struct bin_attribute *attr); void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); -int sysfs_create_group(struct kobject *, const struct attribute_group *); +int __must_check sysfs_create_group(struct kobject *, + const struct attribute_group *); void sysfs_remove_group(struct kobject *, const struct attribute_group *); void sysfs_notify(struct kobject * k, char *dir, char *attr); -- cgit v1.2.3-71-gd317 From cebc04ba9aeb3a646cc746300421fc0e5aa4f253 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 14 Aug 2006 22:43:18 -0700 Subject: add CONFIG_ENABLE_MUST_CHECK Those 1500 warnings can be a bit of a pain. Add a config option to shut them up. Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler.h | 5 +++++ lib/Kconfig.debug | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 9b4f11094937..060b96112ec6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -99,6 +99,11 @@ extern void __chk_io_ptr(void __iomem *); #define __must_check #endif +#ifndef CONFIG_ENABLE_MUST_CHECK +#undef __must_check +#define __must_check +#endif + /* * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 554ee688a9f8..5c114c3f03c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -8,6 +8,13 @@ config PRINTK_TIME operations. This is useful for identifying long delays in kernel startup. +config ENABLE_MUST_CHECK + bool "Enable __must_check logic" + default y + help + Enable the __must_check logic in the kernel build. Disable this to + suppress the "warning: ignoring return value of 'foo', declared with + attribute warn_unused_result" messages. config MAGIC_SYSRQ bool "Magic SysRq key" -- cgit v1.2.3-71-gd317 From f86db396ff455ed586751d21816a1ebd431264e5 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 14 Aug 2006 22:43:20 -0700 Subject: drivers/base: check errors Add lots of return-value checking. : fix bus_rescan_devices()] Cc: "Randy.Dunlap" Signed-off-by: Cornelia Huck Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 2 +- drivers/base/bus.c | 107 +++++++++++++++++++++++++++++++++---------------- drivers/base/dd.c | 37 ++++++++++++----- include/linux/device.h | 8 ++-- 4 files changed, 104 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/base.h b/drivers/base/base.h index c3b8dc98b8a7..d26644a59537 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -16,7 +16,7 @@ extern int cpu_dev_init(void); extern int attribute_container_init(void); extern int bus_add_device(struct device * dev); -extern void bus_attach_device(struct device * dev); +extern int bus_attach_device(struct device * dev); extern void bus_remove_device(struct device * dev); extern struct bus_type *get_bus(struct bus_type * bus); extern void put_bus(struct bus_type * bus); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 4d22a1d10a1c..aa685a20b649 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -371,12 +371,20 @@ int bus_add_device(struct device * dev) if (bus) { pr_debug("bus %s: add device %s\n", bus->name, dev->bus_id); error = device_add_attrs(bus, dev); - if (!error) { - sysfs_create_link(&bus->devices.kobj, &dev->kobj, dev->bus_id); - sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "subsystem"); - sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "bus"); - } + if (error) + goto out; + error = sysfs_create_link(&bus->devices.kobj, + &dev->kobj, dev->bus_id); + if (error) + goto out; + error = sysfs_create_link(&dev->kobj, + &dev->bus->subsys.kset.kobj, "subsystem"); + if (error) + goto out; + error = sysfs_create_link(&dev->kobj, + &dev->bus->subsys.kset.kobj, "bus"); } +out: return error; } @@ -386,14 +394,19 @@ int bus_add_device(struct device * dev) * * - Try to attach to driver. */ -void bus_attach_device(struct device * dev) +int bus_attach_device(struct device * dev) { - struct bus_type * bus = dev->bus; + struct bus_type *bus = dev->bus; + int ret = 0; if (bus) { - device_attach(dev); - klist_add_tail(&dev->knode_bus, &bus->klist_devices); + ret = device_attach(dev); + if (ret >= 0) { + klist_add_tail(&dev->knode_bus, &bus->klist_devices); + ret = 0; + } } + return ret; } /** @@ -455,10 +468,17 @@ static void driver_remove_attrs(struct bus_type * bus, struct device_driver * dr * Thanks to drivers making their tables __devinit, we can't allow manual * bind and unbind from userspace unless CONFIG_HOTPLUG is enabled. */ -static void add_bind_files(struct device_driver *drv) +static int __must_check add_bind_files(struct device_driver *drv) { - driver_create_file(drv, &driver_attr_unbind); - driver_create_file(drv, &driver_attr_bind); + int ret; + + ret = driver_create_file(drv, &driver_attr_unbind); + if (ret == 0) { + ret = driver_create_file(drv, &driver_attr_bind); + if (ret) + driver_remove_file(drv, &driver_attr_unbind); + } + return ret; } static void remove_bind_files(struct device_driver *drv) @@ -476,7 +496,7 @@ static inline void remove_bind_files(struct device_driver *drv) {} * @drv: driver. * */ -int bus_add_driver(struct device_driver * drv) +int bus_add_driver(struct device_driver *drv) { struct bus_type * bus = get_bus(drv->bus); int error = 0; @@ -484,27 +504,39 @@ int bus_add_driver(struct device_driver * drv) if (bus) { pr_debug("bus %s: add driver %s\n", bus->name, drv->name); error = kobject_set_name(&drv->kobj, "%s", drv->name); - if (error) { - put_bus(bus); - return error; - } + if (error) + goto out_put_bus; drv->kobj.kset = &bus->drivers; - if ((error = kobject_register(&drv->kobj))) { - put_bus(bus); - return error; - } + if ((error = kobject_register(&drv->kobj))) + goto out_put_bus; - driver_attach(drv); + error = driver_attach(drv); + if (error) + goto out_unregister; klist_add_tail(&drv->knode_bus, &bus->klist_drivers); module_add_driver(drv->owner, drv); - driver_add_attrs(bus, drv); - add_bind_files(drv); + error = driver_add_attrs(bus, drv); + if (error) { + /* How the hell do we get out of this pickle? Give up */ + printk(KERN_ERR "%s: driver_add_attrs(%s) failed\n", + __FUNCTION__, drv->name); + } + error = add_bind_files(drv); + if (error) { + /* Ditto */ + printk(KERN_ERR "%s: add_bind_files(%s) failed\n", + __FUNCTION__, drv->name); + } } return error; +out_unregister: + kobject_unregister(&drv->kobj); +out_put_bus: + put_bus(bus); + return error; } - /** * bus_remove_driver - delete driver from bus's knowledge. * @drv: driver. @@ -530,16 +562,21 @@ void bus_remove_driver(struct device_driver * drv) /* Helper for bus_rescan_devices's iter */ -static int bus_rescan_devices_helper(struct device *dev, void *data) +static int __must_check bus_rescan_devices_helper(struct device *dev, + void *data) { + int ret = 0; + if (!dev->driver) { if (dev->parent) /* Needed for USB */ down(&dev->parent->sem); - device_attach(dev); + ret = device_attach(dev); if (dev->parent) up(&dev->parent->sem); + if (ret > 0) + ret = 0; } - return 0; + return ret < 0 ? ret : 0; } /** @@ -550,9 +587,9 @@ static int bus_rescan_devices_helper(struct device *dev, void *data) * attached and rescan it against existing drivers to see if it matches * any by calling device_attach() for the unbound devices. */ -void bus_rescan_devices(struct bus_type * bus) +int bus_rescan_devices(struct bus_type * bus) { - bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); + return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } /** @@ -564,7 +601,7 @@ void bus_rescan_devices(struct bus_type * bus) * to use if probing criteria changed during a devices lifetime and * driver attachment should change accordingly. */ -void device_reprobe(struct device *dev) +int device_reprobe(struct device *dev) { if (dev->driver) { if (dev->parent) /* Needed for USB */ @@ -573,14 +610,14 @@ void device_reprobe(struct device *dev) if (dev->parent) up(&dev->parent->sem); } - - bus_rescan_devices_helper(dev, NULL); + return bus_rescan_devices_helper(dev, NULL); } EXPORT_SYMBOL_GPL(device_reprobe); -struct bus_type * get_bus(struct bus_type * bus) +struct bus_type *get_bus(struct bus_type *bus) { - return bus ? container_of(subsys_get(&bus->subsys), struct bus_type, subsys) : NULL; + return bus ? container_of(subsys_get(&bus->subsys), + struct bus_type, subsys) : NULL; } void put_bus(struct bus_type * bus) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 889c71111239..9f6f11ca0ab6 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -38,17 +38,29 @@ * * This function must be called with @dev->sem held. */ -void device_bind_driver(struct device * dev) +int device_bind_driver(struct device *dev) { - if (klist_node_attached(&dev->knode_driver)) - return; + int ret; + + if (klist_node_attached(&dev->knode_driver)) { + printk(KERN_WARNING "%s: device %s already bound\n", + __FUNCTION__, kobject_name(&dev->kobj)); + return 0; + } pr_debug("bound device '%s' to driver '%s'\n", dev->bus_id, dev->driver->name); klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices); - sysfs_create_link(&dev->driver->kobj, &dev->kobj, + ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, kobject_name(&dev->kobj)); - sysfs_create_link(&dev->kobj, &dev->driver->kobj, "driver"); + if (ret == 0) { + ret = sysfs_create_link(&dev->kobj, &dev->driver->kobj, + "driver"); + if (ret) + sysfs_remove_link(&dev->driver->kobj, + kobject_name(&dev->kobj)); + } + return ret; } /** @@ -91,7 +103,11 @@ int driver_probe_device(struct device_driver * drv, struct device * dev) goto ProbeFailed; } } - device_bind_driver(dev); + if (device_bind_driver(dev)) { + printk(KERN_ERR "%s: device_bind_driver(%s) failed\n", + __FUNCTION__, dev->bus_id); + /* How does undo a ->probe? We're screwed. */ + } ret = 1; pr_debug("%s: Bound Device %s to Driver %s\n", drv->bus->name, dev->bus_id, drv->name); @@ -139,8 +155,9 @@ int device_attach(struct device * dev) down(&dev->sem); if (dev->driver) { - device_bind_driver(dev); - ret = 1; + ret = device_bind_driver(dev); + if (ret == 0) + ret = 1; } else ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); up(&dev->sem); @@ -182,9 +199,9 @@ static int __driver_attach(struct device * dev, void * data) * returns 0 and the @dev->driver is set, we've found a * compatible pair. */ -void driver_attach(struct device_driver * drv) +int driver_attach(struct device_driver * drv) { - bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); + return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); } /** diff --git a/include/linux/device.h b/include/linux/device.h index ad4db72f5733..b3da9a870cfc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,7 +62,7 @@ struct bus_type { extern int __must_check bus_register(struct bus_type * bus); extern void bus_unregister(struct bus_type * bus); -extern void bus_rescan_devices(struct bus_type * bus); +extern int __must_check bus_rescan_devices(struct bus_type * bus); /* iterator helpers for buses */ @@ -397,11 +397,11 @@ extern int device_rename(struct device *dev, char *new_name); * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ -extern void device_bind_driver(struct device * dev); +extern int __must_check device_bind_driver(struct device *dev); extern void device_release_driver(struct device * dev); extern int __must_check device_attach(struct device * dev); -extern void driver_attach(struct device_driver * drv); -extern void device_reprobe(struct device *dev); +extern int __must_check driver_attach(struct device_driver *drv); +extern int __must_check device_reprobe(struct device *dev); /* * Easy functions for dynamically creating devices on the fly -- cgit v1.2.3-71-gd317 From f20a9ead0d005fbeeae3fc21a96f9bf197ac1c1c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 14 Aug 2006 22:43:23 -0700 Subject: sysfs: add proper sysfs_init() prototype Don't be crufty. Mark it __must_check too. Cc: "Randy.Dunlap" Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 +--------- include/linux/sysfs.h | 7 +++++++ 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index fa7ed6a9fc2d..36d180858136 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -28,15 +29,6 @@ extern int __init init_rootfs(void); -#ifdef CONFIG_SYSFS -extern int __init sysfs_init(void); -#else -static inline int sysfs_init(void) -{ - return 0; -} -#endif - /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 02ad790921fd..6d5c43d31dec 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -123,6 +123,8 @@ int __must_check sysfs_create_group(struct kobject *, void sysfs_remove_group(struct kobject *, const struct attribute_group *); void sysfs_notify(struct kobject * k, char *dir, char *attr); +extern int __must_check sysfs_init(void); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir(struct kobject * k) @@ -194,6 +196,11 @@ static inline void sysfs_notify(struct kobject * k, char *dir, char *attr) { } +static inline int __must_check sysfs_init(void) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ -- cgit v1.2.3-71-gd317 From d779249ed4cb3b50690de6de8448829d65a1cd08 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Jul 2006 10:59:59 -0700 Subject: Driver Core: add ability for drivers to do a threaded probe This adds the infrastructure for drivers to do a threaded probe, and waits at init time for all currently outstanding probes to complete. A new kernel thread will be created when the probe() function for the driver is called, if the multithread_probe bit is set in the driver saying it can support this kind of operation. I have tested this with USB and PCI, and it works, and shaves off a lot of time in the boot process, but there are issues with finding root boot disks, and some USB drivers assume that this can never happen, so it is currently not enabled for any bus type. Individual drivers can enable this right now if they wish, and bus authors can selectivly turn it on as well, once they determine that their subsystem will work properly with it. Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 108 ++++++++++++++++++++++++++++++++++++------------- include/linux/device.h | 3 ++ init/do_mounts.c | 5 +++ 3 files changed, 89 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 9f6f11ca0ab6..319a73be4180 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -17,6 +17,7 @@ #include #include +#include #include "base.h" #include "power/power.h" @@ -63,44 +64,35 @@ int device_bind_driver(struct device *dev) return ret; } -/** - * driver_probe_device - attempt to bind device & driver. - * @drv: driver. - * @dev: device. - * - * First, we call the bus's match function, if one present, which - * should compare the device IDs the driver supports with the - * device IDs of the device. Note we don't do this ourselves - * because we don't know the format of the ID structures, nor what - * is to be considered a match and what is not. - * - * This function returns 1 if a match is found, an error if one - * occurs (that is not -ENODEV or -ENXIO), and 0 otherwise. - * - * This function must be called with @dev->sem held. When called - * for a USB interface, @dev->parent->sem must be held as well. - */ -int driver_probe_device(struct device_driver * drv, struct device * dev) +struct stupid_thread_structure { + struct device_driver *drv; + struct device *dev; +}; + +static atomic_t probe_count = ATOMIC_INIT(0); +static int really_probe(void *void_data) { + struct stupid_thread_structure *data = void_data; + struct device_driver *drv = data->drv; + struct device *dev = data->dev; int ret = 0; - if (drv->bus->match && !drv->bus->match(dev, drv)) - goto Done; + atomic_inc(&probe_count); + pr_debug("%s: Probing driver %s with device %s\n", + drv->bus->name, drv->name, dev->bus_id); - pr_debug("%s: Matched Device %s with Driver %s\n", - drv->bus->name, dev->bus_id, drv->name); dev->driver = drv; if (dev->bus->probe) { ret = dev->bus->probe(dev); if (ret) { dev->driver = NULL; - goto ProbeFailed; + goto probe_failed; } } else if (drv->probe) { ret = drv->probe(dev); if (ret) { dev->driver = NULL; - goto ProbeFailed; + goto probe_failed; } } if (device_bind_driver(dev)) { @@ -111,9 +103,9 @@ int driver_probe_device(struct device_driver * drv, struct device * dev) ret = 1; pr_debug("%s: Bound Device %s to Driver %s\n", drv->bus->name, dev->bus_id, drv->name); - goto Done; + goto done; - ProbeFailed: +probe_failed: if (ret == -ENODEV || ret == -ENXIO) { /* Driver matched, but didn't support device * or device not found. @@ -126,7 +118,69 @@ int driver_probe_device(struct device_driver * drv, struct device * dev) "%s: probe of %s failed with error %d\n", drv->name, dev->bus_id, ret); } - Done: +done: + kfree(data); + atomic_dec(&probe_count); + return ret; +} + +/** + * driver_probe_done + * Determine if the probe sequence is finished or not. + * + * Should somehow figure out how to use a semaphore, not an atomic variable... + */ +int driver_probe_done(void) +{ + pr_debug("%s: probe_count = %d\n", __FUNCTION__, + atomic_read(&probe_count)); + if (atomic_read(&probe_count)) + return -EBUSY; + return 0; +} + +/** + * driver_probe_device - attempt to bind device & driver together + * @drv: driver to bind a device to + * @dev: device to try to bind to the driver + * + * First, we call the bus's match function, if one present, which should + * compare the device IDs the driver supports with the device IDs of the + * device. Note we don't do this ourselves because we don't know the + * format of the ID structures, nor what is to be considered a match and + * what is not. + * + * This function returns 1 if a match is found, an error if one occurs + * (that is not -ENODEV or -ENXIO), and 0 otherwise. + * + * This function must be called with @dev->sem held. When called for a + * USB interface, @dev->parent->sem must be held as well. + */ +int driver_probe_device(struct device_driver * drv, struct device * dev) +{ + struct stupid_thread_structure *data; + struct task_struct *probe_task; + int ret = 0; + + if (drv->bus->match && !drv->bus->match(dev, drv)) + goto done; + + pr_debug("%s: Matched Device %s with Driver %s\n", + drv->bus->name, dev->bus_id, drv->name); + + data = kmalloc(sizeof(*data), GFP_KERNEL); + data->drv = drv; + data->dev = dev; + + if (drv->multithread_probe) { + probe_task = kthread_run(really_probe, data, + "probe-%s", dev->bus_id); + if (IS_ERR(probe_task)) + ret = PTR_ERR(probe_task); + } else + ret = really_probe(data); + +done: return ret; } diff --git a/include/linux/device.h b/include/linux/device.h index b3da9a870cfc..74246efba931 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -106,6 +106,8 @@ struct device_driver { void (*shutdown) (struct device * dev); int (*suspend) (struct device * dev, pm_message_t state); int (*resume) (struct device * dev); + + unsigned int multithread_probe:1; }; @@ -115,6 +117,7 @@ extern void driver_unregister(struct device_driver * drv); extern struct device_driver * get_driver(struct device_driver * drv); extern void put_driver(struct device_driver * drv); extern struct device_driver *driver_find(const char *name, struct bus_type *bus); +extern int driver_probe_done(void); /* driverfs interface for exporting driver attributes */ diff --git a/init/do_mounts.c b/init/do_mounts.c index 94aeec7aa917..b290aadb1d3f 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -403,6 +404,10 @@ void __init prepare_namespace(void) ssleep(root_delay); } + /* wait for the known devices to complete their probing */ + while (driver_probe_done() != 0) + msleep(100); + md_run_setup(); if (saved_root_name[0]) { -- cgit v1.2.3-71-gd317 From f2eaae197f4590c4d96f31b09b0ee9067421a95c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 18 Sep 2006 16:22:34 -0400 Subject: Driver core: Fix potential deadlock in driver core There is a potential deadlock in the driver core. It boils down to the fact that bus_remove_device() calls klist_remove() instead of klist_del(), thereby waiting until the reference count of the klist_node in the bus's klist of devices drops to 0. The refcount can't reach 0 so long as a modprobe process is trying to bind a new driver to the device being removed, by calling __driver_attach(). The problem is that __driver_attach() tries to acquire the device's parent's semaphore, but the caller of bus_remove_device() is quite likely to own that semaphore already. It isn't sufficient just to replace klist_remove() with klist_del(). Doing so runs the risk that the device would remain on the bus's klist of devices for some time, and so could be bound to another driver even after it was unregistered. What's needed is a new way to distinguish whether or not a device is registered, based on a criterion other than whether its klist_node is linked into the bus's klist of devices. That way driver binding can fail when the device is unregistered, even if it is still linked into the klist. This patch (as782) implements the solution, by adding a new bitflag to indiate when a struct device is registered, by testing the flag before allowing a driver to bind a device, and by changing the definition of the device_is_registered() inline. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 8 ++++++-- drivers/base/dd.c | 2 ++ include/linux/device.h | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index aa685a20b649..636af538a2b5 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -392,6 +392,7 @@ out: * bus_attach_device - add device to bus * @dev: device tried to attach to a driver * + * - Add device to bus's list of devices. * - Try to attach to driver. */ int bus_attach_device(struct device * dev) @@ -400,11 +401,13 @@ int bus_attach_device(struct device * dev) int ret = 0; if (bus) { + dev->is_registered = 1; ret = device_attach(dev); if (ret >= 0) { klist_add_tail(&dev->knode_bus, &bus->klist_devices); ret = 0; - } + } else + dev->is_registered = 0; } return ret; } @@ -425,7 +428,8 @@ void bus_remove_device(struct device * dev) sysfs_remove_link(&dev->kobj, "bus"); sysfs_remove_link(&dev->bus->devices.kobj, dev->bus_id); device_remove_attrs(dev->bus, dev); - klist_remove(&dev->knode_bus); + dev->is_registered = 0; + klist_del(&dev->knode_bus); pr_debug("bus %s: remove device %s\n", dev->bus->name, dev->bus_id); device_release_driver(dev); put_bus(dev->bus); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 319a73be4180..b5f43c3e44fa 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -162,6 +162,8 @@ int driver_probe_device(struct device_driver * drv, struct device * dev) struct task_struct *probe_task; int ret = 0; + if (!device_is_registered(dev)) + return -ENODEV; if (drv->bus->match && !drv->bus->match(dev, drv)) goto done; diff --git a/include/linux/device.h b/include/linux/device.h index 74246efba931..662e6a10144e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -329,6 +329,7 @@ struct device { struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ + unsigned is_registered:1; struct device_attribute uevent_attr; struct device_attribute *devt_attr; @@ -381,7 +382,7 @@ dev_set_drvdata (struct device *dev, void *data) static inline int device_is_registered(struct device *dev) { - return klist_node_attached(&dev->knode_bus); + return dev->is_registered; } /* -- cgit v1.2.3-71-gd317 From 407984f1af259b31957c7c05075a454a751bb801 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog with sysctl Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi watchdog. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/nmi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-i386/nmi.h | 1 + include/asm-x86_64/nmi.h | 1 + include/linux/sysctl.h | 1 + kernel/sysctl.c | 11 ++++++++++ 6 files changed, 114 insertions(+) (limited to 'include/linux') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acd3fdea2a21..28065d0b71a9 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -846,6 +846,58 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) return 0; } +/* + * proc handler for /proc/sys/kernel/nmi_watchdog + */ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0) { + printk(KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EINVAL; + } + + if (nmi_watchdog == NMI_DEFAULT) { + if (nmi_known_cpu() > 0) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else if (nmi_watchdog == NMI_IO_APIC) { + /* FIXME + * for some reason these functions don't work + */ + printk("Can not enable/disable NMI on IO APIC\n"); + return -EINVAL; +#if 0 + if (nmi_watchdog_enabled) + enable_timer_nmi_watchdog(); + else + disable_timer_nmi_watchdog(); +#endif + } else { + printk( KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + #endif EXPORT_SYMBOL(nmi_active); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 9d175dcf3a2d..3a17411a9a19 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -750,6 +750,54 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) return 0; } +/* + * proc handler for /proc/sys/kernel/nmi + */ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0) { + printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EINVAL; + } + + /* if nmi_watchdog is not set yet, then set it */ + nmi_watchdog_default(); + + if (nmi_watchdog == NMI_LOCAL_APIC) + { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else if (nmi_watchdog == NMI_IO_APIC) { + /* FIXME + * for some reason these functions don't work + */ + printk("Can not enable/disable NMI on IO APIC\n"); + return -EIO; +#if 0 + if (nmi_watchdog_enabled) + enable_timer_nmi_watchdog(); + else + disable_timer_nmi_watchdog(); +#endif + } else { + printk(KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + #endif EXPORT_SYMBOL(nmi_active); diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 34d6bf063b6e..13b5d8311bf7 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -14,6 +14,7 @@ */ int do_nmi_callback(struct pt_regs *regs, int cpu); +extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 8818c39d34e0..2c23b0df87d2 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -43,6 +43,7 @@ extern void die_nmi(char *str, struct pt_regs *regs); extern int panic_on_timeout; extern int unknown_nmi_panic; +extern int nmi_watchdog_enabled; extern int check_nmi_watchdog(void); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 736ed917a4f8..ecb79ba52ae1 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -150,6 +150,7 @@ enum KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ KERN_COMPAT_LOG=73, /* int: print compat layer messages */ KERN_MAX_LOCK_DEPTH=74, + KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 83f168361624..040de6bd74dd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -76,6 +76,9 @@ extern int compat_log; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; +int nmi_watchdog_enabled; +extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, + void __user *, size_t *, loff_t *); #endif /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -628,6 +631,14 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_NMI_WATCHDOG, + .procname = "nmi_watchdog", + .data = &nmi_watchdog_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_nmi_enabled, + }, #endif #if defined(CONFIG_X86) { -- cgit v1.2.3-71-gd317 From 8da5adda91df3d2fcc5300e68da491694c9af019 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Allow users to force a panic on NMI To quote Alan Cox: The default Linux behaviour on an NMI of either memory or unknown is to continue operation. For many environments such as scientific computing it is preferable that the box is taken out and the error dealt with than an uncorrected parity/ECC error get propogated. A small number of systems do generate NMI's for bizarre random reasons such as power management so the default is unchanged. In other respects the new proc/sys entry works like the existing panic controls already in that directory. This is separate to the edac support - EDAC allows supported chipsets to handle ECC errors well, this change allows unsupported cases to at least panic rather than cause problems further down the line. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 6 ++++++ arch/x86_64/kernel/traps.c | 6 ++++++ include/linux/kernel.h | 1 + include/linux/sysctl.h | 1 + kernel/panic.c | 1 + kernel/sysctl.c | 8 ++++++++ 6 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7db664d0b25c..2f6cb8276480 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -635,6 +635,8 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs) "to continue\n"); printk(KERN_EMERG "You probably have a hardware problem with your RAM " "chips\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); @@ -670,6 +672,10 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) reason, smp_processor_id()); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + } static DEFINE_SPINLOCK(nmi_print_lock); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 42bc070fdf11..b18829db2a6a 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -732,6 +732,8 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); printk("You probably have a hardware problem with your RAM chips\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); /* Clear and disable the memory parity error line. */ reason = (reason & 0xf) | 4; @@ -757,6 +759,10 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + } /* Runs on IST stack. This code must keep interrupts off all the time. diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b2ae4fdce8b..1ff9609300b4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -186,6 +186,7 @@ extern void bust_spinlocks(int yes); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; extern int panic_on_oops; +extern int panic_on_unrecovered_nmi; extern int tainted; extern const char *print_tainted(void); extern void add_taint(unsigned); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ecb79ba52ae1..432778446ad2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -151,6 +151,7 @@ enum KERN_COMPAT_LOG=73, /* int: print compat layer messages */ KERN_MAX_LOCK_DEPTH=74, KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ + KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ }; diff --git a/kernel/panic.c b/kernel/panic.c index 8010b9b17aca..d2db3e2209e0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -21,6 +21,7 @@ #include int panic_on_oops; +int panic_on_unrecovered_nmi; int tainted; static int pause_on_oops; static int pause_on_oops_flag; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 040de6bd74dd..220e20564124 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -641,6 +641,14 @@ static ctl_table kern_table[] = { }, #endif #if defined(CONFIG_X86) + { + .ctl_name = KERN_PANIC_ON_NMI, + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_BOOTLOADER_TYPE, .procname = "bootloader_type", -- cgit v1.2.3-71-gd317 From c08c820508233b424deab3302bc404bbecc6493a Mon Sep 17 00:00:00 2001 From: Vojtech Pavlik Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] Add the vgetcpu vsyscall This patch adds a vgetcpu vsyscall, which depending on the CPU RDTSCP capability uses either the RDTSCP or CPUID to obtain a CPU and node numbers and pass them to the program. AK: Lots of changes over Vojtech's original code: Better prototype for vgetcpu() It's better to pass the cpu / node numbers as separate arguments to avoid mistakes when going from SMP to NUMA. Also add a fast time stamp based cache using a user supplied argument to speed things more up. Use fast method from Chuck Ebbert to retrieve node/cpu from GDT limit instead of CPUID Made sure RDTSCP init is always executed after node is known. Drop printk Signed-off-by: Vojtech Pavlik Signed-off-by: Andi Kleen --- arch/x86_64/kernel/head.S | 2 +- arch/x86_64/kernel/time.c | 13 ++++--- arch/x86_64/kernel/vmlinux.lds.S | 3 ++ arch/x86_64/kernel/vsyscall.c | 84 +++++++++++++++++++++++++++++++++++++++- include/asm-x86_64/segment.h | 5 ++- include/asm-x86_64/smp.h | 12 ++++-- include/asm-x86_64/vsyscall.h | 9 +++++ include/linux/getcpu.h | 16 ++++++++ 8 files changed, 130 insertions(+), 14 deletions(-) create mode 100644 include/linux/getcpu.h (limited to 'include/linux') diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index c9739ca81d06..505ec4a57506 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table) .quad 0,0 /* TSS */ .quad 0,0 /* LDT */ .quad 0,0,0 /* three TLS descriptors */ - .quad 0 /* unused */ + .quad 0x0000f40000000000 /* node/CPU stored in limit */ gdt_end: /* asm/segment.h:GDT_ENTRIES must match this */ /* This should be a multiple of the cache line size */ diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 97b9e46d1992..560ed944dc0e 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -899,12 +899,8 @@ static int __cpuinit time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) { unsigned cpu = (unsigned long) hcpu; - if (action == CPU_ONLINE && - cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { - unsigned p; - p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12); - write_rdtscp_aux(p); - } + if (action == CPU_ONLINE) + vsyscall_set_cpu(cpu); return NOTIFY_DONE; } @@ -993,6 +989,11 @@ void time_init_gtod(void) if (unsynchronized_tsc()) notsc = 1; + if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) + vgetcpu_mode = VGETCPU_RDTSCP; + else + vgetcpu_mode = VGETCPU_LSL; + if (vxtime.hpet_address && notsc) { timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; if (hpet_use_timer) diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 7c4de31471d4..8d5a5149bb3a 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -99,6 +99,9 @@ SECTIONS .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } vxtime = VVIRT(.vxtime); + .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } + vgetcpu_mode = VVIRT(.vgetcpu_mode); + .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } wall_jiffies = VVIRT(.wall_jiffies); diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index f603037df162..902783bc4d53 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -33,11 +34,15 @@ #include #include #include +#include +#include +#include #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) int __sysctl_vsyscall __section_sysctl_vsyscall = 1; seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +int __vgetcpu_mode __section_vgetcpu_mode; #include @@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t) return __xtime.tv_sec; } -long __vsyscall(2) venosys_0(void) +/* Fast way to get current CPU and node. + This helps to do per node and per CPU caches in user space. + The result is not guaranteed without CPU affinity, but usually + works out because the scheduler tries to keep a thread on the same + CPU. + + tcache must point to a two element sized long array. + All arguments can be NULL. */ +long __vsyscall(2) +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) { - return -ENOSYS; + unsigned int dummy, p; + unsigned long j = 0; + + /* Fast cache - only recompute value once per jiffies and avoid + relatively costly rdtscp/cpuid otherwise. + This works because the scheduler usually keeps the process + on the same CPU and this syscall doesn't guarantee its + results anyways. + We do this here because otherwise user space would do it on + its own in a likely inferior way (no access to jiffies). + If you don't like it pass NULL. */ + if (tcache && tcache->t0 == (j = __jiffies)) { + p = tcache->t1; + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { + /* Load per CPU data from RDTSCP */ + rdtscp(dummy, dummy, p); + } else { + /* Load per CPU data from GDT */ + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + } + if (tcache) { + tcache->t0 = j; + tcache->t1 = p; + } + if (cpu) + *cpu = p & 0xfff; + if (node) + *node = p >> 12; + return 0; } long __vsyscall(3) venosys_1(void) @@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = { #endif +static void __cpuinit write_rdtscp_cb(void *info) +{ + write_rdtscp_aux((unsigned long)info); +} + +void __cpuinit vsyscall_set_cpu(int cpu) +{ + unsigned long *d; + unsigned long node = 0; +#ifdef CONFIG_NUMA + node = cpu_to_node[cpu]; +#endif + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { + void *info = (void *)((node << 12) | cpu); + /* Can happen on preemptive kernel */ + if (get_cpu() == cpu) + write_rdtscp_cb(info); +#ifdef CONFIG_SMP + else { + /* the notifier is unfortunately not executed on the + target CPU */ + smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); + } +#endif + put_cpu(); + } + + /* Store cpu number in limit so that it can be loaded quickly + in user space in vgetcpu. + 12 bits for the CPU and 8 bits for the node. */ + d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); + *d = 0x0f40000000000ULL; + *d |= cpu; + *d |= (node & 0xf) << 12; + *d |= (node >> 4) << 48; +} + static void __init map_vsyscall(void) { extern char __vsyscall_0; @@ -214,6 +293,7 @@ static int __init vsyscall_init(void) VSYSCALL_ADDR(__NR_vgettimeofday))); BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); map_vsyscall(); #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h index d4bed33fb32c..334ddcdd8f92 100644 --- a/include/asm-x86_64/segment.h +++ b/include/asm-x86_64/segment.h @@ -20,15 +20,16 @@ #define __USER_CS 0x33 /* 6*8+3 */ #define __USER32_DS __USER_DS -#define GDT_ENTRY_TLS 1 #define GDT_ENTRY_TSS 8 /* needs two entries */ #define GDT_ENTRY_LDT 10 /* needs two entries */ #define GDT_ENTRY_TLS_MIN 12 #define GDT_ENTRY_TLS_MAX 14 -/* 15 free */ #define GDT_ENTRY_TLS_ENTRIES 3 +#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ +#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) + /* TLS indexes for 64bit - hardcoded in arch_prctl */ #define FS_TLS 0 #define GS_TLS 1 diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 6805e1feb300..d61547fd833b 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -133,13 +133,19 @@ static __inline int logical_smp_processor_id(void) /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); } -#endif #ifdef CONFIG_SMP #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] #else #define cpu_physical_id(cpu) boot_cpu_id -#endif - +static inline int smp_call_function_single(int cpuid, void (*func) (void *info), + void *info, int retry, int wait) +{ + /* Disable interrupts here? */ + func(info); + return 0; +} +#endif /* !CONFIG_SMP */ +#endif /* !__ASSEMBLY */ #endif diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 146b24402a5f..2281e9399b96 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -4,6 +4,7 @@ enum vsyscall_num { __NR_vgettimeofday, __NR_vtime, + __NR_vgetcpu, }; #define VSYSCALL_START (-10UL << 20) @@ -15,6 +16,7 @@ enum vsyscall_num { #include #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) +#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) @@ -26,6 +28,9 @@ enum vsyscall_num { #define VXTIME_HPET 2 #define VXTIME_PMTMR 3 +#define VGETCPU_RDTSCP 1 +#define VGETCPU_LSL 2 + struct vxtime_data { long hpet_address; /* HPET base address */ int last; @@ -40,6 +45,7 @@ struct vxtime_data { /* vsyscall space (readonly) */ extern struct vxtime_data __vxtime; +extern int __vgetcpu_mode; extern struct timespec __xtime; extern volatile unsigned long __jiffies; extern unsigned long __wall_jiffies; @@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock; /* kernel space (writeable) */ extern struct vxtime_data vxtime; +extern int vgetcpu_mode; extern unsigned long wall_jiffies; extern struct timezone sys_tz; extern int sysctl_vsyscall; @@ -55,6 +62,8 @@ extern seqlock_t xtime_lock; extern int sysctl_vsyscall; +extern void vsyscall_set_cpu(int cpu); + #define ARCH_HAVE_XTIME_LOCK 1 #endif /* __KERNEL__ */ diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h new file mode 100644 index 000000000000..031ed3780e45 --- /dev/null +++ b/include/linux/getcpu.h @@ -0,0 +1,16 @@ +#ifndef _LINUX_GETCPU_H +#define _LINUX_GETCPU_H 1 + +/* Cache for getcpu() to speed it up. Results might be upto a jiffie + out of date, but will be faster. + User programs should not refer to the contents of this structure. + It is only a cache for vgetcpu(). It might change in future kernels. + The user program must store this information per thread (__thread) + If you want 100% accurate information pass NULL instead. */ +struct getcpu_cache { + unsigned long t0; + unsigned long t1; + unsigned long res[4]; +}; + +#endif -- cgit v1.2.3-71-gd317 From 3cfc348bf90ffaa777c188652aa297f04eb94de8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] x86: Add portable getcpu call For NUMA optimization and some other algorithms it is useful to have a fast to get the current CPU and node numbers in user space. x86-64 added a fast way to do this in a vsyscall. This adds a generic syscall for other architectures to make it a generic portable facility. I expect some of them will also implement it as a faster vsyscall. The cache is an optimization for the x86-64 vsyscall optimization. Since what the syscall returns is an approximation anyways and user space often wants very fast results it can be cached for some time. The norma methods to get this information in user space are relatively slow The vsyscall is in a better position to manage the cache because it has direct access to a fast time stamp (jiffies). For the generic syscall optimization it doesn't help much, but enforce a valid argument to keep programs portable I only added an i386 syscall entry for now. Other architectures can follow as needed. AK: Also added some cleanups from Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/syscall_table.S | 1 + arch/x86_64/ia32/ia32entry.S | 1 + include/asm-i386/unistd.h | 3 ++- include/linux/syscalls.h | 2 ++ kernel/sys.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index dd63d4775398..7e639f78b0b9 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -317,3 +317,4 @@ ENTRY(sys_call_table) .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages + .long sys_getcpu diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 30ed0f6f4a2b..32fd32bea07c 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -713,4 +713,5 @@ ia32_sys_call_table: .quad sys_tee .quad compat_sys_vmsplice .quad compat_sys_move_pages + .quad sys_getcpu ia32_syscall_end: diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index fc1c8ddae149..565d0897b205 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -323,10 +323,11 @@ #define __NR_tee 315 #define __NR_vmsplice 316 #define __NR_move_pages 317 +#define __NR_getcpu 318 #ifdef __KERNEL__ -#define NR_syscalls 318 +#define NR_syscalls 319 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 008f04c56737..3f0f716225ec 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -53,6 +53,7 @@ struct mq_attr; struct compat_stat; struct compat_timeval; struct robust_list_head; +struct getcpu_cache; #include #include @@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int pid, size_t __user *len_ptr); asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); +asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache); #endif diff --git a/kernel/sys.c b/kernel/sys.c index e236f98f7ec5..3f894775488d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -2062,3 +2063,33 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, } return error; } + +asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, + struct getcpu_cache __user *cache) +{ + int err = 0; + int cpu = raw_smp_processor_id(); + if (cpup) + err |= put_user(cpu, cpup); + if (nodep) + err |= put_user(cpu_to_node(cpu), nodep); + if (cache) { + /* + * The cache is not needed for this implementation, + * but make sure user programs pass something + * valid. vsyscall implementations can instead make + * good use of the cache. Only use t0 and t1 because + * these are available in both 32bit and 64bit ABI (no + * need for a compat_getcpu). 32bit has enough + * padding + */ + unsigned long t0, t1; + get_user(t0, &cache->t0); + get_user(t1, &cache->t1); + t0++; + t1++; + put_user(t0, &cache->t0); + put_user(t1, &cache->t1); + } + return err ? -EFAULT : 0; +} -- cgit v1.2.3-71-gd317 From 5a1b3999d6cb7ab87f1f3b1700bc91839fd6fa29 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] x86: Some preparationary cleanup for stack trace - Remove unused all_contexts parameter No caller used it - Move skip argument into the structure (needed for followon patches) Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- arch/i386/kernel/stacktrace.c | 11 +++-------- arch/s390/kernel/stacktrace.c | 17 ++++++++--------- arch/x86_64/kernel/stacktrace.c | 14 +++++--------- include/linux/stacktrace.h | 7 ++++--- kernel/lockdep.c | 5 ++++- 5 files changed, 24 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c index e62a037ab399..ae3c32a87add 100644 --- a/arch/i386/kernel/stacktrace.c +++ b/arch/i386/kernel/stacktrace.c @@ -61,12 +61,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip, /* * Save stack-backtrace addresses into a stack_trace buffer. - * If all_contexts is set, all contexts (hardirq, softirq and process) - * are saved. If not set then only the current context is saved. */ -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { unsigned long ebp; unsigned long *stack = &ebp; @@ -85,10 +81,9 @@ void save_stack_trace(struct stack_trace *trace, struct thread_info *context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = save_context_stack(trace, skip, context, stack, ebp); + ebp = save_context_stack(trace, trace->skip, context, stack, ebp); stack = (unsigned long *)context->previous_esp; - if (!all_contexts || !stack || - trace->nr_entries >= trace->max_entries) + if (!stack || trace->nr_entries >= trace->max_entries) break; trace->entries[trace->nr_entries++] = ULONG_MAX; if (trace->nr_entries >= trace->max_entries) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index de83f38288d0..d9428a0fc8fb 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -59,9 +59,7 @@ static inline unsigned long save_context_stack(struct stack_trace *trace, } } -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { register unsigned long sp asm ("15"); unsigned long orig_sp; @@ -69,22 +67,23 @@ void save_stack_trace(struct stack_trace *trace, sp &= PSW_ADDR_INSN; orig_sp = sp; - sp = save_context_stack(trace, &skip, sp, + sp = save_context_stack(trace, &trace->skip, sp, S390_lowcore.panic_stack - PAGE_SIZE, S390_lowcore.panic_stack); - if ((sp != orig_sp) && !all_contexts) + if ((sp != orig_sp) && !trace->all_contexts) return; - sp = save_context_stack(trace, &skip, sp, + sp = save_context_stack(trace, &trace->skip, sp, S390_lowcore.async_stack - ASYNC_SIZE, S390_lowcore.async_stack); - if ((sp != orig_sp) && !all_contexts) + if ((sp != orig_sp) && !trace->all_contexts) return; if (task) - save_context_stack(trace, &skip, sp, + save_context_stack(trace, &trace->skip, sp, (unsigned long) task_stack_page(task), (unsigned long) task_stack_page(task) + THREAD_SIZE); else - save_context_stack(trace, &skip, sp, S390_lowcore.thread_info, + save_context_stack(trace, &trace->skip, sp, + S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); return; } diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c index 32cf55eb9af8..1c022af8fe1e 100644 --- a/arch/x86_64/kernel/stacktrace.c +++ b/arch/x86_64/kernel/stacktrace.c @@ -109,9 +109,10 @@ out_restore: * Save stack-backtrace addresses into a stack_trace buffer: */ static inline unsigned long -save_context_stack(struct stack_trace *trace, unsigned int skip, +save_context_stack(struct stack_trace *trace, unsigned long stack, unsigned long stack_end) { + int skip = trace->skip; unsigned long addr; #ifdef CONFIG_FRAME_POINTER @@ -159,12 +160,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip, /* * Save stack-backtrace addresses into a stack_trace buffer. - * If all_contexts is set, all contexts (hardirq, softirq and process) - * are saved. If not set then only the current context is saved. */ -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { unsigned long stack = (unsigned long)&stack; int i, nr_stacks = 0, stacks_done[MAX_STACKS]; @@ -207,9 +204,8 @@ void save_stack_trace(struct stack_trace *trace, return; stacks_done[nr_stacks] = stack_end; - stack = save_context_stack(trace, skip, stack, stack_end); - if (!all_contexts || !stack || - trace->nr_entries >= trace->max_entries) + stack = save_context_stack(trace, stack, stack_end); + if (!stack || trace->nr_entries >= trace->max_entries) return; trace->entries[trace->nr_entries++] = ULONG_MAX; if (trace->nr_entries >= trace->max_entries) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 9cc81e572224..50e2b01e517c 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -5,15 +5,16 @@ struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; + int skip; /* input argument: How many entries to skip */ + int all_contexts; /* input argument: if true do than one stack */ }; extern void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip); + struct task_struct *task); extern void print_stack_trace(struct stack_trace *trace, int spaces); #else -# define save_stack_trace(trace, task, all, skip) do { } while (0) +# define save_stack_trace(trace, task) do { } while (0) # define print_stack_trace(trace) do { } while (0) #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9bad17884513..900b4cb1a024 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -224,7 +224,10 @@ static int save_trace(struct stack_trace *trace) trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; trace->entries = stack_trace + nr_stack_trace_entries; - save_stack_trace(trace, NULL, 0, 3); + trace->skip = 3; + trace->all_contexts = 0; + + save_stack_trace(trace, NULL); trace->max_entries = trace->nr_entries; -- cgit v1.2.3-71-gd317 From d28c4393a7bf558538e9def269c1caeab6ec056f Mon Sep 17 00:00:00 2001 From: "Prasanna S.P" Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] x86: error_code is not safe for kprobes This patch moves the entry.S:error_entry to .kprobes.text section, since code marked unsafe for kprobes jumps directly to entry.S::error_entry, that must be marked unsafe as well. This patch also moves all the ".previous.text" asm directives to ".previous" for kprobes section. AK: Following a similar i386 patch from Chuck Ebbert AK: Also merged Jeremy's fix in. +From: Jeremy Fitzhardinge KPROBE_ENTRY does a .section .kprobes.text, and expects its users to do a .previous at the end of the function. Unfortunately, if any code within the function switches sections, for example .fixup, then the .previous ends up putting all subsequent code into .fixup. Worse, any subsequent .fixup code gets intermingled with the code its supposed to be fixing (which is also in .fixup). It's surprising this didn't cause more havok. The fix is to use .pushsection/.popsection, so this stuff nests properly. A further cleanup would be to get rid of all .section/.previous pairs, since they're inherently fragile. +From: Chuck Ebbert <76306.1226@compuserve.com> Because code marked unsafe for kprobes jumps directly to entry.S::error_code, that must be marked unsafe as well. The easiest way to do that is to move the page fault entry point to just before error_code and let it inherit the same section. Also moved all the ".previous" asm directives for kprobes sections to column 1 and removed ".text" from them. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 25 +++++++++++++------------ arch/x86_64/kernel/entry.S | 19 +++++++------------ include/linux/linkage.h | 6 +++++- 3 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 87f9f60b803b..ba22ec8fab54 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -591,11 +591,9 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error +KPROBE_ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: @@ -645,6 +643,7 @@ error_code: call *%edi jmp ret_from_exception CFI_ENDPROC +KPROBE_END(page_fault) ENTRY(coprocessor_error) RING0_INT_FRAME @@ -720,7 +719,8 @@ debug_stack_correct: call do_debug jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(debug) + /* * NMI is doubly nasty. It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -816,7 +816,7 @@ KPROBE_ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(int3) ENTRY(overflow) RING0_INT_FRAME @@ -881,7 +881,7 @@ KPROBE_ENTRY(general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text +KPROBE_END(general_protection) ENTRY(alignment_check) RING0_EC_FRAME @@ -890,13 +890,14 @@ ENTRY(alignment_check) jmp error_code CFI_ENDPROC -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault +ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_divide_error CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 2092f565aa87..780f9b26169f 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -819,7 +819,7 @@ paranoid_schedule\trace: * Exception entry point. This expects an error code/orig_rax on the stack * and the exception handler in %rax. */ -ENTRY(error_entry) +KPROBE_ENTRY(error_entry) _frame RDI /* rdi slot contains rax, oldrax contains error code */ cld @@ -903,7 +903,7 @@ error_kernelspace: cmpq $gs_change,RIP(%rsp) je error_swapgs jmp error_sti -END(error_entry) +KPROBE_END(error_entry) /* Reload gs selector with exception handling */ /* edi: new selector */ @@ -1025,8 +1025,7 @@ ENDPROC(execve) KPROBE_ENTRY(page_fault) errorentry do_page_fault -END(page_fault) - .previous .text +KPROBE_END(page_fault) ENTRY(coprocessor_error) zeroentry do_coprocessor_error @@ -1047,8 +1046,7 @@ KPROBE_ENTRY(debug) CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK paranoidexit -END(debug) - .previous .text +KPROBE_END(debug) /* runs on exception stack */ KPROBE_ENTRY(nmi) @@ -1062,8 +1060,7 @@ KPROBE_ENTRY(nmi) jmp paranoid_exit1 CFI_ENDPROC #endif -END(nmi) - .previous .text +KPROBE_END(nmi) KPROBE_ENTRY(int3) INTR_FRAME @@ -1072,8 +1069,7 @@ KPROBE_ENTRY(int3) paranoidentry do_int3, DEBUG_STACK jmp paranoid_exit1 CFI_ENDPROC -END(int3) - .previous .text +KPROBE_END(int3) ENTRY(overflow) zeroentry do_overflow @@ -1121,8 +1117,7 @@ END(stack_segment) KPROBE_ENTRY(general_protection) errorentry do_general_protection -END(general_protection) - .previous .text +KPROBE_END(general_protection) ENTRY(alignment_check) errorentry do_alignment_check diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 932021f872d5..6c9873f88287 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -35,9 +35,13 @@ #endif #define KPROBE_ENTRY(name) \ - .section .kprobes.text, "ax"; \ + .pushsection .kprobes.text, "ax"; \ ENTRY(name) +#define KPROBE_END(name) \ + END(name); \ + .popsection + #ifndef END #define END(name) \ .size name, .-name -- cgit v1.2.3-71-gd317 From e07e23e1fd3000289fc7ccc6c71879070d3b19e0 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] non lazy "sleazy" fpu implementation Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every* context switch a trap is taken for the first FPU use to restore the FPU context lazily. This is of course great for applications that have very sporadic or no FPU use (since then you avoid doing the expensive save/restore all the time). However for very frequent FPU users... you take an extra trap every context switch. The patch below adds a simple heuristic to this code: After 5 consecutive context switches of FPU use, the lazy behavior is disabled and the context gets restored every context switch. If the app indeed uses the FPU, the trap is avoided. (the chance of the 6th time slice using FPU after the previous 5 having done so are quite high obviously). After 256 switches, this is reset and lazy behavior is returned (until there are 5 consecutive ones again). The reason for this is to give apps that do longer bursts of FPU use still the lazy behavior back after some time. [akpm@osdl.org: place new task_struct field next to jit_keyring to save space] Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/x86_64/kernel/process.c | 10 ++++++++++ arch/x86_64/kernel/traps.c | 1 + include/asm-x86_64/i387.h | 5 ++++- include/linux/sched.h | 9 +++++++++ 4 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 6fbd19564e4e..9e9a70e50c72 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + /* we're going to use this soon, after a few expensive things */ + if (next_p->fpu_counter>5) + prefetch(&next->i387.fxsave); + /* * Reload esp0, LDT and the page table pointer: */ @@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) __switch_to_xtra(prev_p, next_p, tss); + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next_p->fpu_counter>5) + math_state_restore(); return prev_p; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 28e53342f294..ffc40cff1e07 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -1136,6 +1136,7 @@ asmlinkage void math_state_restore(void) init_fpu(me); restore_fpu_checking(&me->thread.i387.fxsave); task_thread_info(me)->status |= TS_USEDFPU; + me->fpu_counter++; } void __init trap_init(void) diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h index cba8a3b0cded..60c0f4853fdb 100644 --- a/include/asm-x86_64/i387.h +++ b/include/asm-x86_64/i387.h @@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask; extern void mxcsr_feature_mask_init(void); extern void init_fpu(struct task_struct *child); extern int save_i387(struct _fpstate __user *buf); +extern asmlinkage void math_state_restore(void); /* * FPU lazy state save handling... @@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf); #define unlazy_fpu(tsk) do { \ if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu(tsk); \ + save_init_fpu(tsk); \ + else \ + tsk->fpu_counter = 0; \ } while (0) /* Ignore delayed exceptions from user space */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 34ed0d99b1bd..807556c5bcd2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,15 @@ struct task_struct { struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; int oomkilladj; /* OOM kill score adjustment (bit shift). */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock -- cgit v1.2.3-71-gd317 From 1bb4996bcebca1cde49d964b4e012699ce180e61 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] Move compiler check for modules to ia64 only Apparently IA64 needs it, but i386/x86-64 don't anymore since gcc 2.95 support was dropped. Nobody else on linux-arch requested keeping it generically Cc: tony.luck@intel.com Cc: kaos@sgi.com Signed-off-by: Andi Kleen --- include/asm-ia64/module.h | 3 ++- include/linux/vermagic.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/asm-ia64/module.h b/include/asm-ia64/module.h index 85c82bd819f2..d2da61e4c49b 100644 --- a/include/asm-ia64/module.h +++ b/include/asm-ia64/module.h @@ -28,7 +28,8 @@ struct mod_arch_specific { #define Elf_Ehdr Elf64_Ehdr #define MODULE_PROC_FAMILY "ia64" -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY \ + "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) #define ARCH_SHF_SMALL SHF_IA_64_SHORT diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 46919f9f5eb3..4d0909e53595 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -24,5 +24,5 @@ #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ - MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC \ - "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC + -- cgit v1.2.3-71-gd317 From 575400d1b483fbe9e03c68758059bfaf4e4768d1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 26 Sep 2006 10:52:38 +0200 Subject: [PATCH] i386: Fix the EDD code misparsing the command line The EDD code would scan the command line as a fixed array, without taking account of either whitespace, null-termination, the old command-line protocol, late overrides early, or the fact that the command line may not be reachable from INITSEG. This should fix those problems, and enable us to use a longer command line. Signed-off-by: H. Peter Anvin Signed-off-by: Andi Kleen --- arch/i386/boot/edd.S | 97 ++++++++++++++++++++++++++++++++++++++++------------ include/linux/edd.h | 1 + 2 files changed, 76 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S index 4b84ea216f2b..34321368011a 100644 --- a/arch/i386/boot/edd.S +++ b/arch/i386/boot/edd.S @@ -15,42 +15,95 @@ #include #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + +# It is assumed that %ds == INITSEG here + movb $0, (EDD_MBR_SIG_NR_BUF) movb $0, (EDDNR) -# Check the command line for two options: +# Check the command line for options: # edd=of disables EDD completely (edd=off) # edd=sk skips the MBR test (edd=skipmbr) +# edd=on re-enables EDD (edd=on) + pushl %esi - cmpl $0, %cs:cmd_line_ptr - jz done_cl + movw $edd_mbr_sig_start, %di # Default to edd=on + movl %cs:(cmd_line_ptr), %esi -# ds:esi has the pointer to the command line now - movl $(COMMAND_LINE_SIZE-7), %ecx -# loop through kernel command line one byte at a time -cl_loop: - cmpl $EDD_CL_EQUALS, (%si) + andl %esi, %esi + jz old_cl # Old boot protocol? + +# Convert to a real-mode pointer in fs:si + movl %esi, %eax + shrl $4, %eax + movw %ax, %fs + andw $0xf, %si + jmp have_cl_pointer + +# Old-style boot protocol? +old_cl: + push %ds # aka INITSEG + pop %fs + + cmpw $0xa33f, (0x20) + jne done_cl # No command line at all? + movw (0x22), %si # Pointer relative to INITSEG + +# fs:si has the pointer to the command line now +have_cl_pointer: + +# Loop through kernel command line one byte at a time. Just in +# case the loader is buggy and failed to null-terminate the command line +# terminate if we get close enough to the end of the segment that we +# cannot fit "edd=XX"... +cl_atspace: + cmpw $-5, %si # Watch for segment wraparound + jae done_cl + movl %fs:(%si), %eax + andb %al, %al # End of line? + jz done_cl + cmpl $EDD_CL_EQUALS, %eax jz found_edd_equals - incl %esi - loop cl_loop - jmp done_cl + cmpb $0x20, %al # <= space consider whitespace + ja cl_skipword + incw %si + jmp cl_atspace + +cl_skipword: + cmpw $-5, %si # Watch for segment wraparound + jae done_cl + movb %fs:(%si), %al # End of string? + andb %al, %al + jz done_cl + cmpb $0x20, %al + jbe cl_atspace + incw %si + jmp cl_skipword + found_edd_equals: # only looking at first two characters after equals - addl $4, %esi - cmpw $EDD_CL_OFF, (%si) # edd=of - jz do_edd_off - cmpw $EDD_CL_SKIP, (%si) # edd=sk - jz do_edd_skipmbr - jmp done_cl +# late overrides early on the command line, so keep going after finding something + movw %fs:4(%si), %ax + cmpw $EDD_CL_OFF, %ax # edd=of + je do_edd_off + cmpw $EDD_CL_SKIP, %ax # edd=sk + je do_edd_skipmbr + cmpw $EDD_CL_ON, %ax # edd=on + je do_edd_on + jmp cl_skipword do_edd_skipmbr: - popl %esi - jmp edd_start + movw $edd_start, %di + jmp cl_skipword do_edd_off: - popl %esi - jmp edd_done + movw $edd_done, %di + jmp cl_skipword +do_edd_on: + movw $edd_mbr_sig_start, %di + jmp cl_skipword + done_cl: popl %esi - + jmpw *%di # Read the first sector of each BIOS disk device and store the 4-byte signature edd_mbr_sig_start: diff --git a/include/linux/edd.h b/include/linux/edd.h index 162512b886f7..b2b3e68aa512 100644 --- a/include/linux/edd.h +++ b/include/linux/edd.h @@ -52,6 +52,7 @@ #define EDD_CL_EQUALS 0x3d646465 /* "edd=" */ #define EDD_CL_OFF 0x666f /* "of" for off */ #define EDD_CL_SKIP 0x6b73 /* "sk" for skipmbr */ +#define EDD_CL_ON 0x6e6f /* "on" for on */ #ifndef __ASSEMBLY__ -- cgit v1.2.3-71-gd317 From 0a4254058037eb172758961d0a5b94f4320a1425 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 26 Sep 2006 10:52:38 +0200 Subject: [PATCH] Add the canary field to the PDA area and the task struct This patch adds the per thread cookie field to the task struct and the PDA. Also it makes sure that the PDA value gets the new cookie value at context switch, and that a new task gets a new cookie at task creation time. Signed-off-by: Arjan van Ven Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen CC: Andi Kleen --- arch/x86_64/kernel/process.c | 8 ++++++++ include/asm-x86_64/pda.h | 7 ++++++- include/linux/sched.h | 5 +++++ kernel/fork.c | 5 +++++ 4 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 9e9a70e50c72..fba8dfeda67c 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -625,6 +625,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); +#ifdef CONFIG_CC_STACKPROTECTOR + write_pda(stack_canary, next_p->stack_canary); + /* + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement + */ + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); +#endif /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index 6794ffaae433..e7773e0af865 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h @@ -16,7 +16,12 @@ struct x8664_pda { unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts with -1 */ int cpunumber; /* 36 Logical CPU number */ - char *irqstackptr; /* 40 top of irqstack */ +#ifdef CONFIG_CC_STACKPROTECTOR + unsigned long stack_canary; /* 40 stack canary value */ + /* gcc-ABI: this canary MUST be at + offset 40!!! */ +#endif + char *irqstackptr; int nodenumber; /* number of current node */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 807556c5bcd2..9d4aa7f95bc8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -819,6 +819,11 @@ struct task_struct { unsigned did_exec:1; pid_t pid; pid_t tgid; + +#ifdef CONFIG_CC_STACKPROTECTOR + /* Canary value for the -fstack-protector gcc feature */ + unsigned long stack_canary; +#endif /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with diff --git a/kernel/fork.c b/kernel/fork.c index f9b014e3e700..a0dad84567c9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -175,6 +176,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->thread_info = ti; setup_thread_stack(tsk, orig); +#ifdef CONFIG_CC_STACKPROTECTOR + tsk->stack_canary = get_random_int(); +#endif + /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); -- cgit v1.2.3-71-gd317 From 3b171672831b9633c2ed8fa94805255cd4d5af19 Mon Sep 17 00:00:00 2001 From: Dmitriy Zavin Date: Tue, 26 Sep 2006 10:52:42 +0200 Subject: [PATCH] Add 64bit jiffies compares (for use with get_jiffies_64) The current time_before/time_after macros will fail typechecks when passed u64 values (as returned by get_jiffies_64()). On 64bit systems, this will just result in a warning about mismatching types without explicit casts, but since unsigned long and u64 (unsigned long long) are of same size, it will still work. On 32bit systems, a long is 32bits, so the value from get_jiffies_64() will be truncated by the cast and thus lose all the precision gained by 64bit jiffies. Signed-off-by: Dmitriy Zavin Signed-off-by: Andi Kleen --- include/linux/jiffies.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 329ebcffa106..c8d5f207c3d4 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -115,6 +115,21 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) +/* Same as above, but does so with platform independent 64bit types. + * These must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64() */ +#define time_after64(a,b) \ + (typecheck(__u64, a) && \ + typecheck(__u64, b) && \ + ((__s64)(b) - (__s64)(a) < 0)) +#define time_before64(a,b) time_after64(b,a) + +#define time_after_eq64(a,b) \ + (typecheck(__u64, a) && \ + typecheck(__u64, b) && \ + ((__s64)(a) - (__s64)(b) >= 0)) +#define time_before_eq64(a,b) time_after_eq64(b,a) + /* * Have the 32 bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. -- cgit v1.2.3-71-gd317 From 1739adea321788e380794c1072c810d445090bca Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 26 Aug 2006 03:05:17 -0300 Subject: V4L/DVB (4545): Add missing v4l2_buf_type to struct v4l2_sliced_vbi_cap. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index e3715d774197..d5746d470c73 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1135,7 +1135,8 @@ struct v4l2_sliced_vbi_cap (equals frame lines 313-336 for 625 line video standards, 263-286 for 525 line standards) */ __u16 service_lines[2][24]; - __u32 reserved[4]; /* must be 0 */ + enum v4l2_buf_type type; + __u32 reserved[3]; /* must be 0 */ }; struct v4l2_sliced_vbi_data -- cgit v1.2.3-71-gd317 From 616b8b639e6491cfa63f79238a5c6fbee383eb09 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 1 Sep 2006 18:36:48 -0300 Subject: V4L/DVB (4585): VIDIOC_G_SLICED_VBI_CAP now accepts a v4l2_buf_type, make it IOWR The VIDIOC_G_SLICED_VBI_CAP needs to receive the v4l2_buf_type field before it can return a result. Hence this ioctl must be IOWR, not IOR. Since this ioctl is still marked experimental we can make this change. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index d5746d470c73..44c59da26ed2 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1243,7 +1243,7 @@ struct v4l2_streamparm #define VIDIOC_G_PRIORITY _IOR ('V', 67, enum v4l2_priority) #define VIDIOC_S_PRIORITY _IOW ('V', 68, enum v4l2_priority) #if 1 -#define VIDIOC_G_SLICED_VBI_CAP _IOR ('V', 69, struct v4l2_sliced_vbi_cap) +#define VIDIOC_G_SLICED_VBI_CAP _IOWR ('V', 69, struct v4l2_sliced_vbi_cap) #endif #define VIDIOC_LOG_STATUS _IO ('V', 70) #define VIDIOC_G_EXT_CTRLS _IOWR ('V', 71, struct v4l2_ext_controls) -- cgit v1.2.3-71-gd317 From 632bbfeee4f042c05bc65150b4433a297d3fe387 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Mon, 25 Sep 2006 23:30:53 -0700 Subject: [PATCH] trigger a syntax error if percpu macros are incorrectly used get_cpu_var()/per_cpu()/__get_cpu_var() arguments must be simple identifiers. Otherwise the arch dependent implementations might break. This patch enforces the correct usage of the macros by producing a syntax error if the variable is not a simple identifier. Signed-off-by: Jan Blunck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/percpu.h | 4 +++- include/asm-s390/percpu.h | 20 +++++++++++--------- include/asm-x86_64/percpu.h | 12 +++++++++--- include/linux/percpu.h | 10 ++++++++-- 4 files changed, 31 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index e160e04290fb..6d45ee5472af 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -14,7 +14,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu])) +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) #define __get_cpu_var(var) per_cpu(var, smp_processor_id()) #define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id()) diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index 28b3517e787c..495ad99c7635 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -15,18 +15,20 @@ */ #if defined(__s390x__) && defined(MODULE) -#define __reloc_hide(var,offset) \ - (*({ unsigned long *__ptr; \ - asm ( "larl %0,per_cpu__"#var"@GOTENT" \ - : "=a" (__ptr) : "X" (per_cpu__##var) ); \ - (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) +#define __reloc_hide(var,offset) (*({ \ + extern int simple_indentifier_##var(void); \ + unsigned long *__ptr; \ + asm ( "larl %0,per_cpu__"#var"@GOTENT" \ + : "=a" (__ptr) : "X" (per_cpu__##var) ); \ + (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) #else -#define __reloc_hide(var, offset) \ - (*({ unsigned long __ptr; \ - asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ - (typeof(&per_cpu__##var)) (__ptr + (offset)); })) +#define __reloc_hide(var, offset) (*({ \ + extern int simple_indentifier_##var(void); \ + unsigned long __ptr; \ + asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ + (typeof(&per_cpu__##var)) (__ptr + (offset)); })) #endif diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 08dd9f9dda81..bffb2f886a51 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -21,9 +21,15 @@ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) -#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) -#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); })) +#define __get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) +#define __raw_get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) /* A macro to avoid #include hell... */ #define percpu_modcopy(pcpudst, src, size) \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cb9039a21f2a..f926490a7d8b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -11,8 +11,14 @@ #define PERCPU_ENOUGH_ROOM 32768 #endif -/* Must be an lvalue. */ -#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) +/* + * Must be an lvalue. Since @var must be a simple identifier, + * we force a syntax error here if it isn't. + */ +#define get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + preempt_disable(); \ + &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() #ifdef CONFIG_SMP -- cgit v1.2.3-71-gd317 From a6ca1b99ed434f3fb41bbed647ed36c0420501e5 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 25 Sep 2006 23:30:55 -0700 Subject: [PATCH] update to the kernel kmap/kunmap API Give non-highmem architectures access to the kmap API for the purposes of overriding (this is what the attached patch does). The proposal is that we should now require all architectures with coherence issues to manage data coherence via the kmap/kunmap API. Thus driver writers never have to write code like kmap(page) modify data in page flush_kernel_dcache_page(page) kunmap(page) instead, kmap/kunmap will manage the coherence and driver (and filesystem) writers don't need to worry about how to flush between kmap and kunmap. For most architectures, the page only needs to be flushed if it was actually written to *and* there are user mappings of it, so the best implementation looks to be: clear the page dirty pte bit in the kernel page tables on kmap and on kunmap, check page->mappings for user maps, and then the dirty bit, and only flush if it both has user mappings and is dirty. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 85ce7ef9a512..42620e723abb 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -29,6 +29,7 @@ unsigned int nr_free_highpages(void); static inline unsigned int nr_free_highpages(void) { return 0; } +#ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { might_sleep(); @@ -41,6 +42,7 @@ static inline void *kmap(struct page *page) #define kunmap_atomic(addr, idx) do { } while (0) #define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) +#endif #endif /* CONFIG_HIGHMEM */ -- cgit v1.2.3-71-gd317 From 725d704ecaca4a43f067092c140d4f3271cf2856 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Mon, 25 Sep 2006 23:30:55 -0700 Subject: [PATCH] mm: VM_BUG_ON Introduce a VM_BUG_ON, which is turned on with CONFIG_DEBUG_VM. Use this in the lightweight, inline refcounting functions; PageLRU and PageActive checks in vmscan, because they're pretty well confined to vmscan. And in page allocate/free fastpaths which can be the hottest parts of the kernel for kbuilds. Unlike BUG_ON, VM_BUG_ON must not be used to execute statements with side-effects, and should not be used outside core mm code. Signed-off-by: Nick Piggin Cc: Hugh Dickins Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++++++++- mm/internal.h | 4 ++-- mm/page_alloc.c | 23 +++++++++++------------ mm/swap.c | 12 ++++++------ mm/vmscan.c | 16 ++++++++-------- 5 files changed, 36 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 224178a000d2..7d20b25c58fc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -278,6 +278,12 @@ struct page { */ #include +#ifdef CONFIG_DEBUG_VM +#define VM_BUG_ON(cond) BUG_ON(cond) +#else +#define VM_BUG_ON(condition) do { } while(0) +#endif + /* * Methods to modify the page usage count. * @@ -297,7 +303,7 @@ struct page { */ static inline int put_page_testzero(struct page *page) { - BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON(atomic_read(&page->_count) == 0); return atomic_dec_and_test(&page->_count); } @@ -307,6 +313,7 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { + VM_BUG_ON(PageCompound(page)); return atomic_inc_not_zero(&page->_count); } @@ -323,6 +330,7 @@ static inline void get_page(struct page *page) { if (unlikely(PageCompound(page))) page = (struct page *)page_private(page); + VM_BUG_ON(atomic_read(&page->_count) == 0); atomic_inc(&page->_count); } diff --git a/mm/internal.h b/mm/internal.h index d20e3cc4aef0..d527b80b292f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); - BUG_ON(atomic_read(&page->_count)); + VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); + VM_BUG_ON(atomic_read(&page->_count)); set_page_count(page, 1); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8a52ba9fe693..4b33878e9488 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -127,7 +127,6 @@ static int bad_range(struct zone *zone, struct page *page) return 0; } - #else static inline int bad_range(struct zone *zone, struct page *page) { @@ -218,12 +217,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; - BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); + VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); /* * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO * and __GFP_HIGHMEM from hard or soft interrupt context. */ - BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); + VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); for (i = 0; i < (1 << order); i++) clear_highpage(page + i); } @@ -347,8 +346,8 @@ static inline void __free_one_page(struct page *page, page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - BUG_ON(page_idx & (order_size - 1)); - BUG_ON(bad_range(zone, page)); + VM_BUG_ON(page_idx & (order_size - 1)); + VM_BUG_ON(bad_range(zone, page)); zone->free_pages += order_size; while (order < MAX_ORDER-1) { @@ -421,7 +420,7 @@ static void free_pages_bulk(struct zone *zone, int count, while (count--) { struct page *page; - BUG_ON(list_empty(list)); + VM_BUG_ON(list_empty(list)); page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); @@ -512,7 +511,7 @@ static inline void expand(struct zone *zone, struct page *page, area--; high--; size >>= 1; - BUG_ON(bad_range(zone, &page[size])); + VM_BUG_ON(bad_range(zone, &page[size])); list_add(&page[size].lru, &area->free_list); area->nr_free++; set_page_order(&page[size], high); @@ -761,8 +760,8 @@ void split_page(struct page *page, unsigned int order) { int i; - BUG_ON(PageCompound(page)); - BUG_ON(!page_count(page)); + VM_BUG_ON(PageCompound(page)); + VM_BUG_ON(!page_count(page)); for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); } @@ -809,7 +808,7 @@ again: local_irq_restore(flags); put_cpu(); - BUG_ON(bad_range(zone, page)); + VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) goto again; return page; @@ -1083,7 +1082,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) * get_zeroed_page() returns a 32-bit address, which cannot represent * a highmem page */ - BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); + VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); page = alloc_pages(gfp_mask | __GFP_ZERO, 0); if (page) @@ -1116,7 +1115,7 @@ EXPORT_SYMBOL(__free_pages); fastcall void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { - BUG_ON(!virt_addr_valid((void *)addr)); + VM_BUG_ON(!virt_addr_valid((void *)addr)); __free_pages(virt_to_page((void *)addr), order); } } diff --git a/mm/swap.c b/mm/swap.c index 687686a61f7c..600235e43704 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -233,7 +233,7 @@ void fastcall __page_cache_release(struct page *page) struct zone *zone = page_zone(page); spin_lock_irqsave(&zone->lru_lock, flags); - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); del_page_from_lru(zone, page); spin_unlock_irqrestore(&zone->lru_lock, flags); @@ -284,7 +284,7 @@ void release_pages(struct page **pages, int nr, int cold) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); del_page_from_lru(zone, page); } @@ -337,7 +337,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); if (put_page_testzero(page)) pagevec_add(&pages_to_free, page); } @@ -364,7 +364,7 @@ void __pagevec_lru_add(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); add_page_to_inactive_list(zone, page); } @@ -391,9 +391,9 @@ void __pagevec_lru_add_active(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(PageActive(page)); + VM_BUG_ON(PageActive(page)); SetPageActive(page); add_page_to_active_list(zone, page); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 5d4c4d02254d..41a3da3d6ccc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -440,7 +440,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (TestSetPageLocked(page)) goto keep; - BUG_ON(PageActive(page)); + VM_BUG_ON(PageActive(page)); sc->nr_scanned++; @@ -564,7 +564,7 @@ keep_locked: unlock_page(page); keep: list_add(&page->lru, &ret_pages); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); } list_splice(&ret_pages, page_list); if (pagevec_count(&freed_pvec)) @@ -603,7 +603,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); list_del(&page->lru); target = src; @@ -674,7 +674,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, */ while (!list_empty(&page_list)) { page = lru_to_page(&page_list); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); list_del(&page->lru); if (PageActive(page)) @@ -797,9 +797,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, while (!list_empty(&l_inactive)) { page = lru_to_page(&l_inactive); prefetchw_prev_lru_page(page, &l_inactive, flags); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(!PageActive(page)); + VM_BUG_ON(!PageActive(page)); ClearPageActive(page); list_move(&page->lru, &zone->inactive_list); @@ -827,9 +827,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, while (!list_empty(&l_active)) { page = lru_to_page(&l_active); prefetchw_prev_lru_page(page, &l_active, flags); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(!PageActive(page)); + VM_BUG_ON(!PageActive(page)); list_move(&page->lru, &zone->active_list); pgmoved++; if (!pagevec_add(&pvec, page)) { -- cgit v1.2.3-71-gd317 From d08b3851da41d0ee60851f2c75b118e1f7a5fc89 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Sep 2006 23:30:57 -0700 Subject: [PATCH] mm: tracking shared dirty pages Tracking of dirty pages in shared writeable mmap()s. The idea is simple: write protect clean shared writeable pages, catch the write-fault, make writeable and set dirty. On page write-back clean all the PTE dirty bits and write protect them once again. The implementation is a tad harder, mainly because the default backing_dev_info capabilities were too loosely maintained. Hence it is not enough to test the backing_dev_info for cap_account_dirty. The current heuristic is as follows, a VMA is eligible when: - its shared writeable (vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED) - it is not a 'special' mapping (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) == 0 - the backing_dev_info is cap_account_dirty mapping_cap_account_dirty(vma->vm_file->f_mapping) - f_op->mmap() didn't change the default page protection Page from remap_pfn_range() are explicitly excluded because their COW semantics are already horrid enough (see vm_normal_page() in do_wp_page()) and because they don't have a backing store anyway. mprotect() is taught about the new behaviour as well. However it overrides the last condition. Cleaning the pages on write-back is done with page_mkclean() a new rmap call. It can be called on any page, but is currently only implemented for mapped pages, if the page is found the be of a VMA that accounts dirty pages it will also wrprotect the PTE. Finally, in fs/buffers.c:try_to_free_buffers(); remove clear_page_dirty() from under ->private_lock. This seems to be safe, since ->private_lock is used to serialize access to the buffers, not the page itself. This is needed because clear_page_dirty() will call into page_mkclean() and would thereby violate locking order. [dhowells@redhat.com: Provide a page_mkclean() implementation for NOMMU] Signed-off-by: Peter Zijlstra Cc: Hugh Dickins Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- include/linux/mm.h | 34 +++++++++++++++++++++++++++ include/linux/rmap.h | 14 +++++++++++ mm/memory.c | 29 ++++++++++++++++++----- mm/mmap.c | 10 ++++---- mm/mprotect.c | 21 +++++++---------- mm/page-writeback.c | 17 ++++++++++---- mm/rmap.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 162 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 71649ef9b658..3b6d701073e7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page) spin_lock(&mapping->private_lock); ret = drop_buffers(page, &buffers_to_free); + spin_unlock(&mapping->private_lock); if (ret) { /* * If the filesystem writes its buffers by hand (eg ext3) @@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page) */ clear_page_dirty(page); } - spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d20b25c58fc..449841413cf1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -15,6 +15,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -810,6 +811,39 @@ struct shrinker; extern struct shrinker *set_shrinker(int, shrinker_t); extern void remove_shrinker(struct shrinker *shrinker); +/* + * Some shared mappigns will want the pages marked read-only + * to track write events. If so, we'll downgrade vm_page_prot + * to the private version (using protection_map[] without the + * VM_SHARED bit). + */ +static inline int vma_wants_writenotify(struct vm_area_struct *vma) +{ + unsigned int vm_flags = vma->vm_flags; + + /* If it was private or non-writable, the write bit is already clear */ + if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) + return 0; + + /* The backer wishes to know when pages are first written to? */ + if (vma->vm_ops && vma->vm_ops->page_mkwrite) + return 1; + + /* The open routine did something to the protections already? */ + if (pgprot_val(vma->vm_page_prot) != + pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)])) + return 0; + + /* Specialty mapping? */ + if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) + return 0; + + /* Can the mapping track the dirty pages? */ + return vma->vm_file && vma->vm_file->f_mapping && + mapping_cap_account_dirty(vma->vm_file->f_mapping); +} + extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf97b0900014..db2c1df4fef9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *, */ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +/* + * Cleans the PTEs of shared mappings. + * (and since clean PTEs should also be readonly, write protects them too) + * + * returns the number of cleaned PTEs. + */ +int page_mkclean(struct page *); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) @@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); #define page_referenced(page,l) TestClearPageReferenced(page) #define try_to_unmap(page, refs) SWAP_FAIL +static inline int page_mkclean(struct page *page) +{ + return 0; +} + + #endif /* CONFIG_MMU */ /* diff --git a/mm/memory.c b/mm/memory.c index 109e9866237e..fa941b169071 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1458,14 +1458,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse, ret = VM_FAULT_MINOR; + int reuse = 0, ret = VM_FAULT_MINOR; + struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) goto gotten; - if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) == - (VM_SHARED|VM_WRITE))) { + /* + * Only catch write-faults on shared writable pages, read-only + * shared pages can get COWed by get_user_pages(.write=1, .force=1). + */ + if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED))) { if (vma->vm_ops && vma->vm_ops->page_mkwrite) { /* * Notify the address space that the page is about to @@ -1494,13 +1499,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) goto unlock; } - + dirty_page = old_page; + get_page(dirty_page); reuse = 1; } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { reuse = can_share_swap_page(old_page); unlock_page(old_page); - } else { - reuse = 0; } if (reuse) { @@ -1566,6 +1570,10 @@ gotten: page_cache_release(old_page); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty(dirty_page); + put_page(dirty_page); + } return ret; oom: if (old_page) @@ -2098,6 +2106,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int sequence = 0; int ret = VM_FAULT_MINOR; int anon = 0; + struct page *dirty_page = NULL; pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); @@ -2192,6 +2201,10 @@ retry: } else { inc_mm_counter(mm, file_rss); page_add_file_rmap(new_page); + if (write_access) { + dirty_page = new_page; + get_page(dirty_page); + } } } else { /* One of our sibling threads was faster, back out. */ @@ -2204,6 +2217,10 @@ retry: lazy_mmu_prot_update(entry); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty(dirty_page); + put_page(dirty_page); + } return ret; oom: page_cache_release(new_page); diff --git a/mm/mmap.c b/mm/mmap.c index d799d896d74a..8507ee9cd573 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1105,12 +1105,6 @@ munmap_back: goto free_vma; } - /* Don't make the VMA automatically writable if it's shared, but the - * backer wishes to know when pages are first written to */ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) - vma->vm_page_prot = - protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; - /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) * that memory reservation must be checked; but that reservation @@ -1128,6 +1122,10 @@ munmap_back: pgoff = vma->vm_pgoff; vm_flags = vma->vm_flags; + if (vma_wants_writenotify(vma)) + vma->vm_page_prot = + protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + if (!file || !vma_merge(mm, prev, addr, vma->vm_end, vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { file = vma->vm_file; diff --git a/mm/mprotect.c b/mm/mprotect.c index 638edabaff71..367b7f6c0637 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -123,8 +123,6 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long oldflags = vma->vm_flags; long nrpages = (end - start) >> PAGE_SHIFT; unsigned long charged = 0; - unsigned int mask; - pgprot_t newprot; pgoff_t pgoff; int error; @@ -176,24 +174,21 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, } success: - /* Don't make the VMA automatically writable if it's shared, but the - * backer wishes to know when pages are first written to */ - mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED; - if (vma->vm_ops && vma->vm_ops->page_mkwrite) - mask &= ~VM_SHARED; - - newprot = protection_map[newflags & mask]; - /* * vm_flags and vm_page_prot are protected by the mmap_sem * held in write mode. */ vma->vm_flags = newflags; - vma->vm_page_prot = newprot; + vma->vm_page_prot = protection_map[newflags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + if (vma_wants_writenotify(vma)) + vma->vm_page_prot = protection_map[newflags & + (VM_READ|VM_WRITE|VM_EXEC)]; + if (is_vm_hugetlb_page(vma)) - hugetlb_change_protection(vma, start, end, newprot); + hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else - change_protection(vma, start, end, newprot); + change_protection(vma, start, end, vma->vm_page_prot); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 77a0bc4e261a..1c87430b7a25 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -550,7 +551,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; @@ -712,9 +713,15 @@ int test_clear_page_dirty(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - if (mapping_cap_account_dirty(mapping)) - __dec_zone_page_state(page, NR_FILE_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); + /* + * We can continue to use `mapping' here because the + * page is locked, which pins the address_space + */ + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } write_unlock_irqrestore(&mapping->tree_lock, flags); @@ -744,8 +751,10 @@ int clear_page_dirty_for_io(struct page *page) if (mapping) { if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } return 0; diff --git a/mm/rmap.c b/mm/rmap.c index 40158b59729e..e2155d791d99 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked) return referenced; } +static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte, entry; + spinlock_t *ptl; + int ret = 0; + + address = vma_address(page, vma); + if (address == -EFAULT) + goto out; + + pte = page_check_address(page, mm, address, &ptl); + if (!pte) + goto out; + + if (!pte_dirty(*pte) && !pte_write(*pte)) + goto unlock; + + entry = ptep_get_and_clear(mm, address, pte); + entry = pte_mkclean(entry); + entry = pte_wrprotect(entry); + ptep_establish(vma, address, pte, entry); + lazy_mmu_prot_update(entry); + ret = 1; + +unlock: + pte_unmap_unlock(pte, ptl); +out: + return ret; +} + +static int page_mkclean_file(struct address_space *mapping, struct page *page) +{ + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int ret = 0; + + BUG_ON(PageAnon(page)); + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + if (vma->vm_flags & VM_SHARED) + ret += page_mkclean_one(page, vma); + } + spin_unlock(&mapping->i_mmap_lock); + return ret; +} + +int page_mkclean(struct page *page) +{ + int ret = 0; + + BUG_ON(!PageLocked(page)); + + if (page_mapped(page)) { + struct address_space *mapping = page_mapping(page); + if (mapping) + ret = page_mkclean_file(mapping, page); + } + + return ret; +} + /** * page_set_anon_rmap - setup new anonymous rmap * @page: the page to add the mapping to -- cgit v1.2.3-71-gd317 From edc79b2a46ed854595e40edcf3f8b37f9f14aa3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Sep 2006 23:30:58 -0700 Subject: [PATCH] mm: balance dirty pages Now that we can detect writers of shared mappings, throttle them. Avoids OOM by surprise. Signed-off-by: Peter Zijlstra Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 + mm/memory.c | 5 +++-- mm/page-writeback.c | 10 ++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0422036af4eb..56a23a0e7f2e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); +void set_page_dirty_balance(struct page *page); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/mm/memory.c b/mm/memory.c index fa941b169071..dd7d7fc5ed60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1571,7 +1572,7 @@ gotten: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { - set_page_dirty(dirty_page); + set_page_dirty_balance(dirty_page); put_page(dirty_page); } return ret; @@ -2218,7 +2219,7 @@ retry: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { - set_page_dirty(dirty_page); + set_page_dirty_balance(dirty_page); put_page(dirty_page); } return ret; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 1c87430b7a25..b9f4c6f1be86 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -244,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } +void set_page_dirty_balance(struct page *page) +{ + if (set_page_dirty(page)) { + struct address_space *mapping = page_mapping(page); + + if (mapping) + balance_dirty_pages_ratelimited(mapping); + } +} + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied -- cgit v1.2.3-71-gd317 From b221385bc41d6789edde3d2fa0cb20d5045730eb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 25 Sep 2006 23:31:02 -0700 Subject: [PATCH] mm/: make functions static This patch makes the following needlessly global functions static: - slab.c: kmem_find_general_cachep() - swap.c: __page_cache_release() - vmalloc.c: __vmalloc_node() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- include/linux/slab.h | 2 -- include/linux/vmalloc.h | 2 -- mm/slab.c | 3 +-- mm/swap.c | 39 +++++++++++++++++++-------------------- mm/vmalloc.c | 8 +++++--- 6 files changed, 25 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 449841413cf1..45678b036955 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -318,8 +318,6 @@ static inline int get_page_unless_zero(struct page *page) return atomic_inc_not_zero(&page->_count); } -extern void FASTCALL(__page_cache_release(struct page *)); - static inline int page_count(struct page *page) { if (unlikely(PageCompound(page))) diff --git a/include/linux/slab.h b/include/linux/slab.h index 45ad55b70d1c..193c03c547ec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -223,7 +222,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); /* SLOB allocator routines */ void kmem_cache_init(void); -struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags); struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, unsigned long, void (*)(void *, struct kmem_cache *, unsigned long), diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 71b6363caaaf..dee88c6b6fa7 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); -extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, - pgprot_t prot, int node); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/mm/slab.c b/mm/slab.c index 21ba06035700..5870bcbd33cf 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -768,11 +768,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, return csizep->cs_cachep; } -struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) +static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } -EXPORT_SYMBOL(kmem_find_general_cachep); static size_t slab_mgmt_size(size_t nr_objs, size_t align) { diff --git a/mm/swap.c b/mm/swap.c index 600235e43704..2e0e871f542f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -34,6 +34,25 @@ /* How many pages do we try to swap or page in/out together? */ int page_cluster; +/* + * This path almost never happens for VM activity - pages are normally + * freed via pagevecs. But it gets used by networking. + */ +static void fastcall __page_cache_release(struct page *page) +{ + if (PageLRU(page)) { + unsigned long flags; + struct zone *zone = page_zone(page); + + spin_lock_irqsave(&zone->lru_lock, flags); + VM_BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irqrestore(&zone->lru_lock, flags); + } + free_hot_page(page); +} + static void put_compound_page(struct page *page) { page = (struct page *)page_private(page); @@ -222,26 +241,6 @@ int lru_add_drain_all(void) } #endif -/* - * This path almost never happens for VM activity - pages are normally - * freed via pagevecs. But it gets used by networking. - */ -void fastcall __page_cache_release(struct page *page) -{ - if (PageLRU(page)) { - unsigned long flags; - struct zone *zone = page_zone(page); - - spin_lock_irqsave(&zone->lru_lock, flags); - VM_BUG_ON(!PageLRU(page)); - __ClearPageLRU(page); - del_page_from_lru(zone, page); - spin_unlock_irqrestore(&zone->lru_lock, flags); - } - free_hot_page(page); -} -EXPORT_SYMBOL(__page_cache_release); - /* * Batched page_cache_release(). Decrement the reference count on all the * passed pages. If it fell to zero then remove the page from the LRU and diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 266162d2ba28..9aad8b0cc6ee 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -24,6 +24,9 @@ DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; +static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, + int node); + static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) { pte_t *pte; @@ -478,8 +481,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. */ -void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, - int node) +static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, + int node) { struct vm_struct *area; @@ -493,7 +496,6 @@ void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, return __vmalloc_area_node(area, gfp_mask, prot, node); } -EXPORT_SYMBOL(__vmalloc_node); void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { -- cgit v1.2.3-71-gd317 From 2d1a07d487d8b36658404839cdf03a974968cefd Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 25 Sep 2006 23:31:03 -0700 Subject: [PATCH] bootmem: remove useless __init in header file __init in headers is pretty useless because the compiler doesn't check it, and they get out of sync relatively frequently. So if you see an __init in a header file, it's quite unreliable and you need to check the definition anyway. Signed-off-by: Franck Bui-Huu Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index e319c649e4fd..c7124d4ce7cf 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -41,23 +41,23 @@ typedef struct bootmem_data { struct list_head list; } bootmem_data_t; -extern unsigned long __init bootmem_bootmap_pages (unsigned long); -extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); -extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_low(unsigned long size, +extern unsigned long bootmem_bootmap_pages (unsigned long); +extern unsigned long init_bootmem (unsigned long addr, unsigned long memend); +extern void free_bootmem (unsigned long addr, unsigned long size); +extern void * __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); +extern void * __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); +extern void * __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, +extern void * __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, +extern void * __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern void __init reserve_bootmem (unsigned long addr, unsigned long size); +extern void reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ @@ -67,12 +67,12 @@ extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_low((x), PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -extern unsigned long __init free_all_bootmem (void); -extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); -extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); -extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); -extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); +extern unsigned long free_all_bootmem (void); +extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); +extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); +extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); +extern unsigned long free_all_bootmem_node (pg_data_t *pgdat); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) @@ -94,14 +94,14 @@ static inline void *alloc_remap(int nid, unsigned long size) extern unsigned long __meminitdata nr_kernel_pages; extern unsigned long nr_all_pages; -extern void *__init alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long limit); +extern void * alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long limit); #define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ #define HASH_EARLY 0x00000002 /* Allocating during early boot? */ -- cgit v1.2.3-71-gd317 From 71fb2e8f8753b66b1f4295aa264a2eb4e69381e8 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 25 Sep 2006 23:31:05 -0700 Subject: [PATCH] bootmem: remove useless parentheses in bootmem header file Signed-off-by: Franck Bui-Huu Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index c7124d4ce7cf..fa2523f47628 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -59,13 +59,13 @@ extern void * __alloc_bootmem_core(struct bootmem_data *bdata, #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ - __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ - __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0) + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low((x), PAGE_SIZE, 0) + __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long free_all_bootmem (void); extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); @@ -75,11 +75,11 @@ extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned lo extern unsigned long free_all_bootmem_node (pg_data_t *pgdat); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0) + __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP -- cgit v1.2.3-71-gd317 From bb0923a66820718f636736b22ce47372f79e3400 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 25 Sep 2006 23:31:05 -0700 Subject: [PATCH] bootmem: limit to 80 columns width Signed-off-by: Franck Bui-Huu Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 42 +++++++++++++++++++++++++++++------------- mm/bootmem.c | 28 ++++++++++++++++++---------- 2 files changed, 47 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index fa2523f47628..1e8dddfb3083 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -44,18 +44,24 @@ typedef struct bootmem_data { extern unsigned long bootmem_bootmap_pages (unsigned long); extern unsigned long init_bootmem (unsigned long addr, unsigned long memend); extern void free_bootmem (unsigned long addr, unsigned long size); -extern void * __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); -extern void * __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); +extern void * __alloc_bootmem (unsigned long size, + unsigned long align, + unsigned long goal); +extern void * __alloc_bootmem_nopanic (unsigned long size, + unsigned long align, + unsigned long goal); extern void * __alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); + unsigned long align, + unsigned long goal); extern void * __alloc_bootmem_low_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal); + unsigned long size, + unsigned long align, + unsigned long goal); extern void * __alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, unsigned long goal, - unsigned long limit); + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ @@ -68,10 +74,20 @@ extern void reserve_bootmem (unsigned long addr, unsigned long size); __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long free_all_bootmem (void); -extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); -extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); -extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); +extern void * __alloc_bootmem_node (pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern unsigned long init_bootmem_node (pg_data_t *pgdat, + unsigned long freepfn, + unsigned long startpfn, + unsigned long endpfn); +extern void reserve_bootmem_node (pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size); +extern void free_bootmem_node (pg_data_t *pgdat, + unsigned long addr, + unsigned long size); extern unsigned long free_all_bootmem_node (pg_data_t *pgdat); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ diff --git a/mm/bootmem.c b/mm/bootmem.c index 70f1528a3c8a..9083f5029673 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -102,7 +102,8 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, * might be used for boot-time allocations - or it might get added * to the free page pool later on. */ -static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { unsigned long i; /* @@ -127,7 +128,8 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add } } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { unsigned long i; unsigned long start; @@ -355,17 +357,20 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) return total; } -unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) +unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, + unsigned long startpfn, unsigned long endpfn) { return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); } -void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { reserve_bootmem_core(pgdat->bdata, physaddr, size); } -void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { free_bootmem_core(pgdat->bdata, physaddr, size); } @@ -399,7 +404,8 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; @@ -410,7 +416,8 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, u return NULL; } -void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem(unsigned long size, unsigned long align, + unsigned long goal) { void *mem = __alloc_bootmem_nopanic(size,align,goal); if (mem) @@ -424,8 +431,8 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned } -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal) +void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) { void *ptr; @@ -438,7 +445,8 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigne #define LOW32LIMIT 0xffffffff -void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; -- cgit v1.2.3-71-gd317 From e786e86a542ccc1133f333402526ad00b9c088ae Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 25 Sep 2006 23:31:06 -0700 Subject: [PATCH] bootmem: remove useless headers inclusions Signed-off-by: Franck Bui-Huu Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 5 +---- mm/bootmem.c | 10 +++------- 2 files changed, 4 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1e8dddfb3083..b2067e4a4486 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -4,11 +4,8 @@ #ifndef _LINUX_BOOTMEM_H #define _LINUX_BOOTMEM_H -#include -#include -#include -#include #include +#include /* * simple boot-time physical memory area allocator. diff --git a/mm/bootmem.c b/mm/bootmem.c index 9083f5029673..e136c086fd98 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -8,17 +8,13 @@ * free memory collector. It's used to deal with reserved * system memory and memory holes as well. */ - -#include -#include -#include -#include #include #include -#include #include -#include + +#include #include + #include "internal.h" /* -- cgit v1.2.3-71-gd317 From f71bf0cac730ccb5ebcdf21747db75ae0445ccde Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 25 Sep 2006 23:31:08 -0700 Subject: [PATCH] bootmem: miscellaneous coding style fixes It fixes various coding style issues, specially when spaces are useless. For example '*' go next to the function name. Signed-off-by: Franck Bui-Huu Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 95 +++++++++++++++++++++++++------------------------ mm/bootmem.c | 81 ++++++++++++++++++++++------------------- 2 files changed, 93 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b2067e4a4486..31e9abb6d977 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -38,29 +38,30 @@ typedef struct bootmem_data { struct list_head list; } bootmem_data_t; -extern unsigned long bootmem_bootmap_pages (unsigned long); -extern unsigned long init_bootmem (unsigned long addr, unsigned long memend); -extern void free_bootmem (unsigned long addr, unsigned long size); -extern void * __alloc_bootmem (unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __alloc_bootmem_nopanic (unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __alloc_bootmem_low(unsigned long size, +extern unsigned long bootmem_bootmap_pages(unsigned long); +extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); +extern void free_bootmem(unsigned long addr, unsigned long size); +extern void *__alloc_bootmem(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, unsigned long align, - unsigned long goal); -extern void * __alloc_bootmem_low_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, - unsigned long align, - unsigned long goal, - unsigned long limit); + unsigned long goal, + unsigned long limit); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern void reserve_bootmem (unsigned long addr, unsigned long size); +extern void reserve_bootmem(unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ @@ -70,22 +71,24 @@ extern void reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -extern unsigned long free_all_bootmem (void); -extern void * __alloc_bootmem_node (pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal); -extern unsigned long init_bootmem_node (pg_data_t *pgdat, - unsigned long freepfn, - unsigned long startpfn, - unsigned long endpfn); -extern void reserve_bootmem_node (pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size); -extern void free_bootmem_node (pg_data_t *pgdat, - unsigned long addr, - unsigned long size); -extern unsigned long free_all_bootmem_node (pg_data_t *pgdat); + +extern unsigned long free_all_bootmem(void); +extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); +extern void *__alloc_bootmem_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern unsigned long init_bootmem_node(pg_data_t *pgdat, + unsigned long freepfn, + unsigned long startpfn, + unsigned long endpfn); +extern void reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size); +extern void free_bootmem_node(pg_data_t *pgdat, + unsigned long addr, + unsigned long size); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) @@ -102,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size) { return NULL; } -#endif +#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ extern unsigned long __meminitdata nr_kernel_pages; extern unsigned long nr_all_pages; -extern void * alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long limit); +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long limit); #define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ #define HASH_EARLY 0x00000002 /* Allocating during early boot? */ diff --git a/mm/bootmem.c b/mm/bootmem.c index 23c3317c09ed..f0f85fa713ca 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -38,7 +38,7 @@ unsigned long saved_max_pfn; #endif /* return the number of _pages_ that will be allocated for the boot bitmap */ -unsigned long __init bootmem_bootmap_pages (unsigned long pages) +unsigned long __init bootmem_bootmap_pages(unsigned long pages) { unsigned long mapsize; @@ -48,12 +48,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages) return mapsize; } + /* * link bdata in order */ static void __init link_bootmem(bootmem_data_t *bdata) { bootmem_data_t *ent; + if (list_empty(&bdata_list)) { list_add(&bdata->list, &bdata_list); return; @@ -66,7 +68,6 @@ static void __init link_bootmem(bootmem_data_t *bdata) } } list_add_tail(&bdata->list, &bdata_list); - return; } /* @@ -85,7 +86,7 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata) /* * Called once to set up the allocator itself. */ -static unsigned long __init init_bootmem_core (pg_data_t *pgdat, +static unsigned long __init init_bootmem_core(pg_data_t *pgdat, unsigned long mapstart, unsigned long start, unsigned long end) { bootmem_data_t *bdata = pgdat->bdata; @@ -185,7 +186,7 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long i, start = 0, incr, eidx, end_pfn; void *ret; - if(!size) { + if (!size) { printk("__alloc_bootmem_core(): zero-sized request\n"); BUG(); } @@ -234,7 +235,7 @@ restart_scan: for (j = i + 1; j < i + areasize; ++j) { if (j >= eidx) goto fail_block; - if (test_bit (j, bdata->node_bootmem_map)) + if (test_bit(j, bdata->node_bootmem_map)) goto fail_block; } start = i; @@ -262,19 +263,21 @@ found: bdata->last_offset && bdata->last_pos+1 == start) { offset = ALIGN(bdata->last_offset, align); BUG_ON(offset > PAGE_SIZE); - remaining_size = PAGE_SIZE-offset; + remaining_size = PAGE_SIZE - offset; if (size < remaining_size) { areasize = 0; /* last_pos unchanged */ - bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); + bdata->last_offset = offset + size; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); } else { remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); - bdata->last_pos = start+areasize-1; + areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); + bdata->last_pos = start + areasize - 1; bdata->last_offset = remaining_size; } bdata->last_offset &= ~PAGE_MASK; @@ -287,7 +290,7 @@ found: /* * Reserve the area now: */ - for (i = start; i < start+areasize; i++) + for (i = start; i < start + areasize; i++) if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) BUG(); memset(ret, 0, size); @@ -338,7 +341,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) } } } else { - i+=BITS_PER_LONG; + i += BITS_PER_LONG; } pfn += BITS_PER_LONG; } @@ -361,51 +364,51 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) return total; } -unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, +unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) { - return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); + return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); } -void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, - unsigned long size) +void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { reserve_bootmem_core(pgdat->bdata, physaddr, size); } -void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, - unsigned long size) +void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { free_bootmem_core(pgdat->bdata, physaddr, size); } -unsigned long __init free_all_bootmem_node (pg_data_t *pgdat) +unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { - return(free_all_bootmem_core(pgdat)); + return free_all_bootmem_core(pgdat); } -unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +unsigned long __init init_bootmem(unsigned long start, unsigned long pages) { max_low_pfn = pages; min_low_pfn = start; - return(init_bootmem_core(NODE_DATA(0), start, 0, pages)); + return init_bootmem_core(NODE_DATA(0), start, 0, pages); } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -void __init reserve_bootmem (unsigned long addr, unsigned long size) +void __init reserve_bootmem(unsigned long addr, unsigned long size) { reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -void __init free_bootmem (unsigned long addr, unsigned long size) +void __init free_bootmem(unsigned long addr, unsigned long size) { free_bootmem_core(NODE_DATA(0)->bdata, addr, size); } -unsigned long __init free_all_bootmem (void) +unsigned long __init free_all_bootmem(void) { - return(free_all_bootmem_core(NODE_DATA(0))); + return free_all_bootmem_core(NODE_DATA(0)); } void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, @@ -414,9 +417,11 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, 0); + if (ptr) + return ptr; + } return NULL; } @@ -424,6 +429,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { void *mem = __alloc_bootmem_nopanic(size,align,goal); + if (mem) return mem; /* @@ -442,7 +448,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) - return (ptr); + return ptr; return __alloc_bootmem(size, align, goal); } @@ -455,10 +461,11 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, - align, goal, LOW32LIMIT))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, LOW32LIMIT); + if (ptr) + return ptr; + } /* * Whoops, we cannot satisfy the allocation request. -- cgit v1.2.3-71-gd317 From c1f60a5a419cc60aff27daffb150f5a3a3a79ef4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:11 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: move HIGHMEM counters into highmem.c/.h Move totalhigh_pages and nr_free_highpages() into highmem.c/.h Move the totalhigh_pages definition into highmem.c/.h. Move the nr_free_highpages function into highmem.c [yoichi_yuasa@tripeaks.co.jp: build fix] Signed-off-by: Christoph Lameter Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/sgi-ip27/ip27-memory.c | 1 + arch/um/kernel/mem.c | 2 ++ include/linux/highmem.h | 3 +++ include/linux/swap.h | 1 - mm/highmem.c | 13 +++++++++++++ mm/page_alloc.c | 15 --------------- mm/shmem.c | 1 + 7 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 59bfc0fc3f45..16e5682b01f1 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 61280167c560..b39e624c3291 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -79,8 +79,10 @@ void mem_init(void) /* this will put all low memory onto the freelists */ totalram_pages = free_all_bootmem(); +#ifdef CONFIG_HIGHMEM totalhigh_pages = highmem >> PAGE_SHIFT; totalram_pages += totalhigh_pages; +#endif num_physpages = totalram_pages; max_pfn = totalram_pages; printk(KERN_INFO "Memory: %luk available\n", diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 42620e723abb..fd7d12daa94f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -24,11 +24,14 @@ static inline void flush_kernel_dcache_page(struct page *page) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); +extern unsigned long totalhigh_pages; #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } +#define totalhigh_pages 0 + #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e59184c9096..34a6bc3e6cf3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -162,7 +162,6 @@ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct * /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; -extern unsigned long totalhigh_pages; extern unsigned long totalreserve_pages; extern long nr_swap_pages; extern unsigned int nr_free_pages(void); diff --git a/mm/highmem.c b/mm/highmem.c index 9b2a5403c447..ee5519b176ee 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) */ #ifdef CONFIG_HIGHMEM +unsigned long totalhigh_pages __read_mostly; + +unsigned int nr_free_highpages (void) +{ + pg_data_t *pgdat; + unsigned int pages = 0; + + for_each_online_pgdat(pgdat) + pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; + + return pages; +} + static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e26491cb5a27..5cde54695cfb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map); nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); unsigned long totalram_pages __read_mostly; -unsigned long totalhigh_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; long nr_swap_pages; int percpu_pagelist_fraction; @@ -1185,20 +1184,6 @@ unsigned int nr_free_pagecache_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); } - -#ifdef CONFIG_HIGHMEM -unsigned int nr_free_highpages (void) -{ - pg_data_t *pgdat; - unsigned int pages = 0; - - for_each_online_pgdat(pgdat) - pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; - - return pages; -} -#endif - #ifdef CONFIG_NUMA static void show_node(struct zone *zone) { diff --git a/mm/shmem.c b/mm/shmem.c index db21c51531ca..8631be45b40d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3-71-gd317 From 2f1b6248682f8b39ca3c7e549dfc216d26c4109b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:13 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: use enum to define zones, reformat and comment Use enum for zones and reformat zones dependent information Add comments explaning the use of zones and add a zones_t type for zone numbers. Line up information that will be #ifdefd by the following patches. [akpm@osdl.org: comment cleanups] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++--- include/linux/mmzone.h | 66 +++++++++++++++++++++++++++++++++++--------------- mm/page_alloc.c | 24 +++++++++++++----- 3 files changed, 69 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 45678b036955..2db4229a0066 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -470,7 +470,7 @@ void split_page(struct page *page, unsigned int order); #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) -static inline unsigned long page_zonenum(struct page *page) +static inline enum zone_type page_zonenum(struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -499,11 +499,12 @@ static inline unsigned long page_to_section(struct page *page) return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -static inline void set_page_zone(struct page *page, unsigned long zone) +static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } + static inline void set_page_node(struct page *page, unsigned long node) { page->flags &= ~(NODES_MASK << NODES_PGSHIFT); @@ -515,7 +516,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline void set_page_links(struct page *page, unsigned long zone, +static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f45163c528e8..03a5a6eb0ffa 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -88,14 +88,53 @@ struct per_cpu_pageset { #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) #endif -#define ZONE_DMA 0 -#define ZONE_DMA32 1 -#define ZONE_NORMAL 2 -#define ZONE_HIGHMEM 3 +enum zone_type { + /* + * ZONE_DMA is used when there are devices that are not able + * to do DMA to all of addressable memory (ZONE_NORMAL). Then we + * carve out the portion of memory that is needed for these devices. + * The range is arch specific. + * + * Some examples + * + * Architecture Limit + * --------------------------- + * parisc, ia64, sparc <4G + * s390 <2G + * arm26 <48M + * arm Various + * alpha Unlimited or 0-16MB. + * + * i386, x86_64 and multiple other arches + * <16M. + */ + ZONE_DMA, + /* + * x86_64 needs two ZONE_DMAs because it supports devices that are + * only able to do DMA to the lower 16M but also 32 bit devices that + * can only do DMA areas below 4G. + */ + ZONE_DMA32, + /* + * Normal addressable memory is in ZONE_NORMAL. DMA operations can be + * performed on pages in ZONE_NORMAL if the DMA devices support + * transfers to all addressable memory. + */ + ZONE_NORMAL, + /* + * A memory area that is only addressable by the kernel through + * mapping portions into its own address space. This is for example + * used by i386 to allow the kernel to address the memory beyond + * 900MB. The kernel will set up special mappings (page + * table entries on i386) for each page that the kernel needs to + * access. + */ + ZONE_HIGHMEM, -#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ + MAX_NR_ZONES +}; +#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ /* * When a memory allocation must conform to specific limitations (such @@ -126,16 +165,6 @@ struct per_cpu_pageset { /* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ #define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -/* - * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a 32bit PC we have 4 zones: - * - * ZONE_DMA < 16 MB ISA DMA capable memory - * ZONE_DMA32 0 MB Empty - * ZONE_NORMAL 16-896 MB direct mapped by the kernel - * ZONE_HIGHMEM > 896 MB only page cache and user processes - */ - struct zone { /* Fields commonly accessed by the page allocator */ unsigned long free_pages; @@ -266,7 +295,6 @@ struct zone { char *name; } ____cacheline_internodealigned_in_smp; - /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the @@ -373,12 +401,12 @@ static inline int populated_zone(struct zone *zone) return (!!zone->present_pages); } -static inline int is_highmem_idx(int idx) +static inline int is_highmem_idx(enum zone_type idx) { return (idx == ZONE_HIGHMEM); } -static inline int is_normal_idx(int idx) +static inline int is_normal_idx(enum zone_type idx) { return (idx == ZONE_NORMAL); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 13c102b95c57..2410a3cb1c53 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -68,7 +68,11 @@ static void __free_pages_ok(struct page *page, unsigned int order); * TBD: should special case ZONE_DMA32 machines here - in those we normally * don't need any ZONE_NORMAL reservation */ -int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { + 256, + 256, + 32 +}; EXPORT_SYMBOL(totalram_pages); @@ -79,7 +83,13 @@ EXPORT_SYMBOL(totalram_pages); struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; EXPORT_SYMBOL(zone_table); -static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; +static char *zone_names[MAX_NR_ZONES] = { + "DMA", + "DMA32", + "Normal", + "HighMem" +}; + int min_free_kbytes = 1024; unsigned long __meminitdata nr_kernel_pages; @@ -1487,7 +1497,9 @@ static void __meminit build_zonelists(pg_data_t *pgdat) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, k, node, local_node; + int i, node, local_node; + enum zone_type k; + enum zone_type j; local_node = pgdat->node_id; for (i = 0; i < GFP_ZONETYPES; i++) { @@ -1675,8 +1687,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) -void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, - unsigned long size) +void zonetable_add(struct zone *zone, int nid, enum zone_type zid, + unsigned long pfn, unsigned long size) { unsigned long snum = pfn_to_section_nr(pfn); unsigned long end = pfn_to_section_nr(pfn + size); @@ -1960,7 +1972,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, static void __meminit free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { - unsigned long j; + enum zone_type j; int nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; int ret; -- cgit v1.2.3-71-gd317 From fb0e7942bdcbbd2f90e61cb4cfa4fa892a873f8a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:13 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_DMA32 optional Make ZONE_DMA32 optional - Add #ifdefs around ZONE_DMA32 specific code and definitions. - Add CONFIG_ZONE_DMA32 config option and use that for x86_64 that alone needs this zone. - Remove the use of CONFIG_DMA_IS_DMA32 and CONFIG_DMA_IS_NORMAL for ia64 and fix up the way per node ZVCs are calculated. - Fall back to prior GFP_ZONEMASK of 0x03 if there is no DMA32 zone. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 9 --------- arch/x86_64/Kconfig | 4 ++++ include/linux/gfp.h | 2 +- include/linux/mmzone.h | 16 +++++++++++++--- include/linux/vmstat.h | 4 +--- mm/page_alloc.c | 6 ++++++ 6 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index db274da7dba1..f521f2f60a78 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR -config DMA_IS_DMA32 - bool - default y - -config DMA_IS_NORMAL - bool - depends on IA64_SGI_SN2 - default y - config AUDIT_ARCH bool default y diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 6cd4878625f1..581ce9af0ec8 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -24,6 +24,10 @@ config X86 bool default y +config ZONE_DMA32 + bool + default y + config LOCKDEP_SUPPORT bool default y diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cc9e60844484..14610b56c132 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -13,7 +13,7 @@ struct vm_area_struct; /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) -#ifdef CONFIG_DMA_IS_DMA32 +#ifndef CONFIG_ZONE_DMA32 #define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ #elif BITS_PER_LONG < 64 #define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 03a5a6eb0ffa..adae3c915938 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -109,12 +109,14 @@ enum zone_type { * <16M. */ ZONE_DMA, +#ifdef CONFIG_ZONE_DMA32 /* * x86_64 needs two ZONE_DMAs because it supports devices that are * only able to do DMA to the lower 16M but also 32 bit devices that * can only do DMA areas below 4G. */ ZONE_DMA32, +#endif /* * Normal addressable memory is in ZONE_NORMAL. DMA operations can be * performed on pages in ZONE_NORMAL if the DMA devices support @@ -161,9 +163,13 @@ enum zone_type { * * NOTE! Make sure this matches the zones in */ -#define GFP_ZONEMASK 0x07 -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ +#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ + +#ifdef CONFIG_ZONE_DMA32 +#define GFP_ZONEMASK 0x07 +#else +#define GFP_ZONEMASK 0x03 +#endif struct zone { /* Fields commonly accessed by the page allocator */ @@ -429,7 +435,11 @@ static inline int is_normal(struct zone *zone) static inline int is_dma32(struct zone *zone) { +#ifdef CONFIG_ZONE_DMA32 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; +#else + return 0; +#endif } static inline int is_dma(struct zone *zone) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2d9b1b60798a..9c6e62c56ec2 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -124,12 +124,10 @@ static inline unsigned long node_page_state(int node, struct zone *zones = NODE_DATA(node)->node_zones; return -#ifndef CONFIG_DMA_IS_NORMAL -#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64 +#ifdef CONFIG_ZONE_DMA32 zone_page_state(&zones[ZONE_DMA32], item) + #endif zone_page_state(&zones[ZONE_NORMAL], item) + -#endif #ifdef CONFIG_HIGHMEM zone_page_state(&zones[ZONE_HIGHMEM], item) + #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2410a3cb1c53..5b5cbb5e1816 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -70,7 +70,9 @@ static void __free_pages_ok(struct page *page, unsigned int order); */ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, +#ifdef CONFIG_ZONE_DMA32 256, +#endif 32 }; @@ -85,7 +87,9 @@ EXPORT_SYMBOL(zone_table); static char *zone_names[MAX_NR_ZONES] = { "DMA", +#ifdef CONFIG_ZONE_DMA32 "DMA32", +#endif "Normal", "HighMem" }; @@ -1373,8 +1377,10 @@ static inline int highest_zone(int zone_bits) int res = ZONE_NORMAL; if (zone_bits & (__force int)__GFP_HIGHMEM) res = ZONE_HIGHMEM; +#ifdef CONFIG_ZONE_DMA32 if (zone_bits & (__force int)__GFP_DMA32) res = ZONE_DMA32; +#endif if (zone_bits & (__force int)__GFP_DMA) res = ZONE_DMA; return res; -- cgit v1.2.3-71-gd317 From e53ef38d05dd59ed281a35590e4a5b64d8ff4c52 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:14 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_HIGHMEM optional Make ZONE_HIGHMEM optional - ifdef out code and definitions related to CONFIG_HIGHMEM - __GFP_HIGHMEM falls back to normal allocations if there is no ZONE_HIGHMEM - GFP_ZONEMASK becomes 0x01 if there is no DMA32 and no HIGHMEM zone. [jdike@addtoit.com: build fix] Signed-off-by: Jeff Dike Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/mem.c | 2 ++ include/linux/gfp.h | 18 ++++++++++-------- include/linux/mmzone.h | 36 +++++++++++++++++++++++++++++++++--- mm/page_alloc.c | 6 ++++++ 4 files changed, 51 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b39e624c3291..b1cd5c6e468b 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -226,7 +226,9 @@ void paging_init(void) for(i=0;i> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); +#ifdef CONFIG_HIGHMEM zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT; +#endif free_area_init(zones_size); /* diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 14610b56c132..2a2153ebfe0b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,17 +9,19 @@ struct vm_area_struct; /* * GFP bitmasks.. + * + * Zone modifiers (see linux/mmzone.h - low three bits) + * + * These may be masked by GFP_ZONEMASK to make allocations with this bit + * set fall back to ZONE_NORMAL. + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use the consistently. The definitions here may + * be used in bit comparisons. */ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) -#ifndef CONFIG_ZONE_DMA32 -#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ -#elif BITS_PER_LONG < 64 -#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ -#else -#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ -#endif +#define __GFP_DMA32 ((__force gfp_t)0x04u) /* * Action modifiers - doesn't change the zoning diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index adae3c915938..76d33e688593 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -123,6 +123,7 @@ enum zone_type { * transfers to all addressable memory. */ ZONE_NORMAL, +#ifdef CONFIG_HIGHMEM /* * A memory area that is only addressable by the kernel through * mapping portions into its own address space. This is for example @@ -132,11 +133,10 @@ enum zone_type { * access. */ ZONE_HIGHMEM, - +#endif MAX_NR_ZONES }; -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ /* * When a memory allocation must conform to specific limitations (such @@ -163,12 +163,34 @@ enum zone_type { * * NOTE! Make sure this matches the zones in */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ #ifdef CONFIG_ZONE_DMA32 + +#ifdef CONFIG_HIGHMEM +#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ #define GFP_ZONEMASK 0x07 +#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ #else +#define GFP_ZONETYPES ((0x07 + 1) / 2 + 1) /* Loner */ +/* Mask __GFP_HIGHMEM */ +#define GFP_ZONEMASK 0x05 +#define ZONES_SHIFT 2 +#endif + +#else +#ifdef CONFIG_HIGHMEM + #define GFP_ZONEMASK 0x03 +#define ZONES_SHIFT 2 +#define GFP_ZONETYPES 3 + +#else + +#define GFP_ZONEMASK 0x01 +#define ZONES_SHIFT 1 +#define GFP_ZONETYPES 2 + +#endif #endif struct zone { @@ -409,7 +431,11 @@ static inline int populated_zone(struct zone *zone) static inline int is_highmem_idx(enum zone_type idx) { +#ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM); +#else + return 0; +#endif } static inline int is_normal_idx(enum zone_type idx) @@ -425,7 +451,11 @@ static inline int is_normal_idx(enum zone_type idx) */ static inline int is_highmem(struct zone *zone) { +#ifdef CONFIG_HIGHMEM return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; +#else + return 0; +#endif } static inline int is_normal(struct zone *zone) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5b5cbb5e1816..6c7c2dd1b3ed 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -73,7 +73,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { #ifdef CONFIG_ZONE_DMA32 256, #endif +#ifdef CONFIG_HIGHMEM 32 +#endif }; EXPORT_SYMBOL(totalram_pages); @@ -91,7 +93,9 @@ static char *zone_names[MAX_NR_ZONES] = { "DMA32", #endif "Normal", +#ifdef CONFIG_HIGHMEM "HighMem" +#endif }; int min_free_kbytes = 1024; @@ -1375,8 +1379,10 @@ static int __meminit build_zonelists_node(pg_data_t *pgdat, static inline int highest_zone(int zone_bits) { int res = ZONE_NORMAL; +#ifdef CONFIG_HIGHMEM if (zone_bits & (__force int)__GFP_HIGHMEM) res = ZONE_HIGHMEM; +#endif #ifdef CONFIG_ZONE_DMA32 if (zone_bits & (__force int)__GFP_DMA32) res = ZONE_DMA32; -- cgit v1.2.3-71-gd317 From 27bf71c2a7e596ed34e9bf2d4a5030321a09a1ad Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:15 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: remove display of counters for unconfigured zones eventcounters: Do not display counters for zones that are not available on an arch Do not define or display counters for the DMA32 and the HIGHMEM zone if such zones were not configured. [akpm@osdl.org: s390 fix] [heiko.carstens@de.ibm.com: s390 fix] Signed-off-by: Christoph Lameter Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/appldata/appldata_mem.c | 3 +-- include/linux/vmstat.h | 14 ++++++++++++- mm/vmstat.c | 43 ++++++++++++++++++--------------------- 3 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index ab3b0765a64e..8aea3698a77b 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -117,8 +117,7 @@ static void appldata_get_mem_data(void *data) mem_data->pgpgout = ev[PGPGOUT] >> 1; mem_data->pswpin = ev[PSWPIN]; mem_data->pswpout = ev[PSWPOUT]; - mem_data->pgalloc = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] + - ev[PGALLOC_DMA]; + mem_data->pgalloc = ev[PGALLOC_NORMAL] + ev[PGALLOC_DMA]; mem_data->pgfault = ev[PGFAULT]; mem_data->pgmajfault = ev[PGMAJFAULT]; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9c6e62c56ec2..176c7f797339 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -18,7 +18,19 @@ * generated will simply be the increment of a global address. */ -#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH +#ifdef CONFIG_ZONE_DMA32 +#define DMA32_ZONE(xx) xx##_DMA32, +#else +#define DMA32_ZONE(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define HIGHMEM_ZONE(xx) , xx##_HIGH +#else +#define HIGHMEM_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), diff --git a/mm/vmstat.c b/mm/vmstat.c index c1b5f4106b38..04a9093f649e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -435,6 +435,21 @@ struct seq_operations fragmentation_op = { .show = frag_show, }; +#ifdef CONFIG_ZONE_DMA32 +#define TEXT_FOR_DMA32(xx) xx "_dma32", +#else +#define TEXT_FOR_DMA32(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define TEXT_FOR_HIGHMEM(xx) xx "_high", +#else +#define TEXT_FOR_HIGHMEM(xx) +#endif + +#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ + TEXT_FOR_HIGHMEM(xx) + static char *vmstat_text[] = { /* Zoned VM counters */ "nr_anon_pages", @@ -462,10 +477,7 @@ static char *vmstat_text[] = { "pswpin", "pswpout", - "pgalloc_dma", - "pgalloc_dma32", - "pgalloc_normal", - "pgalloc_high", + TEXTS_FOR_ZONES("pgalloc") "pgfree", "pgactivate", @@ -474,25 +486,10 @@ static char *vmstat_text[] = { "pgfault", "pgmajfault", - "pgrefill_dma", - "pgrefill_dma32", - "pgrefill_normal", - "pgrefill_high", - - "pgsteal_dma", - "pgsteal_dma32", - "pgsteal_normal", - "pgsteal_high", - - "pgscan_kswapd_dma", - "pgscan_kswapd_dma32", - "pgscan_kswapd_normal", - "pgscan_kswapd_high", - - "pgscan_direct_dma", - "pgscan_direct_dma32", - "pgscan_direct_normal", - "pgscan_direct_high", + TEXTS_FOR_ZONES("pgrefill") + TEXTS_FOR_ZONES("pgsteal") + TEXTS_FOR_ZONES("pgscan_kswapd") + TEXTS_FOR_ZONES("pgscan_direct") "pginodesteal", "slabs_scanned", -- cgit v1.2.3-71-gd317 From 4e4785bcf0c8503224fa6c17d8e0228de781bff6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:17 -0700 Subject: [PATCH] mempolicies: fix policy_zone check There is a check in zonelist_policy that compares pieces of the bitmap obtained from a gfp mask via GFP_ZONETYPES with a zone number in function zonelist_policy(). The bitmap is an ORed mask of __GFP_DMA, __GFP_DMA32 and __GFP_HIGHMEM. The policy_zone is a zone number with the possible values of ZONE_DMA, ZONE_DMA32, ZONE_HIGHMEM and ZONE_NORMAL. These are two different domains of values. For some reason seemed to work before the zone reduction patchset (It definitely works on SGI boxes since we just have one zone and the check cannot fail). With the zone reduction patchset this check definitely fails on systems with two zones if the system actually has memory in both zones. This is because ZONE_NORMAL is selected using no __GFP flag at all and thus gfp_zone(gfpmask) == 0. ZONE_DMA is selected when __GFP_DMA is set. __GFP_DMA is 0x01. So gfp_zone(gfpmask) == 1. policy_zone is set to ZONE_NORMAL (==1) if ZONE_NORMAL and ZONE_DMA are populated. For ZONE_NORMAL gfp_zone() yields 0 which is < policy_zone(ZONE_NORMAL) and so policy is not applied to regular memory allocations! Instead gfp_zone(__GFP_DMA) == 1 which results in policy being applied to DMA allocations! What we realy want in that place is to establish the highest allowable zone for a given gfp_mask. If the highest zone is higher or equal to the policy_zone then memory policies need to be applied. We have such a highest_zone() function in page_alloc.c. So move the highest_zone() function from mm/page_alloc.c into include/linux/gfp.h. On the way we simplify the function and use the new zone_type that was also introduced with the zone reduction patchset plus we also specify the right type for the gfp flags parameter. Signed-off-by: Christoph Lameter Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 15 +++++++++++++++ mm/mempolicy.c | 2 +- mm/page_alloc.c | 16 ---------------- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2a2153ebfe0b..a0992d392d79 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,6 +85,21 @@ static inline int gfp_zone(gfp_t gfp) return zone; } +static inline enum zone_type highest_zone(gfp_t flags) +{ + if (flags & __GFP_DMA) + return ZONE_DMA; +#ifdef CONFIG_ZONE_DMA32 + if (flags & __GFP_DMA32) + return ZONE_DMA32; +#endif +#ifdef CONFIG_HIGHMEM + if (flags & __GFP_HIGHMEM) + return ZONE_HIGHMEM; +#endif + return ZONE_NORMAL; +} + /* * There is only one page-allocator function, and two main namespaces to * it. The alloc_page*() variants return 'struct page *' and as such diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a9963ceddd65..9870624d72a6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1096,7 +1096,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) case MPOL_BIND: /* Lower zones don't get a policy applied */ /* Careful: current->mems_allowed might have moved */ - if (gfp_zone(gfp) >= policy_zone) + if (highest_zone(gfp) >= policy_zone) if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist)) return policy->v.zonelist; /*FALL THROUGH*/ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6c7c2dd1b3ed..25f39865e324 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1376,22 +1376,6 @@ static int __meminit build_zonelists_node(pg_data_t *pgdat, return nr_zones; } -static inline int highest_zone(int zone_bits) -{ - int res = ZONE_NORMAL; -#ifdef CONFIG_HIGHMEM - if (zone_bits & (__force int)__GFP_HIGHMEM) - res = ZONE_HIGHMEM; -#endif -#ifdef CONFIG_ZONE_DMA32 - if (zone_bits & (__force int)__GFP_DMA32) - res = ZONE_DMA32; -#endif - if (zone_bits & (__force int)__GFP_DMA) - res = ZONE_DMA; - return res; -} - #ifdef CONFIG_NUMA #define MAX_NODE_LOAD (num_online_nodes()) static int __meminitdata node_load[MAX_NUMNODES]; -- cgit v1.2.3-71-gd317 From 2f6726e54a9410e2e4cee864947c05e954051916 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:18 -0700 Subject: [PATCH] Apply type enum zone_type After we have done this we can now do some typing cleanup. The memory policy layer keeps a policy_zone that specifies the zone that gets memory policies applied. This variable can now be of type enum zone_type. The check_highest_zone function and the build_zonelists funnctionm must then also take a enum zone_type parameter. Plus there are a number of loops over zones that also should use zone_type. We run into some troubles at some points with functions that need a zone_type variable to become -1. Fix that up. [pj@sgi.com: fix set_mempolicy() crash] Signed-off-by: Christoph Lameter Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 4 ++-- mm/mempolicy.c | 11 ++++++++--- mm/page_alloc.c | 27 +++++++++++++++++---------- 3 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 72440f0a443d..09f0f575ddff 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); extern unsigned slab_node(struct mempolicy *policy); -extern int policy_zone; +extern enum zone_type policy_zone; -static inline void check_highest_zone(int k) +static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone) policy_zone = k; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9870624d72a6..7da4142ce960 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache; /* Highest zone. An specific allocation for a zone below that is not policied. */ -int policy_zone = ZONE_DMA; +enum zone_type policy_zone = ZONE_DMA; struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ @@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes) static struct zonelist *bind_zonelist(nodemask_t *nodes) { struct zonelist *zl; - int num, max, nd, k; + int num, max, nd; + enum zone_type k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); @@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. */ - for (k = policy_zone; k >= 0; k--) { + k = policy_zone; + while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; if (z->present_pages > 0) zl->zones[num++] = z; } + if (k == 0) + break; + k--; } zl->zones[num] = NULL; return zl; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 25f39865e324..9c44b9a39d30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -637,7 +637,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, */ void drain_node_pages(int nodeid) { - int i, z; + int i; + enum zone_type z; unsigned long flags; for (z = 0; z < MAX_NR_ZONES; z++) { @@ -1158,7 +1159,8 @@ EXPORT_SYMBOL(nr_free_pages); #ifdef CONFIG_NUMA unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) { - unsigned int i, sum = 0; + unsigned int sum = 0; + enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) sum += pgdat->node_zones[i].free_pages; @@ -1358,21 +1360,22 @@ void show_free_areas(void) * Add all populated zones of a node to the zonelist. */ static int __meminit build_zonelists_node(pg_data_t *pgdat, - struct zonelist *zonelist, int nr_zones, int zone_type) + struct zonelist *zonelist, int nr_zones, enum zone_type zone_type) { struct zone *zone; BUG_ON(zone_type >= MAX_NR_ZONES); + zone_type++; do { + zone_type--; zone = pgdat->node_zones + zone_type; if (populated_zone(zone)) { zonelist->zones[nr_zones++] = zone; check_highest_zone(zone_type); } - zone_type--; - } while (zone_type >= 0); + } while (zone_type); return nr_zones; } @@ -1441,10 +1444,11 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, k, node, local_node; + int i, j, node, local_node; int prev_node, load; struct zonelist *zonelist; nodemask_t used_mask; + enum zone_type k; /* initialize zonelists */ for (i = 0; i < GFP_ZONETYPES; i++) { @@ -1628,7 +1632,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long realtotalpages, totalpages = 0; - int i; + enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; @@ -2116,7 +2120,7 @@ static void calculate_totalreserve_pages(void) { struct pglist_data *pgdat; unsigned long reserve_pages = 0; - int i, j; + enum zone_type i, j; for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { @@ -2149,7 +2153,7 @@ static void calculate_totalreserve_pages(void) static void setup_per_zone_lowmem_reserve(void) { struct pglist_data *pgdat; - int j, idx; + enum zone_type j, idx; for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { @@ -2158,9 +2162,12 @@ static void setup_per_zone_lowmem_reserve(void) zone->lowmem_reserve[j] = 0; - for (idx = j-1; idx >= 0; idx--) { + idx = j; + while (idx) { struct zone *lower_zone; + idx--; + if (sysctl_lowmem_reserve_ratio[idx] < 1) sysctl_lowmem_reserve_ratio[idx] = 1; -- cgit v1.2.3-71-gd317 From 19655d3487001d7df0e10e9cbfc27c758b77c2b5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:19 -0700 Subject: [PATCH] linearly index zone->node_zonelists[] I wonder why we need this bitmask indexing into zone->node_zonelists[]? We always start with the highest zone and then include all lower zones if we build zonelists. Are there really cases where we need allocation from ZONE_DMA or ZONE_HIGHMEM but not ZONE_NORMAL? It seems that the current implementation of highest_zone() makes that already impossible. If we go linear on the index then gfp_zone() == highest_zone() and a lot of definitions fall by the wayside. We can now revert back to the use of gfp_zone() in mempolicy.c ;-) Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 +----------- include/linux/mmzone.h | 52 +++++--------------------------------------------- mm/mempolicy.c | 2 +- mm/page_alloc.c | 27 +++++++++++--------------- 4 files changed, 18 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a0992d392d79..63ab88a36f39 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -12,9 +12,6 @@ struct vm_area_struct; * * Zone modifiers (see linux/mmzone.h - low three bits) * - * These may be masked by GFP_ZONEMASK to make allocations with this bit - * set fall back to ZONE_NORMAL. - * * Do not put any conditional on these. If necessary modify the definitions * without the underscores and use the consistently. The definitions here may * be used in bit comparisons. @@ -78,14 +75,7 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 -static inline int gfp_zone(gfp_t gfp) -{ - int zone = GFP_ZONEMASK & (__force int) gfp; - BUG_ON(zone >= GFP_ZONETYPES); - return zone; -} - -static inline enum zone_type highest_zone(gfp_t flags) +static inline enum zone_type gfp_zone(gfp_t flags) { if (flags & __GFP_DMA) return ZONE_DMA; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 76d33e688593..7fe317164b73 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -137,60 +137,18 @@ enum zone_type { MAX_NR_ZONES }; - /* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the * allocator in the gfp_mask, in the zone modifier bits. These bits * are used to select a priority ordered list of memory zones which - * match the requested limits. GFP_ZONEMASK defines which bits within - * the gfp_mask should be considered as zone modifiers. Each valid - * combination of the zone modifier bits has a corresponding list - * of zones (in node_zonelists). Thus for two zone modifiers there - * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will - * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible - * combinations of zone modifiers in "zone modifier space". - * - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists. - * If the left most zone modifier is a "loner" then the highest valid - * zonelist would be four allowing us to allocate only five zonelists. - * Use the first form for GFP_ZONETYPES when the left most bit is not - * a "loner", otherwise use the second. - * - * NOTE! Make sure this matches the zones in + * match the requested limits. See gfp_zone() in include/linux/gfp.h */ -#ifdef CONFIG_ZONE_DMA32 - -#ifdef CONFIG_HIGHMEM -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -#define GFP_ZONEMASK 0x07 -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ -#else -#define GFP_ZONETYPES ((0x07 + 1) / 2 + 1) /* Loner */ -/* Mask __GFP_HIGHMEM */ -#define GFP_ZONEMASK 0x05 -#define ZONES_SHIFT 2 -#endif - -#else -#ifdef CONFIG_HIGHMEM - -#define GFP_ZONEMASK 0x03 -#define ZONES_SHIFT 2 -#define GFP_ZONETYPES 3 - +#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM) +#define ZONES_SHIFT 1 #else - -#define GFP_ZONEMASK 0x01 -#define ZONES_SHIFT 1 -#define GFP_ZONETYPES 2 - -#endif +#define ZONES_SHIFT 2 #endif struct zone { @@ -360,7 +318,7 @@ struct zonelist { struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; - struct zonelist node_zonelists[GFP_ZONETYPES]; + struct zonelist node_zonelists[MAX_NR_ZONES]; int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7da4142ce960..c3429a710ab1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1101,7 +1101,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) case MPOL_BIND: /* Lower zones don't get a policy applied */ /* Careful: current->mems_allowed might have moved */ - if (highest_zone(gfp) >= policy_zone) + if (gfp_zone(gfp) >= policy_zone) if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist)) return policy->v.zonelist; /*FALL THROUGH*/ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9c44b9a39d30..208a6b03aa78 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1444,14 +1444,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, node, local_node; + int j, node, local_node; + enum zone_type i; int prev_node, load; struct zonelist *zonelist; nodemask_t used_mask; - enum zone_type k; /* initialize zonelists */ - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = pgdat->node_zonelists + i; zonelist->zones[0] = NULL; } @@ -1481,13 +1481,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat) node_load[node] += load; prev_node = node; load--; - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++); - k = highest_zone(i); - - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); zonelist->zones[j] = NULL; } } @@ -1497,19 +1495,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, node, local_node; - enum zone_type k; - enum zone_type j; + int node, local_node; + enum zone_type i,j; local_node = pgdat->node_id; - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { struct zonelist *zonelist; zonelist = pgdat->node_zonelists + i; - j = 0; - k = highest_zone(i); - j = build_zonelists_node(pgdat, zonelist, j, k); + j = build_zonelists_node(pgdat, zonelist, 0, i); /* * Now we build the zonelist so that it contains the zones * of all the other nodes. @@ -1521,12 +1516,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat) for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); } for (node = 0; node < local_node; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); } zonelist->zones[j] = NULL; -- cgit v1.2.3-71-gd317 From 8bc719d3cab8414938f9ea6e33b58d8810d18068 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Sep 2006 23:31:20 -0700 Subject: [PATCH] out of memory notifier Add a notifer chain to the out of memory killer. If one of the registered callbacks could release some memory, do not kill the process but return and retry the allocation that forced the oom killer to run. The purpose of the notifier is to add a safety net in the presence of memory ballooners. If the resource manager inflated the balloon to a size where memory allocations can not be satisfied anymore, it is better to deflate the balloon a bit instead of killing processes. The implementation for the s390 ballooner is included. [akpm@osdl.org: cleanups] Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/cmm.c | 155 +++++++++++++++++++++++++++++++-------------------- include/linux/swap.h | 4 ++ mm/oom_kill.c | 22 ++++++++ 3 files changed, 121 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 786a44dba5bf..f2e00df99bae 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -34,17 +35,18 @@ struct cmm_page_array { unsigned long pages[CMM_NR_PAGES]; }; -static long cmm_pages = 0; -static long cmm_timed_pages = 0; -static volatile long cmm_pages_target = 0; -static volatile long cmm_timed_pages_target = 0; -static long cmm_timeout_pages = 0; -static long cmm_timeout_seconds = 0; +static long cmm_pages; +static long cmm_timed_pages; +static volatile long cmm_pages_target; +static volatile long cmm_timed_pages_target; +static long cmm_timeout_pages; +static long cmm_timeout_seconds; -static struct cmm_page_array *cmm_page_list = NULL; -static struct cmm_page_array *cmm_timed_page_list = NULL; +static struct cmm_page_array *cmm_page_list; +static struct cmm_page_array *cmm_timed_page_list; +static DEFINE_SPINLOCK(cmm_lock); -static unsigned long cmm_thread_active = 0; +static unsigned long cmm_thread_active; static struct work_struct cmm_thread_starter; static wait_queue_head_t cmm_thread_wait; static struct timer_list cmm_timer; @@ -53,58 +55,89 @@ static void cmm_timer_fn(unsigned long); static void cmm_set_timer(void); static long -cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) +cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list) { - struct cmm_page_array *pa; - unsigned long page; + struct cmm_page_array *pa, *npa; + unsigned long addr; - pa = *list; - while (pages) { - page = __get_free_page(GFP_NOIO); - if (!page) + while (nr) { + addr = __get_free_page(GFP_NOIO); + if (!addr) break; + spin_lock(&cmm_lock); + pa = *list; if (!pa || pa->index >= CMM_NR_PAGES) { /* Need a new page for the page list. */ - pa = (struct cmm_page_array *) + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *) __get_free_page(GFP_NOIO); - if (!pa) { - free_page(page); + if (!npa) { + free_page(addr); break; } - pa->next = *list; - pa->index = 0; - *list = pa; + spin_lock(&cmm_lock); + pa = *list; + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + *list = pa; + } else + free_page((unsigned long) npa); } - diag10(page); - pa->pages[pa->index++] = page; + diag10(addr); + pa->pages[pa->index++] = addr; (*counter)++; - pages--; + spin_unlock(&cmm_lock); + nr--; } - return pages; + return nr; } -static void -cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) +static long +cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) { struct cmm_page_array *pa; - unsigned long page; + unsigned long addr; + spin_lock(&cmm_lock); pa = *list; - while (pages) { + while (nr) { if (!pa || pa->index <= 0) break; - page = pa->pages[--pa->index]; + addr = pa->pages[--pa->index]; if (pa->index == 0) { pa = pa->next; free_page((unsigned long) *list); *list = pa; } - free_page(page); + free_page(addr); (*counter)--; - pages--; + nr--; } + spin_unlock(&cmm_lock); + return nr; } +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = 256; + + nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); + if (nr > 0) + nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + *freed += 256 - nr; + return NOTIFY_OK; +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + static int cmm_thread(void *dummy) { @@ -177,21 +210,21 @@ cmm_set_timer(void) static void cmm_timer_fn(unsigned long ignored) { - long pages; + long nr; - pages = cmm_timed_pages_target - cmm_timeout_pages; - if (pages < 0) + nr = cmm_timed_pages_target - cmm_timeout_pages; + if (nr < 0) cmm_timed_pages_target = 0; else - cmm_timed_pages_target = pages; + cmm_timed_pages_target = nr; cmm_kick_thread(); cmm_set_timer(); } void -cmm_set_pages(long pages) +cmm_set_pages(long nr) { - cmm_pages_target = pages; + cmm_pages_target = nr; cmm_kick_thread(); } @@ -202,9 +235,9 @@ cmm_get_pages(void) } void -cmm_add_timed_pages(long pages) +cmm_add_timed_pages(long nr) { - cmm_timed_pages_target += pages; + cmm_timed_pages_target += nr; cmm_kick_thread(); } @@ -215,9 +248,9 @@ cmm_get_timed_pages(void) } void -cmm_set_timeout(long pages, long seconds) +cmm_set_timeout(long nr, long seconds) { - cmm_timeout_pages = pages; + cmm_timeout_pages = nr; cmm_timeout_seconds = seconds; cmm_set_timer(); } @@ -245,7 +278,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; - long pages; + long nr; int len; if (!*lenp || (*ppos && !write)) { @@ -260,17 +293,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = simple_strtoul(p, &p, 0); + nr = simple_strtoul(p, &p, 0); if (ctl == &cmm_table[0]) - cmm_set_pages(pages); + cmm_set_pages(nr); else - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else { if (ctl == &cmm_table[0]) - pages = cmm_get_pages(); + nr = cmm_get_pages(); else - pages = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", pages); + nr = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", nr); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -286,7 +319,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; - long pages, seconds; + long nr, seconds; int len; if (!*lenp || (*ppos && !write)) { @@ -301,10 +334,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = simple_strtoul(p, &p, 0); + nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -357,7 +390,7 @@ static struct ctl_table cmm_dir_table[] = { static void cmm_smsg_target(char *from, char *msg) { - long pages, seconds; + long nr, seconds; if (strlen(sender) > 0 && strcmp(from, sender) != 0) return; @@ -366,27 +399,27 @@ cmm_smsg_target(char *from, char *msg) if (strncmp(msg, "SHRINK", 6) == 0) { if (!cmm_skip_blanks(msg + 6, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_pages(pages); + cmm_set_pages(nr); } else if (strncmp(msg, "RELEASE", 7) == 0) { if (!cmm_skip_blanks(msg + 7, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else if (strncmp(msg, "REUSE", 5) == 0) { if (!cmm_skip_blanks(msg + 5, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); if (!cmm_skip_blanks(msg, &msg)) return; seconds = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } } #endif @@ -402,6 +435,7 @@ cmm_init (void) #ifdef CONFIG_CMM_IUCV smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); #endif + register_oom_notifier(&cmm_oom_nb); INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); init_waitqueue_head(&cmm_thread_wait); init_timer(&cmm_timer); @@ -411,6 +445,7 @@ cmm_init (void) static void cmm_exit(void) { + unregister_oom_notifier(&cmm_oom_nb); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); #ifdef CONFIG_CMM_PROC diff --git a/include/linux/swap.h b/include/linux/swap.h index 34a6bc3e6cf3..32db06c8ffe0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,8 @@ #include #include +struct notifier_block; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -156,6 +158,8 @@ struct swap_list_t { /* linux/mm/oom_kill.c */ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); +extern int register_oom_notifier(struct notifier_block *nb); +extern int unregister_oom_notifier(struct notifier_block *nb); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b9af136e5cfa..7d056843fa2d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include int sysctl_panic_on_oom; /* #define DEBUG */ @@ -306,6 +308,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, return oom_kill_task(p, message); } +static BLOCKING_NOTIFIER_HEAD(oom_notify_list); + +int register_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&oom_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_oom_notifier); + +int unregister_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&oom_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_oom_notifier); + /** * out_of_memory - kill the "best" process when we run out of memory * @@ -318,6 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { struct task_struct *p; unsigned long points = 0; + unsigned long freed = 0; + + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory back in the last second. */ + return; if (printk_ratelimit()) { printk("oom-killer: gfp_mask=0x%x, order=%d\n", -- cgit v1.2.3-71-gd317 From 7ff6f08295d90ab20d25200ef485ebb45b1b8d71 Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Mon, 25 Sep 2006 23:31:21 -0700 Subject: [PATCH] CPU hotplug compatible alloc_percpu() This patch splits alloc_percpu() up into two phases. Likewise for free_percpu(). This allows clients to limit initial allocations to online cpu's, and to populate or depopulate per-cpu data at run time as needed: struct my_struct *obj; /* initial allocation for online cpu's */ obj = percpu_alloc(sizeof(struct my_struct), GFP_KERNEL); ... /* populate per-cpu data for cpu coming online */ ptr = percpu_populate(obj, sizeof(struct my_struct), GFP_KERNEL, cpu); ... /* access per-cpu object */ ptr = percpu_ptr(obj, smp_processor_id()); ... /* depopulate per-cpu data for cpu going offline */ percpu_depopulate(obj, cpu); ... /* final removal */ percpu_free(obj); Signed-off-by: Martin Peschke Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 79 +++++++++++++++++------ mm/slab.c | 166 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 169 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f926490a7d8b..3835a9642f13 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,9 +1,12 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H + #include /* For preempt_disable() */ #include /* For kmalloc() */ #include #include /* For memset() */ +#include + #include /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ @@ -27,39 +30,77 @@ struct percpu_data { void *ptrs[NR_CPUS]; }; +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) /* - * Use this to get to a cpu's version of the per-cpu object allocated using - * alloc_percpu. Non-atomic access to the current CPU's version should + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should * probably be combined with get_cpu()/put_cpu(). */ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ +#define percpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *__alloc_percpu(size_t size); -extern void free_percpu(const void *); +extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu); +extern void percpu_depopulate(void *__pdata, int cpu); +extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask); +extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask); +extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); +extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) + +static inline void percpu_depopulate(void *__pdata, int cpu) +{ +} + +static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ +} -static inline void *__alloc_percpu(size_t size) +static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, + int cpu) { - void *ret = kmalloc(size, GFP_KERNEL); - if (ret) - memset(ret, 0, size); - return ret; + return percpu_ptr(__pdata, cpu); } -static inline void free_percpu(const void *ptr) -{ - kfree(ptr); + +static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) +{ + return 0; +} + +static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +{ + return kzalloc(size, gfp); +} + +static inline void percpu_free(void *__pdata) +{ + kfree(__pdata); } #endif /* CONFIG_SMP */ -/* Simple wrapper for the common case: zeros memory. */ -#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) +#define percpu_populate_mask(__pdata, size, gfp, mask) \ + __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) +#define percpu_depopulate_mask(__pdata, mask) \ + __percpu_depopulate_mask((__pdata), &(mask)) +#define percpu_alloc_mask(size, gfp, mask) \ + __percpu_alloc_mask((size), (gfp), &(mask)) + +#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) + +/* (legacy) interface for use without CPU hotplug handling */ + +#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ + cpu_possible_map) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define free_percpu(ptr) percpu_free((ptr)) +#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) #endif /* __LINUX_PERCPU_H */ diff --git a/mm/slab.c b/mm/slab.c index 5870bcbd33cf..00584dbbec03 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3371,52 +3371,127 @@ EXPORT_SYMBOL(__kmalloc_track_caller); #ifdef CONFIG_SMP /** - * __alloc_percpu - allocate one copy of the object for every present - * cpu in the system, zeroing them. - * Objects should be dereferenced using the per_cpu_ptr macro only. + * percpu_depopulate - depopulate per-cpu data for given cpu + * @__pdata: per-cpu data to depopulate + * @cpu: depopulate per-cpu data for this cpu * - * @size: how many bytes of memory are required. + * Depopulating per-cpu data for a cpu going offline would be a typical + * use case. You need to register a cpu hotplug handler for that purpose. */ -void *__alloc_percpu(size_t size) +void percpu_depopulate(void *__pdata, int cpu) { - int i; - struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); + struct percpu_data *pdata = __percpu_disguise(__pdata); + if (pdata->ptrs[cpu]) { + kfree(pdata->ptrs[cpu]); + pdata->ptrs[cpu] = NULL; + } +} +EXPORT_SYMBOL_GPL(percpu_depopulate); - if (!pdata) - return NULL; +/** + * percpu_depopulate_mask - depopulate per-cpu data for some cpu's + * @__pdata: per-cpu data to depopulate + * @mask: depopulate per-cpu data for cpu's selected through mask bits + */ +void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ + int cpu; + for_each_cpu_mask(cpu, *mask) + percpu_depopulate(__pdata, cpu); +} +EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); - /* - * Cannot use for_each_online_cpu since a cpu may come online - * and we have no way of figuring out how to fix the array - * that we have allocated then.... - */ - for_each_possible_cpu(i) { - int node = cpu_to_node(i); +/** + * percpu_populate - populate per-cpu data for given cpu + * @__pdata: per-cpu data to populate further + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @cpu: populate per-data for this cpu + * + * Populating per-cpu data for a cpu coming online would be a typical + * use case. You need to register a cpu hotplug handler for that purpose. + * Per-cpu object is populated with zeroed buffer. + */ +void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) +{ + struct percpu_data *pdata = __percpu_disguise(__pdata); + int node = cpu_to_node(cpu); - if (node_online(node)) - pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node); - else - pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); + BUG_ON(pdata->ptrs[cpu]); + if (node_online(node)) { + /* FIXME: kzalloc_node(size, gfp, node) */ + pdata->ptrs[cpu] = kmalloc_node(size, gfp, node); + if (pdata->ptrs[cpu]) + memset(pdata->ptrs[cpu], 0, size); + } else + pdata->ptrs[cpu] = kzalloc(size, gfp); + return pdata->ptrs[cpu]; +} +EXPORT_SYMBOL_GPL(percpu_populate); - if (!pdata->ptrs[i]) - goto unwind_oom; - memset(pdata->ptrs[i], 0, size); - } +/** + * percpu_populate_mask - populate per-cpu data for more cpu's + * @__pdata: per-cpu data to populate further + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @mask: populate per-cpu data for cpu's selected through mask bits + * + * Per-cpu objects are populated with zeroed buffers. + */ +int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) +{ + cpumask_t populated = CPU_MASK_NONE; + int cpu; + + for_each_cpu_mask(cpu, *mask) + if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) { + __percpu_depopulate_mask(__pdata, &populated); + return -ENOMEM; + } else + cpu_set(cpu, populated); + return 0; +} +EXPORT_SYMBOL_GPL(__percpu_populate_mask); - /* Catch derefs w/o wrappers */ - return (void *)(~(unsigned long)pdata); +/** + * percpu_alloc_mask - initial setup of per-cpu data + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @mask: populate per-data for cpu's selected through mask bits + * + * Populating per-cpu data for all online cpu's would be a typical use case, + * which is simplified by the percpu_alloc() wrapper. + * Per-cpu objects are populated with zeroed buffers. + */ +void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +{ + void *pdata = kzalloc(sizeof(struct percpu_data), gfp); + void *__pdata = __percpu_disguise(pdata); -unwind_oom: - while (--i >= 0) { - if (!cpu_possible(i)) - continue; - kfree(pdata->ptrs[i]); - } + if (unlikely(!pdata)) + return NULL; + if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) + return __pdata; kfree(pdata); return NULL; } -EXPORT_SYMBOL(__alloc_percpu); -#endif +EXPORT_SYMBOL_GPL(__percpu_alloc_mask); + +/** + * percpu_free - final cleanup of per-cpu data + * @__pdata: object to clean up + * + * We simply clean up any per-cpu object left. No need for the client to + * track and specify through a bis mask which per-cpu objects are to free. + */ +void percpu_free(void *__pdata) +{ + __percpu_depopulate_mask(__pdata, &cpu_possible_map); + kfree(__percpu_disguise(__pdata)); +} +EXPORT_SYMBOL_GPL(percpu_free); +#endif /* CONFIG_SMP */ /** * kmem_cache_free - Deallocate an object @@ -3463,29 +3538,6 @@ void kfree(const void *objp) } EXPORT_SYMBOL(kfree); -#ifdef CONFIG_SMP -/** - * free_percpu - free previously allocated percpu memory - * @objp: pointer returned by alloc_percpu. - * - * Don't free memory not originally allocated by alloc_percpu() - * The complemented objp is to check for that. - */ -void free_percpu(const void *objp) -{ - int i; - struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); - - /* - * We allocate for all cpus so we cannot use for online cpu here. - */ - for_each_possible_cpu(i) - kfree(p->ptrs[i]); - kfree(p); -} -EXPORT_SYMBOL(free_percpu); -#endif - unsigned int kmem_cache_size(struct kmem_cache *cachep) { return obj_size(cachep); -- cgit v1.2.3-71-gd317 From db37648cd6ce9b828abd6d49aa3d269926ee7b7d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Mon, 25 Sep 2006 23:31:24 -0700 Subject: [PATCH] mm: non syncing lock_page() lock_page needs the caller to have a reference on the page->mapping inode due to sync_page, ergo set_page_dirty_lock is obviously buggy according to its comments. Solve it by introducing a new lock_page_nosync which does not do a sync_page. akpm: unpleasant solution to an unpleasant problem. If it goes wrong it could cause great slowdowns while the lock_page() caller waits for kblockd to perform the unplug. And if a filesystem has special sync_page() requirements (none presently do), permanent hangs are possible. otoh, set_page_dirty_lock() is usually (always?) called against userspace pages. They are always up-to-date, so there shouldn't be any pending read I/O against these pages. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 15 +++++++++++++++ mm/filemap.c | 17 +++++++++++++++++ mm/page-writeback.c | 2 +- 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2f5d27f60e..64f950925151 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } extern void FASTCALL(__lock_page(struct page *page)); +extern void FASTCALL(__lock_page_nosync(struct page *page)); extern void FASTCALL(unlock_page(struct page *page)); +/* + * lock_page may only be called if we have the page's inode pinned. + */ static inline void lock_page(struct page *page) { might_sleep(); if (TestSetPageLocked(page)) __lock_page(page); } + +/* + * lock_page_nosync should only be used if we can't pin the page's inode. + * Doesn't play quite so well with block device plugging. + */ +static inline void lock_page_nosync(struct page *page) +{ + might_sleep(); + if (TestSetPageLocked(page)) + __lock_page_nosync(page); +} /* * This is exported only for wait_on_page_locked/wait_on_page_writeback. diff --git a/mm/filemap.c b/mm/filemap.c index b9a60c43b61a..d5af1cab4268 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x) EXPORT_SYMBOL(page_cache_alloc_cold); #endif +static int __sleep_on_page_lock(void *word) +{ + io_schedule(); + return 0; +} + /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of @@ -577,6 +583,17 @@ void fastcall __lock_page(struct page *page) } EXPORT_SYMBOL(__lock_page); +/* + * Variant of lock_page that does not require the caller to hold a reference + * on the page's mapping. + */ +void fastcall __lock_page_nosync(struct page *page) +{ + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, + TASK_UNINTERRUPTIBLE); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b9f4c6f1be86..555752907dc3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -701,7 +701,7 @@ int set_page_dirty_lock(struct page *page) { int ret; - lock_page(page); + lock_page_nosync(page); ret = set_page_dirty(page); unlock_page(page); return ret; -- cgit v1.2.3-71-gd317 From da6052f7b33abe55fbfd7d2213815f58c00a88d4 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Mon, 25 Sep 2006 23:31:35 -0700 Subject: [PATCH] update some mm/ comments Let's try to keep mm/ comments more useful and up to date. This is a start. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 68 +++++++++++++++++++++++++++------------------- include/linux/page-flags.h | 35 ++++++++++++++---------- mm/filemap.c | 8 +++--- 3 files changed, 64 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2db4229a0066..f2018775b995 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -219,7 +219,8 @@ struct inode; * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using - * a page. + * a page, though if it is a pagecache page, rmap structures can tell us + * who is mapping it. */ struct page { unsigned long flags; /* Atomic flags, some possibly @@ -299,8 +300,7 @@ struct page { */ /* - * Drop a ref, return true if the logical refcount fell to zero (the page has - * no users) + * Drop a ref, return true if the refcount fell to zero (the page has no users) */ static inline int put_page_testzero(struct page *page) { @@ -356,43 +356,55 @@ void split_page(struct page *page, unsigned int order); * For the non-reserved pages, page_count(page) denotes a reference count. * page_count() == 0 means the page is free. page->lru is then used for * freelist management in the buddy allocator. - * page_count() == 1 means the page is used for exactly one purpose - * (e.g. a private data page of one process). + * page_count() > 0 means the page has been allocated. * - * A page may be used for kmalloc() or anyone else who does a - * __get_free_page(). In this case the page_count() is at least 1, and - * all other fields are unused but should be 0 or NULL. The - * management of this page is the responsibility of the one who uses - * it. + * Pages are allocated by the slab allocator in order to provide memory + * to kmalloc and kmem_cache_alloc. In this case, the management of the + * page, and the fields in 'struct page' are the responsibility of mm/slab.c + * unless a particular usage is carefully commented. (the responsibility of + * freeing the kmalloc memory is the caller's, of course). * - * The other pages (we may call them "process pages") are completely + * A page may be used by anyone else who does a __get_free_page(). + * In this case, page_count still tracks the references, and should only + * be used through the normal accessor functions. The top bits of page->flags + * and page->virtual store page management information, but all other fields + * are unused and could be used privately, carefully. The management of this + * page is the responsibility of the one who allocated it, and those who have + * subsequently been given references to it. + * + * The other pages (we may call them "pagecache pages") are completely * managed by the Linux memory manager: I/O, buffers, swapping etc. * The following discussion applies only to them. * - * A page may belong to an inode's memory mapping. In this case, - * page->mapping is the pointer to the inode, and page->index is the - * file offset of the page, in units of PAGE_CACHE_SIZE. + * A pagecache page contains an opaque `private' member, which belongs to the + * page's address_space. Usually, this is the address of a circular list of + * the page's disk buffers. PG_private must be set to tell the VM to call + * into the filesystem to release these pages. * - * A page contains an opaque `private' member, which belongs to the - * page's address_space. Usually, this is the address of a circular - * list of the page's disk buffers. + * A page may belong to an inode's memory mapping. In this case, page->mapping + * is the pointer to the inode, and page->index is the file offset of the page, + * in units of PAGE_CACHE_SIZE. * - * For pages belonging to inodes, the page_count() is the number of - * attaches, plus 1 if `private' contains something, plus one for - * the page cache itself. + * If pagecache pages are not associated with an inode, they are said to be + * anonymous pages. These may become associated with the swapcache, and in that + * case PG_swapcache is set, and page->private is an offset into the swapcache. * - * Instead of keeping dirty/clean pages in per address-space lists, we instead - * now tag pages as dirty/under writeback in the radix tree. + * In either case (swapcache or inode backed), the pagecache itself holds one + * reference to the page. Setting PG_private should also increment the + * refcount. The each user mapping also has a reference to the page. * - * There is also a per-mapping radix tree mapping index to the page - * in memory if present. The tree is rooted at mapping->root. + * The pagecache pages are stored in a per-mapping radix tree, which is + * rooted at mapping->page_tree, and indexed by offset. + * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space + * lists, we instead now tag pages as dirty/writeback in the radix tree. * - * All process pages can do I/O: + * All pagecache pages may be subject to I/O: * - inode pages may need to be read from disk, * - inode pages which have been modified and are MAP_SHARED may need - * to be written to disk, - * - private pages which have been modified may need to be swapped out - * to swap space and (later) to be read back into memory. + * to be written back to the inode on disk, + * - anonymous pages (including MAP_PRIVATE file mappings) which have been + * modified may need to be swapped out to swap space and (later) to be read + * back into memory. */ /* diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5748642e9f36..9d7921dd50f0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -13,24 +13,25 @@ * PG_reserved is set for special pages, which can never be swapped out. Some * of them might not even exist (eg empty_bad_page)... * - * The PG_private bitflag is set if page->private contains a valid value. + * The PG_private bitflag is set on pagecache pages if they contain filesystem + * specific data (which is normally at page->private). It can be used by + * private allocations for its own usage. * - * During disk I/O, PG_locked is used. This bit is set before I/O and - * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks - * waiting for the I/O on this page to complete. + * During initiation of disk I/O, PG_locked is set. This bit is set before I/O + * and cleared when writeback _starts_ or when read _completes_. PG_writeback + * is set before writeback starts and cleared when it finishes. + * + * PG_locked also pins a page in pagecache, and blocks truncation of the file + * while it is held. + * + * page_waitqueue(page) is a wait queue of all tasks waiting for the page + * to become unlocked. * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * - * For choosing which pages to swap out, inode pages carry a PG_referenced bit, - * which is set any time the system accesses that page through the (mapping, - * index) hash table. This referenced bit, together with the referenced bit - * in the page tables, is used to manipulate page->age and move the page across - * the active, inactive_dirty and inactive_clean lists. - * - * Note that the referenced bit, the page->lru list_head and the active, - * inactive_dirty and inactive_clean lists are protected by the - * zone->lru_lock, and *NOT* by the usual PG_locked bit! + * PG_referenced, PG_reclaim are used for page reclaim for anonymous and + * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * @@ -42,6 +43,10 @@ * space, they need to be kmapped separately for doing IO on the pages. The * struct page (these bits with information) are always mapped into kernel * address space... + * + * PG_buddy is set to indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + * */ /* @@ -74,7 +79,7 @@ #define PG_checked 8 /* kill me in 2.5.. */ #define PG_arch_1 9 #define PG_reserved 10 -#define PG_private 11 /* Has something at ->private */ +#define PG_private 11 /* If pagecache, has fs-private data */ #define PG_writeback 12 /* Page is under writeback */ #define PG_nosave 13 /* Used for system suspend/resume */ @@ -83,7 +88,7 @@ #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ #define PG_reclaim 17 /* To be reclaimed asap */ -#define PG_nosave_free 18 /* Free, should not be written */ +#define PG_nosave_free 18 /* Used for system suspend/resume */ #define PG_buddy 19 /* Page is free, on buddy lists */ diff --git a/mm/filemap.c b/mm/filemap.c index d5af1cab4268..afcdc72b5e90 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -599,8 +599,8 @@ void fastcall __lock_page_nosync(struct page *page) * @mapping: the address_space to search * @offset: the page index * - * A rather lightweight function, finding and getting a reference to a - * hashed page atomically. + * Is there a pagecache struct page at the given (mapping, offset) tuple? + * If yes, increment its refcount and return it; if no, return NULL. */ struct page * find_get_page(struct address_space *mapping, unsigned long offset) { @@ -987,7 +987,7 @@ page_not_up_to_date: /* Get exclusive access to the page ... */ lock_page(page); - /* Did it get unhashed before we got the lock? */ + /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); page_cache_release(page); @@ -1627,7 +1627,7 @@ no_cached_page: page_not_uptodate: lock_page(page); - /* Did it get unhashed while we waited for it? */ + /* Did it get truncated while we waited for it? */ if (!page->mapping) { unlock_page(page); goto err; -- cgit v1.2.3-71-gd317 From dbe5e69d2d6e591996ea2b817b887d03b60bb143 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Sep 2006 23:31:36 -0700 Subject: [PATCH] slab: optimize kmalloc_node the same way as kmalloc [akpm@osdl.org: export fix] Signed-off-by: Christoph Hellwig Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 25 ++++++++++++++++++++++++- mm/slab.c | 4 ++-- 2 files changed, 26 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 193c03c547ec..2f6bef6a98c9 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -202,7 +202,30 @@ extern int slab_is_available(void); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); -extern void *kmalloc_node(size_t size, gfp_t flags, int node); +extern void *__kmalloc_node(size_t size, gfp_t flags, int node); + +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size)) { + int i = 0; +#define CACHE(x) \ + if (size <= x) \ + goto found; \ + else \ + i++; +#include "kmalloc_sizes.h" +#undef CACHE + { + extern void __you_cannot_kmalloc_that_much(void); + __you_cannot_kmalloc_that_much(); + } +found: + return kmem_cache_alloc_node((flags & GFP_DMA) ? + malloc_sizes[i].cs_dmacachep : + malloc_sizes[i].cs_cachep, flags, node); + } + return __kmalloc_node(size, flags, node); +} #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) { diff --git a/mm/slab.c b/mm/slab.c index 3ad2f64998fd..5e59ce7a46c8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3348,7 +3348,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, gfp_t flags, int node) +void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *cachep; @@ -3357,7 +3357,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node) return NULL; return kmem_cache_alloc_node(cachep, flags, node); } -EXPORT_SYMBOL(kmalloc_node); +EXPORT_SYMBOL(__kmalloc_node); #endif /** -- cgit v1.2.3-71-gd317 From 9b819d204cf602eab1a53a9ec4b8d2ca51e02a1d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:40 -0700 Subject: [PATCH] Add __GFP_THISNODE to avoid fallback to other nodes and ignore cpuset/memory policy restrictions Add a new gfp flag __GFP_THISNODE to avoid fallback to other nodes. This flag is essential if a kernel component requires memory to be located on a certain node. It will be needed for alloc_pages_node() to force allocation on the indicated node and for alloc_pages() to force allocation on the current node. Signed-off-by: Christoph Lameter Cc: Andy Whitcroft Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 ++- kernel/cpuset.c | 2 +- mm/mempolicy.c | 2 +- mm/page_alloc.c | 3 +++ 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 63ab88a36f39..0eda5b6dedbd 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -45,6 +45,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) @@ -53,7 +54,7 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4ea6f0dc2fc5..76940361273e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2316,7 +2316,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) const struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ - if (in_interrupt()) + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; node = z->zone_pgdat->node_id; might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c3429a710ab1..8002e1faccda 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1290,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_task_memory_state(); - if (!pol || in_interrupt()) + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 208a6b03aa78..ea498788af53 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -893,6 +893,9 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ do { + if (unlikely((gfp_mask & __GFP_THISNODE) && + (*z)->zone_pgdat != zonelist->zones[0]->zone_pgdat)) + break; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed(*z, gfp_mask)) continue; -- cgit v1.2.3-71-gd317 From 980128f223fa3c75e3ebdde650c9f1bcabd4c0a2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:46 -0700 Subject: [PATCH] Define easier to handle GFP_THISNODE In many places we will need to use the same combination of flags. Specify a single GFP_THISNODE definition for ease of use in gfp.h. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/uncached.c | 3 +-- include/linux/gfp.h | 2 ++ mm/migrate.c | 4 +--- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index f813caa80570..c58e933694d5 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,8 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ - page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | - __GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0eda5b6dedbd..8b34aabfe4c6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -67,6 +67,8 @@ struct vm_area_struct; #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) + /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ diff --git a/mm/migrate.c b/mm/migrate.c index 6196f45c5263..20a8c2687b1e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -741,9 +741,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, *result = &pm->status; - return alloc_pages_node(pm->node, - GFP_HIGHUSER | __GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY, - 0); + return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0); } /* -- cgit v1.2.3-71-gd317 From 8417bba4b151346ed475fcc923693c9e3be89063 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:51 -0700 Subject: [PATCH] Replace min_unmapped_ratio by min_unmapped_pages in struct zone *_pages is a better description of the role of the variable. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- mm/page_alloc.c | 4 ++-- mm/vmscan.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7fe317164b73..a703527e2b45 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -169,7 +169,7 @@ struct zone { /* * zone reclaim becomes active if more unmapped pages exist. */ - unsigned long min_unmapped_ratio; + unsigned long min_unmapped_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f7ea020c23ea..5da6bc4e0a6b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2002,7 +2002,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; #ifdef CONFIG_NUMA - zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) + zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; #endif zone->name = zone_names[j]; @@ -2313,7 +2313,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, return rc; for_each_zone(zone) - zone->min_unmapped_ratio = (zone->present_pages * + zone->min_unmapped_pages = (zone->present_pages * sysctl_min_unmapped_ratio) / 100; return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 8f35d7d585cb..5154c25e8440 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1618,7 +1618,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages) return 0; /* -- cgit v1.2.3-71-gd317 From 972d1a7b140569084439a81265a0f15b74e924e0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:51 -0700 Subject: [PATCH] ZVC: Support NR_SLAB_RECLAIMABLE / NR_SLAB_UNRECLAIMABLE Remove the atomic counter for slab_reclaim_pages and replace the counter and NR_SLAB with two ZVC counter that account for unreclaimable and reclaimable slab pages: NR_SLAB_RECLAIMABLE and NR_SLAB_UNRECLAIMABLE. Change the check in vmscan.c to refer to to NR_SLAB_RECLAIMABLE. The intend seems to be to check for slab pages that could be freed. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pgtable.c | 4 +++- drivers/base/node.c | 9 +++++++-- fs/proc/proc_misc.c | 7 ++++++- include/linux/mmzone.h | 3 ++- include/linux/slab.h | 2 -- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/page_alloc.c | 3 ++- mm/slab.c | 24 +++++++++++------------- mm/slob.c | 4 ---- mm/vmscan.c | 2 +- mm/vmstat.c | 3 ++- 12 files changed, 36 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index bd98768d8764..a9f4910a22f8 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -60,7 +60,9 @@ void show_mem(void) printk(KERN_INFO "%lu pages writeback\n", global_page_state(NR_WRITEBACK)); printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); + printk(KERN_INFO "%lu pages slab\n", + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)); printk(KERN_INFO "%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); } diff --git a/drivers/base/node.c b/drivers/base/node.c index e09f5c2c11ee..001e6f6b9c1b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -68,7 +68,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" - "Node %d Slab: %8lu kB\n", + "Node %d Slab: %8lu kB\n" + "Node %d SReclaimable: %8lu kB\n" + "Node %d SUnreclaim: %8lu kB\n", nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), @@ -88,7 +90,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) nid, K(node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), nid, K(node_page_state(nid, NR_BOUNCE)), - nid, K(node_page_state(nid, NR_SLAB))); + nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) + + node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), + nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), + nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); n += hugetlb_report_node_meminfo(nid, buf + n); return n; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index caa0a51560a0..5bbd60896050 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -170,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" "Slab: %8lu kB\n" + "SReclaimable: %8lu kB\n" + "SUnreclaim: %8lu kB\n" "PageTables: %8lu kB\n" "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" @@ -197,7 +199,10 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(global_page_state(NR_WRITEBACK)), K(global_page_state(NR_ANON_PAGES)), K(global_page_state(NR_FILE_MAPPED)), - K(global_page_state(NR_SLAB)), + K(global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_SLAB_RECLAIMABLE)), + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_PAGETABLE)), K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a703527e2b45..08c41b9f92e0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -51,7 +51,8 @@ enum zone_stat_item { NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, - NR_SLAB, /* Pages used by slab allocator */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_FILE_DIRTY, NR_WRITEBACK, diff --git a/include/linux/slab.h b/include/linux/slab.h index 2f6bef6a98c9..66d6eb78d1c6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -284,8 +284,6 @@ extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; -extern atomic_t slab_reclaim_pages; - #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/mm/mmap.c b/mm/mmap.c index 8507ee9cd573..eea8eefd51a8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -116,7 +116,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root diff --git a/mm/nommu.c b/mm/nommu.c index c576df71e3bb..d99dea31e443 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1133,7 +1133,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5da6bc4e0a6b..47e98423b30d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1304,7 +1304,8 @@ void show_free_areas(void) global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), nr_free_pages(), - global_page_state(NR_SLAB), + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), global_page_state(NR_PAGETABLE)); diff --git a/mm/slab.c b/mm/slab.c index 13b5050f84cc..7a48eb1a60c8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -735,14 +735,6 @@ static inline void init_lock_keys(void) static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; -/* - * vm_enough_memory() looks at this to determine how many slab-allocated pages - * are possibly freeable under pressure - * - * SLAB_RECLAIM_ACCOUNT turns this on per-slab - */ -atomic_t slab_reclaim_pages; - /* * chicken and egg problem: delay the per-cpu array allocation * until the general caches are up. @@ -1580,8 +1572,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_add(nr_pages, &slab_reclaim_pages); - add_zone_page_state(page_zone(page), NR_SLAB, nr_pages); + add_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_pages); + else + add_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_pages); for (i = 0; i < nr_pages; i++) __SetPageSlab(page + i); return page_address(page); @@ -1596,7 +1591,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; - sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed); + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + sub_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_freed); + else + sub_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); __ClearPageSlab(page); @@ -1605,8 +1605,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; free_pages((unsigned long)addr, cachep->gfporder); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); } static void kmem_rcu_free(struct rcu_head *head) diff --git a/mm/slob.c b/mm/slob.c index 4c28a421b270..20188627347c 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -339,7 +339,3 @@ void kmem_cache_init(void) mod_timer(&slob_timer, jiffies + HZ); } - -atomic_t slab_reclaim_pages = ATOMIC_INIT(0); -EXPORT_SYMBOL(slab_reclaim_pages); - diff --git a/mm/vmscan.c b/mm/vmscan.c index 5154c25e8440..349797ba4bac 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1378,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) for_each_zone(zone) lru_pages += zone->nr_active + zone->nr_inactive; - nr_slab = global_page_state(NR_SLAB); + nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); /* If slab caches are huge, it's better to hit them first */ while (nr_slab >= lru_pages) { reclaim_state.reclaimed_slab = 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 968c0072e19a..490d8c1a0ded 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -458,7 +458,8 @@ static char *vmstat_text[] = { "nr_anon_pages", "nr_mapped", "nr_file_pages", - "nr_slab", + "nr_slab_reclaimable", + "nr_slab_unreclaimable", "nr_page_table_pages", "nr_dirty", "nr_writeback", -- cgit v1.2.3-71-gd317 From 0ff38490c836dc379ff7ec45b10a15a662f4e5f6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:52 -0700 Subject: [PATCH] zone_reclaim: dynamic slab reclaim Currently one can enable slab reclaim by setting an explicit option in /proc/sys/vm/zone_reclaim_mode. Slab reclaim is then used as a final option if the freeing of unmapped file backed pages is not enough to free enough pages to allow a local allocation. However, that means that the slab can grow excessively and that most memory of a node may be used by slabs. We have had a case where a machine with 46GB of memory was using 40-42GB for slab. Zone reclaim was effective in dealing with pagecache pages. However, slab reclaim was only done during global reclaim (which is a bit rare on NUMA systems). This patch implements slab reclaim during zone reclaim. Zone reclaim occurs if there is a danger of an off node allocation. At that point we 1. Shrink the per node page cache if the number of pagecache pages is more than min_unmapped_ratio percent of pages in a zone. 2. Shrink the slab cache if the number of the nodes reclaimable slab pages (patch depends on earlier one that implements that counter) are more than min_slab_ratio (a new /proc/sys/vm tunable). The shrinking of the slab cache is a bit problematic since it is not node specific. So we simply calculate what point in the slab we want to reach (current per node slab use minus the number of pages that neeed to be allocated) and then repeately run the global reclaim until that is unsuccessful or we have reached the limit. I hope we will have zone based slab reclaim at some point which will make that easier. The default for the min_slab_ratio is 5% Also remove the slab option from /proc/sys/vm/zone_reclaim_mode. [akpm@osdl.org: cleanups] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 27 +++++++++++++++------ include/linux/mmzone.h | 3 +++ include/linux/swap.h | 1 + include/linux/sysctl.h | 1 + kernel/sysctl.c | 11 +++++++++ mm/page_alloc.c | 17 +++++++++++++ mm/vmscan.c | 58 +++++++++++++++++++++++++++++---------------- 7 files changed, 90 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 7cee90223d3a..20d0d797f539 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm: - drop-caches - zone_reclaim_mode - min_unmapped_ratio +- min_slab_ratio - panic_on_oom ============================================================== @@ -138,7 +139,6 @@ This is value ORed together of 1 = Zone reclaim on 2 = Zone reclaim writes dirty pages out 4 = Zone reclaim swaps pages -8 = Also do a global slab reclaim pass zone_reclaim_mode is set during bootup to 1 if it is determined that pages from remote zones will cause a measurable performance reduction. The @@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. -It may be advisable to allow slab reclaim if the system makes heavy -use of files and builds up large slab caches. However, the slab -shrink operation is global, may take a long time and free slabs -in all nodes of the system. - ============================================================= min_unmapped_ratio: This is available only on NUMA kernels. -A percentage of the file backed pages in each zone. Zone reclaim will only +A percentage of the total pages in each zone. Zone reclaim will only occur if more than this percentage of pages are file backed and unmapped. This is to insure that a minimal amount of local pages is still available for file I/O even if the node is overallocated. @@ -182,6 +177,24 @@ The default is 1 percent. ============================================================= +min_slab_ratio: + +This is available only on NUMA kernels. + +A percentage of the total pages in each zone. On Zone reclaim +(fallback from the local zone occurs) slabs will be reclaimed if more +than this percentage of pages in a zone are reclaimable slab pages. +This insures that the slab growth stays under control even in NUMA +systems that rarely perform global reclaim. + +The default is 5 percent. + +Note that slab reclaim is triggered in a per zone / node fashion. +The process of reclaiming slab memory is currently not node specific +and may not be fast. + +============================================================= + panic_on_oom This enables or disables panic on out-of-memory feature. If this is set to 1, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 08c41b9f92e0..3693f1a52788 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -171,6 +171,7 @@ struct zone { * zone reclaim becomes active if more unmapped pages exist. */ unsigned long min_unmapped_pages; + unsigned long min_slab_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -448,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include /* Returns the number of the current Node. */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 32db06c8ffe0..a2f5ad7c2d2e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -193,6 +193,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; extern int sysctl_min_unmapped_ratio; +extern int sysctl_min_slab_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 736ed917a4f8..eca555781d05 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -191,6 +191,7 @@ enum VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ + VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc37138..fd43c3e6786b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -943,6 +943,17 @@ static ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = VM_MIN_SLAB, + .procname = "min_slab_ratio", + .data = &sysctl_min_slab_ratio, + .maxlen = sizeof(sysctl_min_slab_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, #endif #ifdef CONFIG_X86_32 { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 47e98423b30d..cf913bdd433e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2005,6 +2005,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, #ifdef CONFIG_NUMA zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; + zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; #endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); @@ -2318,6 +2319,22 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, sysctl_min_unmapped_ratio) / 100; return 0; } + +int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_slab_pages = (zone->present_pages * + sysctl_min_slab_ratio) / 100; + return 0; +} #endif /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 349797ba4bac..089e943c4d38 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1527,7 +1527,6 @@ int zone_reclaim_mode __read_mostly; #define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ #define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ -#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */ /* * Priority for ZONE_RECLAIM. This determines the fraction of pages @@ -1542,6 +1541,12 @@ int zone_reclaim_mode __read_mostly; */ int sysctl_min_unmapped_ratio = 1; +/* + * If the number of slab pages in a zone grows beyond this percentage then + * slab reclaim needs to occur. + */ +int sysctl_min_slab_ratio = 5; + /* * Try to free up some pages from this zone through reclaim. */ @@ -1573,29 +1578,37 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - /* - * Free memory by calling shrink zone with increasing priorities - * until we have enough memory freed. - */ - priority = ZONE_RECLAIM_PRIORITY; - do { - nr_reclaimed += shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && nr_reclaimed < nr_pages); + if (zone_page_state(zone, NR_FILE_PAGES) - + zone_page_state(zone, NR_FILE_MAPPED) > + zone->min_unmapped_pages) { + /* + * Free memory by calling shrink zone with increasing + * priorities until we have enough memory freed. + */ + priority = ZONE_RECLAIM_PRIORITY; + do { + nr_reclaimed += shrink_zone(priority, zone, &sc); + priority--; + } while (priority >= 0 && nr_reclaimed < nr_pages); + } - if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { + if (zone_page_state(zone, NR_SLAB_RECLAIMABLE) > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how - * many pages were freed in this zone. So we just shake the slab - * a bit and then go off node for this particular allocation - * despite possibly having freed enough memory to allocate in - * this zone. If we freed local memory then the next - * allocations will be local again. + * many pages were freed in this zone. So we take the current + * number of slab pages and shake the slab until it is reduced + * by the same nr_pages that we used for reclaiming unmapped + * pages. * - * shrink_slab will free memory on all zones and may take - * a long time. + * Note that shrink_slab will free memory on all zones and may + * take a long time. */ - shrink_slab(sc.nr_scanned, gfp_mask, order); + unsigned long limit = zone_page_state(zone, + NR_SLAB_RECLAIMABLE) - nr_pages; + + while (shrink_slab(sc.nr_scanned, gfp_mask, order) && + zone_page_state(zone, NR_SLAB_RECLAIMABLE) > limit) + ; } p->reclaim_state = NULL; @@ -1609,7 +1622,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Zone reclaim reclaims unmapped file backed pages. + * Zone reclaim reclaims unmapped file backed pages and + * slab pages if we are over the defined limits. * * A small portion of unmapped file backed pages is needed for * file I/O otherwise pages read by file I/O will be immediately @@ -1618,7 +1632,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages) + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages + && zone_page_state(zone, NR_SLAB_RECLAIMABLE) + <= zone->min_slab_pages) return 0; /* -- cgit v1.2.3-71-gd317 From 89fa30242facca249aead2aac03c4c69764f911c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:55 -0700 Subject: [PATCH] NUMA: Add zone_to_nid function There are many places where we need to determine the node of a zone. Currently we use a difficult to read sequence of pointer dereferencing. Put that into an inline function and use throughout VM. Maybe we can find a way to optimize the lookup in the future. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/discontig.c | 2 +- arch/parisc/mm/init.c | 2 +- include/linux/mm.h | 7 ++++++- kernel/cpuset.c | 4 ++-- mm/hugetlb.c | 2 +- mm/mempolicy.c | 6 +++--- mm/oom_kill.c | 3 +-- mm/page_alloc.c | 2 +- mm/vmscan.c | 2 +- 9 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 07c300f93764..fb5d8b747de4 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -422,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro) zone_end_pfn = zone_start_pfn + zone->spanned_pages; printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone->zone_pgdat->node_id, + zone->name, zone_to_nid(zone), zone_start_pfn, zone_end_pfn); for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index c7329615ef94..25ad28d63e88 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -551,7 +551,7 @@ void show_mem(void) printk("Zone list for zone %d on node %d: ", j, i); for (k = 0; zl->zones[k] != NULL; k++) - printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name); + printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name); printk("\n"); } } diff --git a/include/linux/mm.h b/include/linux/mm.h index f2018775b995..856f0ee7e84a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -499,12 +499,17 @@ static inline struct zone *page_zone(struct page *page) return zone_table[page_zone_id(page)]; } +static inline unsigned long zone_to_nid(struct zone *zone) +{ + return zone->zone_pgdat->node_id; +} + static inline unsigned long page_to_nid(struct page *page) { if (FLAGS_HAS_NODE) return (page->flags >> NODES_PGSHIFT) & NODES_MASK; else - return page_zone(page)->zone_pgdat->node_id; + return zone_to_nid(page_zone(page)); } static inline unsigned long page_to_section(struct page *page) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 76940361273e..cff41511269f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) int i; for (i = 0; zl->zones[i]; i++) { - int nid = zl->zones[i]->zone_pgdat->node_id; + int nid = zone_to_nid(zl->zones[i]); if (node_isset(nid, current->mems_allowed)) return 1; @@ -2318,7 +2318,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; - node = z->zone_pgdat->node_id; + node = zone_to_nid(z); might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3aceadce1a76..7c7d03dbf73d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, struct zone **z; for (z = zonelist->zones; *z; z++) { - nid = (*z)->zone_pgdat->node_id; + nid = zone_to_nid(*z); if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && !list_empty(&hugepage_freelists[nid])) break; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8002e1faccda..38f89650bc84 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -487,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes) switch (p->policy) { case MPOL_BIND: for (i = 0; p->v.zonelist->zones[i]; i++) - node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, + node_set(zone_to_nid(p->v.zonelist->zones[i]), *nodes); break; case MPOL_DEFAULT: @@ -1145,7 +1145,7 @@ unsigned slab_node(struct mempolicy *policy) * Follow bind policy behavior and start allocation at the * first node. */ - return policy->v.zonelist->zones[0]->zone_pgdat->node_id; + return zone_to_nid(policy->v.zonelist->zones[0]); case MPOL_PREFERRED: if (policy->v.preferred_node >= 0) @@ -1649,7 +1649,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) nodes_clear(nodes); for (z = pol->v.zonelist->zones; *z; z++) - node_set((*z)->zone_pgdat->node_id, nodes); + node_set(zone_to_nid(*z), nodes); nodes_remap(tmp, nodes, *mpolmask, *newmask); nodes = tmp; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f1c0ef1fd21f..bada3d03119f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -177,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) for (z = zonelist->zones; *z; z++) if (cpuset_zone_allowed(*z, gfp_mask)) - node_clear((*z)->zone_pgdat->node_id, - nodes); + node_clear(zone_to_nid(*z), nodes); else return CONSTRAINT_CPUSET; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cf913bdd433e..51070b6d593f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1217,7 +1217,7 @@ unsigned int nr_free_pagecache_pages(void) #ifdef CONFIG_NUMA static void show_node(struct zone *zone) { - printk("Node %d ", zone->zone_pgdat->node_id); + printk("Node %ld ", zone_to_nid(zone)); } #else #define show_node(zone) do { } while (0) diff --git a/mm/vmscan.c b/mm/vmscan.c index b950f193816e..87779dda4ec6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1661,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * over remote processors and spread off node memory allocations * as wide as possible. */ - node_id = zone->zone_pgdat->node_id; + node_id = zone_to_nid(zone); mask = node_to_cpumask(node_id); if (!cpus_empty(mask) && node_id != numa_node_id()) return 0; -- cgit v1.2.3-71-gd317 From 62bac0185ad3dfef11d9602980445c54d45199c6 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 25 Sep 2006 23:31:56 -0700 Subject: [PATCH] selinux: eliminate selinux_task_ctxid Eliminate selinux_task_ctxid since it duplicates selinux_task_get_sid. Signed-off-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/selinux.h | 15 --------------- kernel/auditsc.c | 2 +- security/selinux/exports.c | 9 --------- 3 files changed, 1 insertion(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/selinux.h b/include/linux/selinux.h index aad4e390d6a5..79e4707ca772 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -69,16 +69,6 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, */ void selinux_audit_set_callback(int (*callback)(void)); -/** - * selinux_task_ctxid - determine a context ID for a process. - * @tsk: the task object - * @ctxid: ID value returned via this - * - * On return, ctxid will contain an ID for the context. This value - * should only be used opaquely. - */ -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); - /** * selinux_ctxid_to_string - map a security context ID to a string * @ctxid: security context ID to be converted. @@ -166,11 +156,6 @@ static inline void selinux_audit_set_callback(int (*callback)(void)) return; } -static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) -{ - *ctxid = 0; -} - static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) { *ctx = NULL; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1bd8827a0102..331e17010393 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk, logged upon error */ if (f->se_rule) { if (need_sid) { - selinux_task_ctxid(tsk, &sid); + selinux_get_task_sid(tsk, &sid); need_sid = 0; } result = selinux_audit_rule_match(sid, f->type, diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 9d7737db5e51..ee0fb47f81ae 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -21,15 +21,6 @@ #include "security.h" #include "objsec.h" -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) -{ - struct task_security_struct *tsec = tsk->security; - if (selinux_enabled) - *ctxid = tsec->sid; - else - *ctxid = 0; -} - int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) { if (selinux_enabled) -- cgit v1.2.3-71-gd317 From 1a70cd40cb291c25b67ec0da715a49d76719329d Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 25 Sep 2006 23:31:57 -0700 Subject: [PATCH] selinux: rename selinux_ctxid_to_string Rename selinux_ctxid_to_string to selinux_sid_to_string to be consistent with other interfaces. Signed-off-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/selinux.h | 8 ++++---- kernel/audit.c | 14 +++++++------- kernel/auditfilter.c | 2 +- kernel/auditsc.c | 4 ++-- security/selinux/exports.c | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 79e4707ca772..df9098de4c99 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -70,8 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, void selinux_audit_set_callback(int (*callback)(void)); /** - * selinux_ctxid_to_string - map a security context ID to a string - * @ctxid: security context ID to be converted. + * selinux_sid_to_string - map a security context ID to a string + * @sid: security context ID to be converted. * @ctx: address of context string to be returned * @ctxlen: length of returned context string. * @@ -79,7 +79,7 @@ void selinux_audit_set_callback(int (*callback)(void)); * string will be allocated internally, and the caller must call * kfree() on it after use. */ -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen); /** * selinux_get_inode_sid - get the inode's security context ID @@ -156,7 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void)) return; } -static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { *ctx = NULL; *ctxlen = 0; diff --git a/kernel/audit.c b/kernel/audit.c index 963fd15c9621..f9889ee77825 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; if (sid) { - if ((err = selinux_ctxid_to_string( + if ((err = selinux_sid_to_string( sid, &ctx, &len))) return err; else @@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) "user pid=%d uid=%u auid=%u", pid, uid, loginuid); if (sid) { - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( sid, &ctx, &len)) { audit_log_format(ab, " ssid=%u", sid); @@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) loginuid, sid); break; case AUDIT_SIGNAL_INFO: - err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len); + err = selinux_sid_to_string(audit_sig_sid, &ctx, &len); if (err) return err; sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a44879b0c72f..1a58a81fb09d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, if (sid) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string(sid, &ctx, &len)) + if (selinux_sid_to_string(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); else audit_log_format(ab, " subj=%s", ctx); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 331e17010393..fb83c5cb8c32 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (axi->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( axi->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", axi->osid); @@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; diff --git a/security/selinux/exports.c b/security/selinux/exports.c index ee0fb47f81ae..b6f96943be1f 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -21,10 +21,10 @@ #include "security.h" #include "objsec.h" -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { if (selinux_enabled) - return security_sid_to_context(ctxid, ctx, ctxlen); + return security_sid_to_context(sid, ctx, ctxlen); else { *ctx = NULL; *ctxlen = 0; -- cgit v1.2.3-71-gd317 From 9a2f44f01a67a6ecca71515af999895b45a2aeb0 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 25 Sep 2006 23:31:58 -0700 Subject: [PATCH] selinux: replace ctxid with sid in selinux_audit_rule_match interface Replace ctxid with sid in selinux_audit_rule_match interface for consistency with other interfaces. Signed-off-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/selinux.h | 6 +++--- security/selinux/ss/services.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/selinux.h b/include/linux/selinux.h index df9098de4c99..d1b7ca6c1c57 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); /** * selinux_audit_rule_match - determine if a context ID matches a rule. - * @ctxid: the context ID to check + * @sid: the context ID to check * @field: the field this rule refers to * @op: the operater the rule uses * @rule: pointer to the audit rule to check against @@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx); @@ -144,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule) return; } -static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 22ed17c17718..988079f45294 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2003,7 +2003,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, return rc; } -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { @@ -2026,11 +2026,11 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, goto out; } - ctxt = sidtab_search(&sidtab, ctxid); + ctxt = sidtab_search(&sidtab, sid); if (!ctxt) { audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, "selinux_audit_rule_match: unrecognized SID %d\n", - ctxid); + sid); match = -ENOENT; goto out; } -- cgit v1.2.3-71-gd317 From af8c65b57aaa4ae321af34dbfc5ca7f5625263fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2006 23:32:07 -0700 Subject: [PATCH] FRV: permit __do_IRQ() to be dispensed with Permit __do_IRQ() to be dispensed with based on a configuration option. Signed-off-by: David Howells Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/Kconfig | 4 ++++ include/linux/irq.h | 6 ++++++ kernel/irq/handle.c | 2 ++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 5833808fb823..f7b171b92ea2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -29,6 +29,10 @@ config GENERIC_HARDIRQS bool default y +config GENERIC_HARDIRQS_NO__DO_IRQ + bool + default y + config GENERIC_TIME bool default y diff --git a/include/linux/irq.h b/include/linux/irq.h index fbf6d901e9c2..48d3cb3b6a47 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *, * Monolithic do_IRQ implementation. * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) */ +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); +#endif /* * Architectures call this to let the generic IRQ layer @@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs) { struct irq_desc *desc = irq_desc + irq; +#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ + desc->handle_irq(irq, desc, regs); +#else if (likely(desc->handle_irq)) desc->handle_irq(irq, desc, regs); else __do_IRQ(irq, regs); +#endif } /* Handling of unhandled and spurious interrupts: */ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 48a53f68af96..4c6cdbaed661 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, return retval; } +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ /** * __do_IRQ - original all in one highlevel IRQ handler * @irq: the interrupt number @@ -253,6 +254,7 @@ out: return 1; } +#endif #ifdef CONFIG_TRACE_IRQFLAGS -- cgit v1.2.3-71-gd317 From 5f97f7f9400de47ae837170bb274e90ad3934386 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 25 Sep 2006 23:32:13 -0700 Subject: [PATCH] avr32 architecture This adds support for the Atmel AVR32 architecture as well as the AT32AP7000 CPU and the AT32STK1000 development board. AVR32 is a new high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular emphasis on low power consumption and high code density. The AVR32 architecture is not binary compatible with earlier 8-bit AVR architectures. The AVR32 architecture, including the instruction set, is described by the AVR32 Architecture Manual, available from http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf The Atmel AT32AP7000 is the first CPU implementing the AVR32 architecture. It features a 7-stage pipeline, 16KB instruction and data caches and a full Memory Management Unit. It also comes with a large set of integrated peripherals, many of which are shared with the AT91 ARM-based controllers from Atmel. Full data sheet is available from http://www.atmel.com/dyn/resources/prod_documents/doc32003.pdf while the CPU core implementation including caches and MMU is documented by the AVR32 AP Technical Reference, available from http://www.atmel.com/dyn/resources/prod_documents/doc32001.pdf Information about the AT32STK1000 development board can be found at http://www.atmel.com/dyn/products/tools_card.asp?tool_id=3918 including a BSP CD image with an earlier version of this patch, development tools (binaries and source/patches) and a root filesystem image suitable for booting from SD card. Alternatively, there's a preliminary "getting started" guide available at http://avr32linux.org/twiki/bin/view/Main/GettingStarted which provides links to the sources and patches you will need in order to set up a cross-compiling environment for avr32-linux. This patch, as well as the other patches included with the BSP and the toolchain patches, is actively supported by Atmel Corporation. [dmccr@us.ibm.com: Fix more pxx_page macro locations] [bunk@stusta.de: fix `make defconfig'] Signed-off-by: Haavard Skinnemoen Signed-off-by: Adrian Bunk Signed-off-by: Dave McCracken Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 17 + arch/avr32/Kconfig | 196 +++++ arch/avr32/Kconfig.debug | 19 + arch/avr32/Makefile | 84 +++ arch/avr32/boards/atstk1000/Makefile | 2 + arch/avr32/boards/atstk1000/atstk1002.c | 37 + arch/avr32/boards/atstk1000/setup.c | 59 ++ arch/avr32/boards/atstk1000/spi.c | 27 + arch/avr32/boot/images/Makefile | 62 ++ arch/avr32/boot/u-boot/Makefile | 3 + arch/avr32/boot/u-boot/empty.S | 1 + arch/avr32/boot/u-boot/head.S | 60 ++ arch/avr32/configs/atstk1002_defconfig | 754 ++++++++++++++++++++ arch/avr32/kernel/Makefile | 18 + arch/avr32/kernel/asm-offsets.c | 25 + arch/avr32/kernel/avr32_ksyms.c | 55 ++ arch/avr32/kernel/cpu.c | 327 +++++++++ arch/avr32/kernel/entry-avr32b.S | 678 ++++++++++++++++++ arch/avr32/kernel/head.S | 45 ++ arch/avr32/kernel/init_task.c | 38 + arch/avr32/kernel/irq.c | 71 ++ arch/avr32/kernel/kprobes.c | 270 +++++++ arch/avr32/kernel/module.c | 324 +++++++++ arch/avr32/kernel/process.c | 276 ++++++++ arch/avr32/kernel/ptrace.c | 371 ++++++++++ arch/avr32/kernel/semaphore.c | 148 ++++ arch/avr32/kernel/setup.c | 335 +++++++++ arch/avr32/kernel/signal.c | 328 +++++++++ arch/avr32/kernel/switch_to.S | 35 + arch/avr32/kernel/sys_avr32.c | 51 ++ arch/avr32/kernel/syscall-stubs.S | 102 +++ arch/avr32/kernel/syscall_table.S | 289 ++++++++ arch/avr32/kernel/time.c | 238 +++++++ arch/avr32/kernel/traps.c | 425 +++++++++++ arch/avr32/kernel/vmlinux.lds.c | 139 ++++ arch/avr32/lib/Makefile | 10 + arch/avr32/lib/__avr32_asr64.S | 31 + arch/avr32/lib/__avr32_lsl64.S | 31 + arch/avr32/lib/__avr32_lsr64.S | 31 + arch/avr32/lib/clear_user.S | 76 ++ arch/avr32/lib/copy_user.S | 119 ++++ arch/avr32/lib/csum_partial.S | 47 ++ arch/avr32/lib/csum_partial_copy_generic.S | 99 +++ arch/avr32/lib/delay.c | 55 ++ arch/avr32/lib/findbit.S | 154 ++++ arch/avr32/lib/io-readsl.S | 24 + arch/avr32/lib/io-readsw.S | 43 ++ arch/avr32/lib/io-writesl.S | 20 + arch/avr32/lib/io-writesw.S | 38 + arch/avr32/lib/libgcc.h | 33 + arch/avr32/lib/longlong.h | 98 +++ arch/avr32/lib/memcpy.S | 62 ++ arch/avr32/lib/memset.S | 72 ++ arch/avr32/lib/strncpy_from_user.S | 60 ++ arch/avr32/lib/strnlen_user.S | 67 ++ arch/avr32/mach-at32ap/Makefile | 2 + arch/avr32/mach-at32ap/at32ap.c | 90 +++ arch/avr32/mach-at32ap/at32ap7000.c | 866 +++++++++++++++++++++++ arch/avr32/mach-at32ap/clock.c | 148 ++++ arch/avr32/mach-at32ap/clock.h | 30 + arch/avr32/mach-at32ap/extint.c | 171 +++++ arch/avr32/mach-at32ap/intc.c | 133 ++++ arch/avr32/mach-at32ap/intc.h | 327 +++++++++ arch/avr32/mach-at32ap/pio.c | 118 +++ arch/avr32/mach-at32ap/pio.h | 178 +++++ arch/avr32/mach-at32ap/sm.c | 289 ++++++++ arch/avr32/mach-at32ap/sm.h | 240 +++++++ arch/avr32/mm/Makefile | 6 + arch/avr32/mm/cache.c | 150 ++++ arch/avr32/mm/clear_page.S | 25 + arch/avr32/mm/copy_page.S | 28 + arch/avr32/mm/dma-coherent.c | 139 ++++ arch/avr32/mm/fault.c | 315 +++++++++ arch/avr32/mm/init.c | 480 +++++++++++++ arch/avr32/mm/ioremap.c | 197 ++++++ arch/avr32/mm/tlb.c | 378 ++++++++++ include/asm-avr32/Kbuild | 3 + include/asm-avr32/a.out.h | 26 + include/asm-avr32/addrspace.h | 43 ++ include/asm-avr32/arch-at32ap/at91rm9200_pdc.h | 36 + include/asm-avr32/arch-at32ap/at91rm9200_usart.h | 123 ++++ include/asm-avr32/arch-at32ap/board.h | 35 + include/asm-avr32/arch-at32ap/init.h | 21 + include/asm-avr32/arch-at32ap/portmux.h | 16 + include/asm-avr32/arch-at32ap/sm.h | 27 + include/asm-avr32/asm.h | 102 +++ include/asm-avr32/atomic.h | 201 ++++++ include/asm-avr32/auxvec.h | 4 + include/asm-avr32/bitops.h | 296 ++++++++ include/asm-avr32/bug.h | 47 ++ include/asm-avr32/bugs.h | 15 + include/asm-avr32/byteorder.h | 25 + include/asm-avr32/cache.h | 29 + include/asm-avr32/cachectl.h | 11 + include/asm-avr32/cacheflush.h | 129 ++++ include/asm-avr32/checksum.h | 156 ++++ include/asm-avr32/cputime.h | 6 + include/asm-avr32/current.h | 15 + include/asm-avr32/delay.h | 26 + include/asm-avr32/div64.h | 6 + include/asm-avr32/dma-mapping.h | 320 +++++++++ include/asm-avr32/dma.h | 8 + include/asm-avr32/elf.h | 110 +++ include/asm-avr32/emergency-restart.h | 6 + include/asm-avr32/errno.h | 6 + include/asm-avr32/fcntl.h | 6 + include/asm-avr32/futex.h | 6 + include/asm-avr32/hardirq.h | 34 + include/asm-avr32/hw_irq.h | 9 + include/asm-avr32/intc.h | 128 ++++ include/asm-avr32/io.h | 253 +++++++ include/asm-avr32/ioctl.h | 6 + include/asm-avr32/ioctls.h | 83 +++ include/asm-avr32/ipcbuf.h | 29 + include/asm-avr32/irq.h | 10 + include/asm-avr32/irqflags.h | 68 ++ include/asm-avr32/kdebug.h | 38 + include/asm-avr32/kmap_types.h | 30 + include/asm-avr32/kprobes.h | 34 + include/asm-avr32/linkage.h | 7 + include/asm-avr32/local.h | 6 + include/asm-avr32/mach/serial_at91.h | 33 + include/asm-avr32/mman.h | 17 + include/asm-avr32/mmu.h | 10 + include/asm-avr32/mmu_context.h | 148 ++++ include/asm-avr32/module.h | 28 + include/asm-avr32/msgbuf.h | 31 + include/asm-avr32/mutex.h | 9 + include/asm-avr32/namei.h | 7 + include/asm-avr32/numnodes.h | 7 + include/asm-avr32/ocd.h | 78 ++ include/asm-avr32/page.h | 112 +++ include/asm-avr32/param.h | 23 + include/asm-avr32/pci.h | 8 + include/asm-avr32/percpu.h | 6 + include/asm-avr32/pgalloc.h | 96 +++ include/asm-avr32/pgtable-2level.h | 47 ++ include/asm-avr32/pgtable.h | 408 +++++++++++ include/asm-avr32/poll.h | 27 + include/asm-avr32/posix_types.h | 129 ++++ include/asm-avr32/processor.h | 147 ++++ include/asm-avr32/ptrace.h | 154 ++++ include/asm-avr32/resource.h | 6 + include/asm-avr32/scatterlist.h | 21 + include/asm-avr32/sections.h | 6 + include/asm-avr32/semaphore.h | 109 +++ include/asm-avr32/sembuf.h | 25 + include/asm-avr32/setup.h | 141 ++++ include/asm-avr32/shmbuf.h | 42 ++ include/asm-avr32/shmparam.h | 6 + include/asm-avr32/sigcontext.h | 34 + include/asm-avr32/siginfo.h | 6 + include/asm-avr32/signal.h | 168 +++++ include/asm-avr32/socket.h | 53 ++ include/asm-avr32/sockios.h | 12 + include/asm-avr32/stat.h | 79 +++ include/asm-avr32/statfs.h | 6 + include/asm-avr32/string.h | 17 + include/asm-avr32/sysreg.h | 332 +++++++++ include/asm-avr32/system.h | 155 ++++ include/asm-avr32/termbits.h | 173 +++++ include/asm-avr32/termios.h | 80 +++ include/asm-avr32/thread_info.h | 106 +++ include/asm-avr32/timex.h | 40 ++ include/asm-avr32/tlb.h | 32 + include/asm-avr32/tlbflush.h | 40 ++ include/asm-avr32/topology.h | 6 + include/asm-avr32/traps.h | 23 + include/asm-avr32/types.h | 70 ++ include/asm-avr32/uaccess.h | 335 +++++++++ include/asm-avr32/ucontext.h | 12 + include/asm-avr32/unaligned.h | 25 + include/asm-avr32/unistd.h | 387 ++++++++++ include/asm-avr32/user.h | 65 ++ include/linux/elf-em.h | 1 + lib/Kconfig.debug | 4 +- 176 files changed, 18124 insertions(+), 2 deletions(-) create mode 100644 arch/avr32/Kconfig create mode 100644 arch/avr32/Kconfig.debug create mode 100644 arch/avr32/Makefile create mode 100644 arch/avr32/boards/atstk1000/Makefile create mode 100644 arch/avr32/boards/atstk1000/atstk1002.c create mode 100644 arch/avr32/boards/atstk1000/setup.c create mode 100644 arch/avr32/boards/atstk1000/spi.c create mode 100644 arch/avr32/boot/images/Makefile create mode 100644 arch/avr32/boot/u-boot/Makefile create mode 100644 arch/avr32/boot/u-boot/empty.S create mode 100644 arch/avr32/boot/u-boot/head.S create mode 100644 arch/avr32/configs/atstk1002_defconfig create mode 100644 arch/avr32/kernel/Makefile create mode 100644 arch/avr32/kernel/asm-offsets.c create mode 100644 arch/avr32/kernel/avr32_ksyms.c create mode 100644 arch/avr32/kernel/cpu.c create mode 100644 arch/avr32/kernel/entry-avr32b.S create mode 100644 arch/avr32/kernel/head.S create mode 100644 arch/avr32/kernel/init_task.c create mode 100644 arch/avr32/kernel/irq.c create mode 100644 arch/avr32/kernel/kprobes.c create mode 100644 arch/avr32/kernel/module.c create mode 100644 arch/avr32/kernel/process.c create mode 100644 arch/avr32/kernel/ptrace.c create mode 100644 arch/avr32/kernel/semaphore.c create mode 100644 arch/avr32/kernel/setup.c create mode 100644 arch/avr32/kernel/signal.c create mode 100644 arch/avr32/kernel/switch_to.S create mode 100644 arch/avr32/kernel/sys_avr32.c create mode 100644 arch/avr32/kernel/syscall-stubs.S create mode 100644 arch/avr32/kernel/syscall_table.S create mode 100644 arch/avr32/kernel/time.c create mode 100644 arch/avr32/kernel/traps.c create mode 100644 arch/avr32/kernel/vmlinux.lds.c create mode 100644 arch/avr32/lib/Makefile create mode 100644 arch/avr32/lib/__avr32_asr64.S create mode 100644 arch/avr32/lib/__avr32_lsl64.S create mode 100644 arch/avr32/lib/__avr32_lsr64.S create mode 100644 arch/avr32/lib/clear_user.S create mode 100644 arch/avr32/lib/copy_user.S create mode 100644 arch/avr32/lib/csum_partial.S create mode 100644 arch/avr32/lib/csum_partial_copy_generic.S create mode 100644 arch/avr32/lib/delay.c create mode 100644 arch/avr32/lib/findbit.S create mode 100644 arch/avr32/lib/io-readsl.S create mode 100644 arch/avr32/lib/io-readsw.S create mode 100644 arch/avr32/lib/io-writesl.S create mode 100644 arch/avr32/lib/io-writesw.S create mode 100644 arch/avr32/lib/libgcc.h create mode 100644 arch/avr32/lib/longlong.h create mode 100644 arch/avr32/lib/memcpy.S create mode 100644 arch/avr32/lib/memset.S create mode 100644 arch/avr32/lib/strncpy_from_user.S create mode 100644 arch/avr32/lib/strnlen_user.S create mode 100644 arch/avr32/mach-at32ap/Makefile create mode 100644 arch/avr32/mach-at32ap/at32ap.c create mode 100644 arch/avr32/mach-at32ap/at32ap7000.c create mode 100644 arch/avr32/mach-at32ap/clock.c create mode 100644 arch/avr32/mach-at32ap/clock.h create mode 100644 arch/avr32/mach-at32ap/extint.c create mode 100644 arch/avr32/mach-at32ap/intc.c create mode 100644 arch/avr32/mach-at32ap/intc.h create mode 100644 arch/avr32/mach-at32ap/pio.c create mode 100644 arch/avr32/mach-at32ap/pio.h create mode 100644 arch/avr32/mach-at32ap/sm.c create mode 100644 arch/avr32/mach-at32ap/sm.h create mode 100644 arch/avr32/mm/Makefile create mode 100644 arch/avr32/mm/cache.c create mode 100644 arch/avr32/mm/clear_page.S create mode 100644 arch/avr32/mm/copy_page.S create mode 100644 arch/avr32/mm/dma-coherent.c create mode 100644 arch/avr32/mm/fault.c create mode 100644 arch/avr32/mm/init.c create mode 100644 arch/avr32/mm/ioremap.c create mode 100644 arch/avr32/mm/tlb.c create mode 100644 include/asm-avr32/Kbuild create mode 100644 include/asm-avr32/a.out.h create mode 100644 include/asm-avr32/addrspace.h create mode 100644 include/asm-avr32/arch-at32ap/at91rm9200_pdc.h create mode 100644 include/asm-avr32/arch-at32ap/at91rm9200_usart.h create mode 100644 include/asm-avr32/arch-at32ap/board.h create mode 100644 include/asm-avr32/arch-at32ap/init.h create mode 100644 include/asm-avr32/arch-at32ap/portmux.h create mode 100644 include/asm-avr32/arch-at32ap/sm.h create mode 100644 include/asm-avr32/asm.h create mode 100644 include/asm-avr32/atomic.h create mode 100644 include/asm-avr32/auxvec.h create mode 100644 include/asm-avr32/bitops.h create mode 100644 include/asm-avr32/bug.h create mode 100644 include/asm-avr32/bugs.h create mode 100644 include/asm-avr32/byteorder.h create mode 100644 include/asm-avr32/cache.h create mode 100644 include/asm-avr32/cachectl.h create mode 100644 include/asm-avr32/cacheflush.h create mode 100644 include/asm-avr32/checksum.h create mode 100644 include/asm-avr32/cputime.h create mode 100644 include/asm-avr32/current.h create mode 100644 include/asm-avr32/delay.h create mode 100644 include/asm-avr32/div64.h create mode 100644 include/asm-avr32/dma-mapping.h create mode 100644 include/asm-avr32/dma.h create mode 100644 include/asm-avr32/elf.h create mode 100644 include/asm-avr32/emergency-restart.h create mode 100644 include/asm-avr32/errno.h create mode 100644 include/asm-avr32/fcntl.h create mode 100644 include/asm-avr32/futex.h create mode 100644 include/asm-avr32/hardirq.h create mode 100644 include/asm-avr32/hw_irq.h create mode 100644 include/asm-avr32/intc.h create mode 100644 include/asm-avr32/io.h create mode 100644 include/asm-avr32/ioctl.h create mode 100644 include/asm-avr32/ioctls.h create mode 100644 include/asm-avr32/ipcbuf.h create mode 100644 include/asm-avr32/irq.h create mode 100644 include/asm-avr32/irqflags.h create mode 100644 include/asm-avr32/kdebug.h create mode 100644 include/asm-avr32/kmap_types.h create mode 100644 include/asm-avr32/kprobes.h create mode 100644 include/asm-avr32/linkage.h create mode 100644 include/asm-avr32/local.h create mode 100644 include/asm-avr32/mach/serial_at91.h create mode 100644 include/asm-avr32/mman.h create mode 100644 include/asm-avr32/mmu.h create mode 100644 include/asm-avr32/mmu_context.h create mode 100644 include/asm-avr32/module.h create mode 100644 include/asm-avr32/msgbuf.h create mode 100644 include/asm-avr32/mutex.h create mode 100644 include/asm-avr32/namei.h create mode 100644 include/asm-avr32/numnodes.h create mode 100644 include/asm-avr32/ocd.h create mode 100644 include/asm-avr32/page.h create mode 100644 include/asm-avr32/param.h create mode 100644 include/asm-avr32/pci.h create mode 100644 include/asm-avr32/percpu.h create mode 100644 include/asm-avr32/pgalloc.h create mode 100644 include/asm-avr32/pgtable-2level.h create mode 100644 include/asm-avr32/pgtable.h create mode 100644 include/asm-avr32/poll.h create mode 100644 include/asm-avr32/posix_types.h create mode 100644 include/asm-avr32/processor.h create mode 100644 include/asm-avr32/ptrace.h create mode 100644 include/asm-avr32/resource.h create mode 100644 include/asm-avr32/scatterlist.h create mode 100644 include/asm-avr32/sections.h create mode 100644 include/asm-avr32/semaphore.h create mode 100644 include/asm-avr32/sembuf.h create mode 100644 include/asm-avr32/setup.h create mode 100644 include/asm-avr32/shmbuf.h create mode 100644 include/asm-avr32/shmparam.h create mode 100644 include/asm-avr32/sigcontext.h create mode 100644 include/asm-avr32/siginfo.h create mode 100644 include/asm-avr32/signal.h create mode 100644 include/asm-avr32/socket.h create mode 100644 include/asm-avr32/sockios.h create mode 100644 include/asm-avr32/stat.h create mode 100644 include/asm-avr32/statfs.h create mode 100644 include/asm-avr32/string.h create mode 100644 include/asm-avr32/sysreg.h create mode 100644 include/asm-avr32/system.h create mode 100644 include/asm-avr32/termbits.h create mode 100644 include/asm-avr32/termios.h create mode 100644 include/asm-avr32/thread_info.h create mode 100644 include/asm-avr32/timex.h create mode 100644 include/asm-avr32/tlb.h create mode 100644 include/asm-avr32/tlbflush.h create mode 100644 include/asm-avr32/topology.h create mode 100644 include/asm-avr32/traps.h create mode 100644 include/asm-avr32/types.h create mode 100644 include/asm-avr32/uaccess.h create mode 100644 include/asm-avr32/ucontext.h create mode 100644 include/asm-avr32/unaligned.h create mode 100644 include/asm-avr32/unistd.h create mode 100644 include/asm-avr32/user.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 23348c0d37bc..767a43434ae8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -443,6 +443,23 @@ W: http://people.redhat.com/sgrubb/audit/ T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git S: Maintained +AVR32 ARCHITECTURE +P: Atmel AVR32 Support Team +M: avr32@atmel.com +P: Haavard Skinnemoen +M: hskinnemoen@atmel.com +W: http://www.atmel.com/products/AVR32/ +W: http://avr32linux.org/ +W: http://avrfreaks.net/ +S: Supported + +AVR32/AT32AP MACHINE SUPPORT +P: Atmel AVR32 Support Team +M: avr32@atmel.com +P: Haavard Skinnemoen +M: hskinnemoen@atmel.com +S: Supported + AX.25 NETWORK LAYER P: Ralf Baechle M: ralf@linux-mips.org diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig new file mode 100644 index 000000000000..5f1694eea842 --- /dev/null +++ b/arch/avr32/Kconfig @@ -0,0 +1,196 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux Kernel Configuration" + +config AVR32 + bool + default y + # With EMBEDDED=n, we get lots of stuff automatically selected + # that we usually don't need on AVR32. + select EMBEDDED + help + AVR32 is a high-performance 32-bit RISC microprocessor core, + designed for cost-sensitive embedded applications, with particular + emphasis on low power consumption and high code density. + + There is an AVR32 Linux project with a web page at + http://avr32linux.org/. + +config UID16 + bool + +config GENERIC_HARDIRQS + bool + default y + +config HARDIRQS_SW_RESEND + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config GENERIC_TIME + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + +config GENERIC_BUST_SPINLOCK + bool + +config GENERIC_HWEIGHT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +source "init/Kconfig" + +menu "System Type and features" + +config SUBARCH_AVR32B + bool +config MMU + bool +config PERFORMANCE_COUNTERS + bool + +config PLATFORM_AT32AP + bool + select SUBARCH_AVR32B + select MMU + select PERFORMANCE_COUNTERS + +choice + prompt "AVR32 CPU type" + default CPU_AT32AP7000 + +config CPU_AT32AP7000 + bool "AT32AP7000" + select PLATFORM_AT32AP +endchoice + +# +# CPU Daughterboards for ATSTK1000 +config BOARD_ATSTK1002 + bool + +choice + prompt "AVR32 board type" + default BOARD_ATSTK1000 + +config BOARD_ATSTK1000 + bool "ATSTK1000 evaluation board" + select BOARD_ATSTK1002 if CPU_AT32AP7000 +endchoice + +choice + prompt "Boot loader type" + default LOADER_U_BOOT + +config LOADER_U_BOOT + bool "U-Boot (or similar) bootloader" +endchoice + +config LOAD_ADDRESS + hex + default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y + +config ENTRY_ADDRESS + hex + default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y + +config PHYS_OFFSET + hex + default 0x10000000 if CPU_AT32AP7000=y + +source "kernel/Kconfig.preempt" + +config HAVE_ARCH_BOOTMEM_NODE + bool + default n + +config ARCH_HAVE_MEMORY_PRESENT + bool + default n + +config NEED_NODE_MEMMAP_SIZE + bool + default n + +config ARCH_FLATMEM_ENABLE + bool + default y + +config ARCH_DISCONTIGMEM_ENABLE + bool + default n + +config ARCH_SPARSEMEM_ENABLE + bool + default n + +source "mm/Kconfig" + +config OWNERSHIP_TRACE + bool "Ownership trace support" + default y + help + Say Y to generate an Ownership Trace message on every context switch, + enabling Nexus-compliant debuggers to keep track of the PID of the + currently executing task. + +# FPU emulation goes here + +source "kernel/Kconfig.hz" + +config CMDLINE + string "Default kernel command line" + default "" + help + If you don't have a boot loader capable of passing a command line string + to the kernel, you may specify one here. As a minimum, you should specify + the memory size and the root device (e.g., mem=8M, root=/dev/nfs). + +endmenu + +menu "Bus options" + +config PCI + bool + +source "drivers/pci/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +menu "Executable file formats" +source "fs/Kconfig.binfmt" +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/avr32/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug new file mode 100644 index 000000000000..64ace00fe6cb --- /dev/null +++ b/arch/avr32/Kconfig.debug @@ -0,0 +1,19 @@ +menu "Kernel hacking" + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + +source "lib/Kconfig.debug" + +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +endmenu diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile new file mode 100644 index 000000000000..cefc95a73980 --- /dev/null +++ b/arch/avr32/Makefile @@ -0,0 +1,84 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2004-2006 Atmel Corporation. + +# Default target when executing plain make +.PHONY: all +all: uImage vmlinux.elf linux.lst + +KBUILD_DEFCONFIG := atstk1002_defconfig + +CFLAGS += -pipe -fno-builtin -mno-pic +AFLAGS += -mrelax -mno-pic +CFLAGS_MODULE += -mno-relax +LDFLAGS_vmlinux += --relax + +cpuflags-$(CONFIG_CPU_AP7000) += -mcpu=ap7000 + +CFLAGS += $(cpuflags-y) +AFLAGS += $(cpuflags-y) + +CHECKFLAGS += -D__avr32__ + +LIBGCC := $(shell $(CC) $(CFLAGS) -print-libgcc-file-name) + +head-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/head.o +head-y += arch/avr32/kernel/head.o +core-$(CONFIG_PLATFORM_AT32AP) += arch/avr32/mach-at32ap/ +core-$(CONFIG_BOARD_ATSTK1000) += arch/avr32/boards/atstk1000/ +core-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/ +core-y += arch/avr32/kernel/ +core-y += arch/avr32/mm/ +libs-y += arch/avr32/lib/ #$(LIBGCC) + +archincdir-$(CONFIG_PLATFORM_AT32AP) := arch-at32ap + +include/asm-avr32/.arch: $(wildcard include/config/platform/*.h) include/config/auto.conf + @echo ' SYMLINK include/asm-avr32/arch -> include/asm-avr32/$(archincdir-y)' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p include/asm-avr32 + $(Q)ln -fsn $(srctree)/include/asm-avr32/$(archincdir-y) include/asm-avr32/arch +else + $(Q)ln -fsn $(archincdir-y) include/asm-avr32/arch +endif + @touch $@ + +archprepare: include/asm-avr32/.arch + +BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec + +.PHONY: $(BOOT_TARGETS) install + +boot := arch/$(ARCH)/boot/images + + KBUILD_IMAGE := $(boot)/uImage +vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf +vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso +uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec +uImage: KBUILD_IMAGE := $(boot)/uImage + +quiet_cmd_listing = LST $@ + cmd_listing = avr32-linux-objdump $(OBJDUMPFLAGS) -lS $< > $@ +quiet_cmd_disasm = DIS $@ + cmd_disasm = avr32-linux-objdump $(OBJDUMPFLAGS) -d $< > $@ + +vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ + +linux.s: vmlinux + $(call if_changed,disasm) + +linux.lst: vmlinux + $(call if_changed,listing) + +define archhelp + @echo '* vmlinux.elf - ELF image with load address 0' + @echo ' vmlinux.cso - PathFinder CSO image' + @echo ' uImage - Create a bootable image for U-Boot' +endef diff --git a/arch/avr32/boards/atstk1000/Makefile b/arch/avr32/boards/atstk1000/Makefile new file mode 100644 index 000000000000..c3e36ece8651 --- /dev/null +++ b/arch/avr32/boards/atstk1000/Makefile @@ -0,0 +1,2 @@ +obj-y += setup.o spi.o +obj-$(CONFIG_BOARD_ATSTK1002) += atstk1002.o diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c new file mode 100644 index 000000000000..49164e9aadd6 --- /dev/null +++ b/arch/avr32/boards/atstk1000/atstk1002.c @@ -0,0 +1,37 @@ +/* + * ATSTK1002 daughterboard-specific init code + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include + +struct eth_platform_data __initdata eth0_data = { + .valid = 1, + .mii_phy_addr = 0x10, + .is_rmii = 0, + .hw_addr = { 0x6a, 0x87, 0x71, 0x14, 0xcd, 0xcb }, +}; + +extern struct lcdc_platform_data atstk1000_fb0_data; + +static int __init atstk1002_init(void) +{ + at32_add_system_devices(); + + at32_add_device_usart(1); /* /dev/ttyS0 */ + at32_add_device_usart(2); /* /dev/ttyS1 */ + at32_add_device_usart(3); /* /dev/ttyS2 */ + + at32_add_device_eth(0, ð0_data); + at32_add_device_spi(0); + at32_add_device_lcdc(0, &atstk1000_fb0_data); + + return 0; +} +postcore_initcall(atstk1002_init); diff --git a/arch/avr32/boards/atstk1000/setup.c b/arch/avr32/boards/atstk1000/setup.c new file mode 100644 index 000000000000..191ab85de9a3 --- /dev/null +++ b/arch/avr32/boards/atstk1000/setup.c @@ -0,0 +1,59 @@ +/* + * ATSTK1000 board-specific setup code. + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +#include + +#include + +/* Initialized by bootloader-specific startup code. */ +struct tag *bootloader_tags __initdata; + +struct lcdc_platform_data __initdata atstk1000_fb0_data; + +asmlinkage void __init board_early_init(void) +{ + extern void sdram_init(void); + +#ifdef CONFIG_LOADER_STANDALONE + sdram_init(); +#endif +} + +void __init board_setup_fbmem(unsigned long fbmem_start, + unsigned long fbmem_size) +{ + if (!fbmem_size) + return; + + if (!fbmem_start) { + void *fbmem; + + fbmem = alloc_bootmem_low_pages(fbmem_size); + fbmem_start = __pa(fbmem); + } else { + pg_data_t *pgdat; + + for_each_online_pgdat(pgdat) { + if (fbmem_start >= pgdat->bdata->node_boot_start + && fbmem_start <= pgdat->bdata->node_low_pfn) + reserve_bootmem_node(pgdat, fbmem_start, + fbmem_size); + } + } + + printk("%luKiB framebuffer memory at address 0x%08lx\n", + fbmem_size >> 10, fbmem_start); + atstk1000_fb0_data.fbmem_start = fbmem_start; + atstk1000_fb0_data.fbmem_size = fbmem_size; +} diff --git a/arch/avr32/boards/atstk1000/spi.c b/arch/avr32/boards/atstk1000/spi.c new file mode 100644 index 000000000000..567726c82c6e --- /dev/null +++ b/arch/avr32/boards/atstk1000/spi.c @@ -0,0 +1,27 @@ +/* + * ATSTK1000 SPI devices + * + * Copyright (C) 2005 Atmel Norway + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +static struct spi_board_info spi_board_info[] __initdata = { + { + .modalias = "ltv350qv", + .max_speed_hz = 16000000, + .bus_num = 0, + .chip_select = 1, + }, +}; + +static int board_init_spi(void) +{ + spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info)); + return 0; +} +arch_initcall(board_init_spi); diff --git a/arch/avr32/boot/images/Makefile b/arch/avr32/boot/images/Makefile new file mode 100644 index 000000000000..ccd74eeecec3 --- /dev/null +++ b/arch/avr32/boot/images/Makefile @@ -0,0 +1,62 @@ +# +# Copyright (C) 2004-2006 Atmel Corporation +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# + +MKIMAGE := $(srctree)/scripts/mkuboot.sh + +extra-y := vmlinux.bin vmlinux.gz + +OBJCOPYFLAGS_vmlinux.bin := -O binary +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +quiet_cmd_uimage = UIMAGE $@ + cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A avr32 -O linux -T kernel \ + -C gzip -a $(CONFIG_LOAD_ADDRESS) -e $(CONFIG_ENTRY_ADDRESS) \ + -n 'Linux-$(KERNELRELEASE)' -d $< $@ + +targets += uImage uImage.srec +$(obj)/uImage: $(obj)/vmlinux.gz + $(call if_changed,uimage) + @echo ' Image $@ is ready' + +OBJCOPYFLAGS_uImage.srec := -I binary -O srec +$(obj)/uImage.srec: $(obj)/uImage + $(call if_changed,objcopy) + +OBJCOPYFLAGS_vmlinux.elf := --change-section-lma .text-0x80000000 \ + --change-section-lma __ex_table-0x80000000 \ + --change-section-lma .rodata-0x80000000 \ + --change-section-lma .data-0x80000000 \ + --change-section-lma .init-0x80000000 \ + --change-section-lma .bss-0x80000000 \ + --change-section-lma .initrd-0x80000000 \ + --change-section-lma __param-0x80000000 \ + --change-section-lma __ksymtab-0x80000000 \ + --change-section-lma __ksymtab_gpl-0x80000000 \ + --change-section-lma __kcrctab-0x80000000 \ + --change-section-lma __kcrctab_gpl-0x80000000 \ + --change-section-lma __ksymtab_strings-0x80000000 \ + --change-section-lma .got-0x80000000 \ + --set-start 0xa0000000 +$(obj)/vmlinux.elf: vmlinux FORCE + $(call if_changed,objcopy) + +quiet_cmd_sfdwarf = SFDWARF $@ + cmd_sfdwarf = sfdwarf $< TO $@ GNUAVR IW $(SFDWARF_FLAGS) > $(obj)/sfdwarf.log + +$(obj)/vmlinux.cso: $(obj)/vmlinux.elf FORCE + $(call if_changed,sfdwarf) + +install: $(BOOTIMAGE) + sh $(srctree)/install-kernel.sh $< + +# Generated files to be removed upon make clean +clean-files := vmlinux* uImage uImage.srec diff --git a/arch/avr32/boot/u-boot/Makefile b/arch/avr32/boot/u-boot/Makefile new file mode 100644 index 000000000000..125ddc96c275 --- /dev/null +++ b/arch/avr32/boot/u-boot/Makefile @@ -0,0 +1,3 @@ +extra-y := head.o + +obj-y := empty.o diff --git a/arch/avr32/boot/u-boot/empty.S b/arch/avr32/boot/u-boot/empty.S new file mode 100644 index 000000000000..8ac91a5f12f0 --- /dev/null +++ b/arch/avr32/boot/u-boot/empty.S @@ -0,0 +1 @@ +/* Empty file */ diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S new file mode 100644 index 000000000000..4488fa27fe94 --- /dev/null +++ b/arch/avr32/boot/u-boot/head.S @@ -0,0 +1,60 @@ +/* + * Startup code for use with the u-boot bootloader. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + + /* + * The kernel is loaded where we want it to be and all caches + * have just been flushed. We get two parameters from u-boot: + * + * r12 contains a magic number (ATAG_MAGIC) + * r11 points to a tag table providing information about + * the system. + */ + .section .init.text,"ax" + .global _start +_start: + /* Check if the boot loader actually provided a tag table */ + lddpc r0, magic_number + cp.w r12, r0 + brne no_tag_table + + /* Initialize .bss */ + lddpc r2, bss_start_addr + lddpc r3, end_addr + mov r0, 0 + mov r1, 0 +1: st.d r2++, r0 + cp r2, r3 + brlo 1b + + /* + * Save the tag table address for later use. This must be done + * _after_ .bss has been initialized... + */ + lddpc r0, tag_table_addr + st.w r0[0], r11 + + /* Jump to loader-independent setup code */ + rjmp kernel_entry + + .align 2 +magic_number: + .long ATAG_MAGIC +tag_table_addr: + .long bootloader_tags +bss_start_addr: + .long __bss_start +end_addr: + .long _end + +no_tag_table: + sub r12, pc, (. - 2f) + bral panic +2: .asciz "Boot loader didn't provide correct magic number\n" diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig new file mode 100644 index 000000000000..1d22255009fd --- /dev/null +++ b/arch/avr32/configs/atstk1002_defconfig @@ -0,0 +1,754 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.18-rc1 +# Tue Jul 11 12:41:36 2006 +# +CONFIG_AVR32=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +# CONFIG_IKCONFIG is not set +# CONFIG_RELAY is not set +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +# CONFIG_BASE_FULL is not set +# CONFIG_FUTEX is not set +# CONFIG_EPOLL is not set +CONFIG_SHMEM=y +# CONFIG_SLAB is not set +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=1 +CONFIG_SLOB=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set + +# +# Block layer +# +# CONFIG_BLK_DEV_IO_TRACE is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" + +# +# System Type and features +# +CONFIG_SUBARCH_AVR32B=y +CONFIG_MMU=y +CONFIG_PERFORMANCE_COUNTERS=y +CONFIG_PLATFORM_AT32AP=y +CONFIG_CPU_AT32AP7000=y +CONFIG_BOARD_ATSTK1002=y +CONFIG_BOARD_ATSTK1000=y +CONFIG_LOADER_U_BOOT=y +CONFIG_LOAD_ADDRESS=0x10000000 +CONFIG_ENTRY_ADDRESS=0x90000000 +CONFIG_PHYS_OFFSET=0x10000000 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_HAVE_ARCH_BOOTMEM_NODE is not set +# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set +# CONFIG_NEED_NODE_MEMMAP_SIZE is not set +CONFIG_ARCH_FLATMEM_ENABLE=y +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +# CONFIG_ARCH_SPARSEMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +# CONFIG_OWNERSHIP_TRACE is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_CMDLINE="" + +# +# Bus options +# + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NETDEBUG is not set +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# I2O device support +# + +# +# Network device support +# +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# PHY device support +# +# CONFIG_PHYLIB is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +CONFIG_MACB=y + +# +# Ethernet (1000 Mbit) +# + +# +# Ethernet (10000 Mbit) +# + +# +# Token Ring devices +# + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +CONFIG_PPP_ASYNC=m +# CONFIG_PPP_SYNC_TTY is not set +CONFIG_PPP_DEFLATE=m +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set +# CONFIG_PPPOE is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_AT91=y +CONFIG_SERIAL_AT91_CONSOLE=y +# CONFIG_SERIAL_AT91_TTYAT is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# SPI support +# +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +CONFIG_SPI_ATMEL=m +# CONFIG_SPI_BITBANG is not set + +# +# SPI Protocol Masters +# + +# +# Dallas's 1-wire bus +# + +# +# Hardware Monitoring support +# +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FIRMWARE_EDID is not set +CONFIG_FB=m +CONFIG_FB_CFB_FILLRECT=m +CONFIG_FB_CFB_COPYAREA=m +CONFIG_FB_CFB_IMAGEBLIT=m +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set +CONFIG_FB_SIDSA=m +CONFIG_FB_SIDSA_DEFAULT_BPP=24 +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Logo configuration +# +# CONFIG_LOGO is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +CONFIG_LCD_CLASS_DEVICE=m +CONFIG_LCD_DEVICE=y +CONFIG_LCD_LTV350QV=m + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB_ARCH_HAS_HCD is not set +# CONFIG_USB_ARCH_HAS_OHCI is not set +# CONFIG_USB_ARCH_HAS_EHCI is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_DNOTIFY is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=m +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=m +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=m +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=m + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_PRINTK_TIME=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +CONFIG_FRAME_POINTER=y +# CONFIG_UNWIND_INFO is not set +CONFIG_FORCED_INLINING=y +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_KPROBES=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=m +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile new file mode 100644 index 000000000000..90e5afff54a2 --- /dev/null +++ b/arch/avr32/kernel/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for the Linux/AVR32 kernel. +# + +extra-y := head.o vmlinux.lds + +obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o +obj-y += syscall_table.o syscall-stubs.o irq.o +obj-y += setup.o traps.o semaphore.o ptrace.o +obj-y += signal.o sys_avr32.o process.o time.o +obj-y += init_task.o switch_to.o cpu.o +obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o +obj-$(CONFIG_KPROBES) += kprobes.o + +USE_STANDARD_AS_RULE := true + +%.lds: %.lds.c FORCE + $(call if_changed_dep,cpp_lds_S) diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c new file mode 100644 index 000000000000..97d865865667 --- /dev/null +++ b/arch/avr32/kernel/asm-offsets.c @@ -0,0 +1,25 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#include + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(TI_task, thread_info, task); + OFFSET(TI_exec_domain, thread_info, exec_domain); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_cpu, thread_info, cpu); + OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_restart_block, thread_info, restart_block); +} diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c new file mode 100644 index 000000000000..04f767a272b7 --- /dev/null +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -0,0 +1,55 @@ +/* + * Export AVR32-specific functions for loadable modules. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include +#include +#include + +/* + * GCC functions + */ +extern unsigned long long __avr32_lsl64(unsigned long long u, unsigned long b); +extern unsigned long long __avr32_lsr64(unsigned long long u, unsigned long b); +extern unsigned long long __avr32_asr64(unsigned long long u, unsigned long b); +EXPORT_SYMBOL(__avr32_lsl64); +EXPORT_SYMBOL(__avr32_lsr64); +EXPORT_SYMBOL(__avr32_asr64); + +/* + * String functions + */ +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); + +/* + * Userspace access stuff. + */ +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); + +/* Delay loops (lib/delay.S) */ +EXPORT_SYMBOL(__ndelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__const_udelay); + +/* Bit operations (lib/findbit.S) */ +EXPORT_SYMBOL(find_first_zero_bit); +EXPORT_SYMBOL(find_next_zero_bit); +EXPORT_SYMBOL(find_first_bit); +EXPORT_SYMBOL(find_next_bit); +EXPORT_SYMBOL(generic_find_next_zero_le_bit); diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c new file mode 100644 index 000000000000..342452ba2049 --- /dev/null +++ b/arch/avr32/kernel/cpu.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +#ifdef CONFIG_PERFORMANCE_COUNTERS + +/* + * XXX: If/when a SMP-capable implementation of AVR32 will ever be + * made, we must make sure that the code executes on the correct CPU. + */ +static ssize_t show_pc0event(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f); +} +static ssize_t store_pc0event(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf || val > 0x3f) + return -EINVAL; + val = (val << 12) | (sysreg_read(PCCR) & 0xfffc0fff); + sysreg_write(PCCR, val); + return count; +} +static ssize_t show_pc0count(struct sys_device *dev, char *buf) +{ + unsigned long pcnt0; + + pcnt0 = sysreg_read(PCNT0); + return sprintf(buf, "%lu\n", pcnt0); +} +static ssize_t store_pc0count(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCNT0, val); + + return count; +} + +static ssize_t show_pc1event(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f); +} +static ssize_t store_pc1event(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf || val > 0x3f) + return -EINVAL; + val = (val << 18) | (sysreg_read(PCCR) & 0xff03ffff); + sysreg_write(PCCR, val); + return count; +} +static ssize_t show_pc1count(struct sys_device *dev, char *buf) +{ + unsigned long pcnt1; + + pcnt1 = sysreg_read(PCNT1); + return sprintf(buf, "%lu\n", pcnt1); +} +static ssize_t store_pc1count(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCNT1, val); + + return count; +} + +static ssize_t show_pccycles(struct sys_device *dev, char *buf) +{ + unsigned long pccnt; + + pccnt = sysreg_read(PCCNT); + return sprintf(buf, "%lu\n", pccnt); +} +static ssize_t store_pccycles(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCCNT, val); + + return count; +} + +static ssize_t show_pcenable(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "%c\n", (pccr & 1)?'1':'0'); +} +static ssize_t store_pcenable(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long pccr, val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + if (val) + val = 1; + + pccr = sysreg_read(PCCR); + pccr = (pccr & ~1UL) | val; + sysreg_write(PCCR, pccr); + + return count; +} + +static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event); +static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count); +static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event); +static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count); +static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles); +static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable); + +#endif /* CONFIG_PERFORMANCE_COUNTERS */ + +static int __init topology_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + + register_cpu(c, cpu); + +#ifdef CONFIG_PERFORMANCE_COUNTERS + sysdev_create_file(&c->sysdev, &attr_pc0event); + sysdev_create_file(&c->sysdev, &attr_pc0count); + sysdev_create_file(&c->sysdev, &attr_pc1event); + sysdev_create_file(&c->sysdev, &attr_pc1count); + sysdev_create_file(&c->sysdev, &attr_pccycles); + sysdev_create_file(&c->sysdev, &attr_pcenable); +#endif + } + + return 0; +} + +subsys_initcall(topology_init); + +static const char *cpu_names[] = { + "Morgan", + "AP7000", +}; +#define NR_CPU_NAMES ARRAY_SIZE(cpu_names) + +static const char *arch_names[] = { + "AVR32A", + "AVR32B", +}; +#define NR_ARCH_NAMES ARRAY_SIZE(arch_names) + +static const char *mmu_types[] = { + "No MMU", + "ITLB and DTLB", + "Shared TLB", + "MPU" +}; + +void __init setup_processor(void) +{ + unsigned long config0, config1; + unsigned cpu_id, cpu_rev, arch_id, arch_rev, mmu_type; + unsigned tmp; + + config0 = sysreg_read(CONFIG0); /* 0x0000013e; */ + config1 = sysreg_read(CONFIG1); /* 0x01f689a2; */ + cpu_id = config0 >> 24; + cpu_rev = (config0 >> 16) & 0xff; + arch_id = (config0 >> 13) & 0x07; + arch_rev = (config0 >> 10) & 0x07; + mmu_type = (config0 >> 7) & 0x03; + + boot_cpu_data.arch_type = arch_id; + boot_cpu_data.cpu_type = cpu_id; + boot_cpu_data.arch_revision = arch_rev; + boot_cpu_data.cpu_revision = cpu_rev; + boot_cpu_data.tlb_config = mmu_type; + + tmp = (config1 >> 13) & 0x07; + if (tmp) { + boot_cpu_data.icache.ways = 1 << ((config1 >> 10) & 0x07); + boot_cpu_data.icache.sets = 1 << ((config1 >> 16) & 0x0f); + boot_cpu_data.icache.linesz = 1 << (tmp + 1); + } + tmp = (config1 >> 3) & 0x07; + if (tmp) { + boot_cpu_data.dcache.ways = 1 << (config1 & 0x07); + boot_cpu_data.dcache.sets = 1 << ((config1 >> 6) & 0x0f); + boot_cpu_data.dcache.linesz = 1 << (tmp + 1); + } + + if ((cpu_id >= NR_CPU_NAMES) || (arch_id >= NR_ARCH_NAMES)) { + printk ("Unknown CPU configuration (ID %02x, arch %02x), " + "continuing anyway...\n", + cpu_id, arch_id); + return; + } + + printk ("CPU: %s [%02x] revision %d (%s revision %d)\n", + cpu_names[cpu_id], cpu_id, cpu_rev, + arch_names[arch_id], arch_rev); + printk ("CPU: MMU configuration: %s\n", mmu_types[mmu_type]); + printk ("CPU: features:"); + if (config0 & (1 << 6)) + printk(" fpu"); + if (config0 & (1 << 5)) + printk(" java"); + if (config0 & (1 << 4)) + printk(" perfctr"); + if (config0 & (1 << 3)) + printk(" ocd"); + printk("\n"); +} + +#ifdef CONFIG_PROC_FS +static int c_show(struct seq_file *m, void *v) +{ + unsigned int icache_size, dcache_size; + unsigned int cpu = smp_processor_id(); + + icache_size = boot_cpu_data.icache.ways * + boot_cpu_data.icache.sets * + boot_cpu_data.icache.linesz; + dcache_size = boot_cpu_data.dcache.ways * + boot_cpu_data.dcache.sets * + boot_cpu_data.dcache.linesz; + + seq_printf(m, "processor\t: %d\n", cpu); + + if (boot_cpu_data.arch_type < NR_ARCH_NAMES) + seq_printf(m, "cpu family\t: %s revision %d\n", + arch_names[boot_cpu_data.arch_type], + boot_cpu_data.arch_revision); + if (boot_cpu_data.cpu_type < NR_CPU_NAMES) + seq_printf(m, "cpu type\t: %s revision %d\n", + cpu_names[boot_cpu_data.cpu_type], + boot_cpu_data.cpu_revision); + + seq_printf(m, "i-cache\t\t: %dK (%u ways x %u sets x %u)\n", + icache_size >> 10, + boot_cpu_data.icache.ways, + boot_cpu_data.icache.sets, + boot_cpu_data.icache.linesz); + seq_printf(m, "d-cache\t\t: %dK (%u ways x %u sets x %u)\n", + dcache_size >> 10, + boot_cpu_data.dcache.ways, + boot_cpu_data.dcache.sets, + boot_cpu_data.dcache.linesz); + seq_printf(m, "bogomips\t: %lu.%02lu\n", + boot_cpu_data.loops_per_jiffy / (500000/HZ), + (boot_cpu_data.loops_per_jiffy / (5000/HZ)) % 100); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ + +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; +#endif /* CONFIG_PROC_FS */ diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S new file mode 100644 index 000000000000..eeb66792bc37 --- /dev/null +++ b/arch/avr32/kernel/entry-avr32b.S @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This file contains the low-level entry-points into the kernel, that is, + * exception handlers, debug trap handlers, interrupt handlers and the + * system call handler. + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PREEMPT +# define preempt_stop mask_interrupts +#else +# define preempt_stop +# define fault_resume_kernel fault_restore_all +#endif + +#define __MASK(x) ((1 << (x)) - 1) +#define IRQ_MASK ((__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | \ + (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)) + + .section .ex.text,"ax",@progbits + .align 2 +exception_vectors: + bral handle_critical + .align 2 + bral handle_critical + .align 2 + bral do_bus_error_write + .align 2 + bral do_bus_error_read + .align 2 + bral do_nmi_ll + .align 2 + bral handle_address_fault + .align 2 + bral handle_protection_fault + .align 2 + bral handle_debug + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_fpe_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral handle_address_fault + .align 2 + bral handle_address_fault + .align 2 + bral handle_protection_fault + .align 2 + bral handle_protection_fault + .align 2 + bral do_dtlb_modified + + /* + * r0 : PGD/PT/PTE + * r1 : Offending address + * r2 : Scratch register + * r3 : Cause (5, 12 or 13) + */ +#define tlbmiss_save pushm r0-r3 +#define tlbmiss_restore popm r0-r3 + + .section .tlbx.ex.text,"ax",@progbits + .global itlb_miss +itlb_miss: + tlbmiss_save + rjmp tlb_miss_common + + .section .tlbr.ex.text,"ax",@progbits +dtlb_miss_read: + tlbmiss_save + rjmp tlb_miss_common + + .section .tlbw.ex.text,"ax",@progbits +dtlb_miss_write: + tlbmiss_save + + .global tlb_miss_common +tlb_miss_common: + mfsr r0, SYSREG_PTBR + mfsr r1, SYSREG_TLBEAR + + /* Is it the vmalloc space? */ + bld r1, 31 + brcs handle_vmalloc_miss + + /* First level lookup */ +pgtbl_lookup: + lsr r2, r1, PGDIR_SHIFT + ld.w r0, r0[r2 << 2] + bld r0, _PAGE_BIT_PRESENT + brcc page_table_not_present + + /* TODO: Check access rights on page table if necessary */ + + /* Translate to virtual address in P1. */ + andl r0, 0xf000 + sbr r0, 31 + + /* Second level lookup */ + lsl r1, (32 - PGDIR_SHIFT) + lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT + add r2, r0, r1 << 2 + ld.w r1, r2[0] + bld r1, _PAGE_BIT_PRESENT + brcc page_not_present + + /* Mark the page as accessed */ + sbr r1, _PAGE_BIT_ACCESSED + st.w r2[0], r1 + + /* Drop software flags */ + andl r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff + mtsr SYSREG_TLBELO, r1 + + /* Figure out which entry we want to replace */ + mfsr r0, SYSREG_TLBARLO + clz r2, r0 + brcc 1f + mov r1, -1 /* All entries have been accessed, */ + mtsr SYSREG_TLBARLO, r1 /* so reset TLBAR */ + mov r2, 0 /* and start at 0 */ +1: mfsr r1, SYSREG_MMUCR + lsl r2, 14 + andl r1, 0x3fff, COH + or r1, r2 + mtsr SYSREG_MMUCR, r1 + + tlbw + + tlbmiss_restore + rete + +handle_vmalloc_miss: + /* Simply do the lookup in init's page table */ + mov r0, lo(swapper_pg_dir) + orh r0, hi(swapper_pg_dir) + rjmp pgtbl_lookup + + + /* --- System Call --- */ + + .section .scall.text,"ax",@progbits +system_call: + pushm r12 /* r12_orig */ + stmts --sp, r0-lr + zero_fp + mfsr r0, SYSREG_RAR_SUP + mfsr r1, SYSREG_RSR_SUP + stm --sp, r0-r1 + + /* check for syscall tracing */ + get_thread_info r0 + ld.w r1, r0[TI_flags] + bld r1, TIF_SYSCALL_TRACE + brcs syscall_trace_enter + +syscall_trace_cont: + cp.w r8, NR_syscalls + brhs syscall_badsys + + lddpc lr, syscall_table_addr + ld.w lr, lr[r8 << 2] + mov r8, r5 /* 5th argument (6th is pushed by stub) */ + icall lr + + .global syscall_return +syscall_return: + get_thread_info r0 + mask_interrupts /* make sure we don't miss an interrupt + setting need_resched or sigpending + between sampling and the rets */ + + /* Store the return value so that the correct value is loaded below */ + stdsp sp[REG_R12], r12 + + ld.w r1, r0[TI_flags] + andl r1, _TIF_ALLWORK_MASK, COH + brne syscall_exit_work + +syscall_exit_cont: + popm r8-r9 + mtsr SYSREG_RAR_SUP, r8 + mtsr SYSREG_RSR_SUP, r9 + ldmts sp++, r0-lr + sub sp, -4 /* r12_orig */ + rets + + .align 2 +syscall_table_addr: + .long sys_call_table + +syscall_badsys: + mov r12, -ENOSYS + rjmp syscall_return + + .global ret_from_fork +ret_from_fork: + rcall schedule_tail + + /* check for syscall tracing */ + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_ALLWORK_MASK, COH + brne syscall_exit_work + rjmp syscall_exit_cont + +syscall_trace_enter: + pushm r8-r12 + rcall syscall_trace + popm r8-r12 + rjmp syscall_trace_cont + +syscall_exit_work: + bld r1, TIF_SYSCALL_TRACE + brcc 1f + unmask_interrupts + rcall syscall_trace + mask_interrupts + ld.w r1, r0[TI_flags] + +1: bld r1, TIF_NEED_RESCHED + brcc 2f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 3f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +3: bld r1, TIF_BREAKPOINT + brcc syscall_exit_cont + mfsr r3, SYSREG_TLBEHI + lddsp r2, sp[REG_PC] + andl r3, 0xff, COH + lsl r3, 1 + sbr r3, 30 + sbr r3, 0 + mtdr DBGREG_BWA2A, r2 + mtdr DBGREG_BWC2A, r3 + rjmp syscall_exit_cont + + + /* The slow path of the TLB miss handler */ +page_table_not_present: +page_not_present: + tlbmiss_restore + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_page_fault + rjmp ret_from_exception + + /* This function expects to find offending PC in SYSREG_RAR_EX */ +save_full_context_ex: + mfsr r8, SYSREG_RSR_EX + mov r12, r8 + andh r8, (MODE_MASK >> 16), COH + mfsr r11, SYSREG_RAR_EX + brne 2f + +1: pushm r11, r12 /* PC and SR */ + unmask_exceptions + ret r12 + +2: sub r10, sp, -(FRAME_SIZE_FULL - REG_LR) + stdsp sp[4], r10 /* replace saved SP */ + rjmp 1b + + /* Low-level exception handlers */ +handle_critical: + pushm r12 + pushm r0-r12 + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_critical_exception + + /* We should never get here... */ +bad_return: + sub r12, pc, (. - 1f) + bral panic + .align 2 +1: .asciz "Return from critical exception!" + + .align 1 +do_bus_error_write: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mov r11, 1 + rjmp 1f + +do_bus_error_read: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mov r11, 0 +1: mfsr r12, SYSREG_BEAR + mov r10, sp + rcall do_bus_error + rjmp ret_from_exception + + .align 1 +do_nmi_ll: + sub sp, 4 + stmts --sp, r0-lr + /* FIXME: Make sure RAR_NMI and RSR_NMI are pushed instead of *_EX */ + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_nmi + rjmp bad_return + +handle_address_fault: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_address_exception + rjmp ret_from_exception + +handle_protection_fault: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_page_fault + rjmp ret_from_exception + + .align 1 +do_illegal_opcode_ll: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_illegal_opcode + rjmp ret_from_exception + +do_dtlb_modified: + pushm r0-r3 + mfsr r1, SYSREG_TLBEAR + mfsr r0, SYSREG_PTBR + lsr r2, r1, PGDIR_SHIFT + ld.w r0, r0[r2 << 2] + lsl r1, (32 - PGDIR_SHIFT) + lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT + + /* Translate to virtual address in P1 */ + andl r0, 0xf000 + sbr r0, 31 + add r2, r0, r1 << 2 + ld.w r3, r2[0] + sbr r3, _PAGE_BIT_DIRTY + mov r0, r3 + st.w r2[0], r3 + + /* The page table is up-to-date. Update the TLB entry as well */ + andl r0, lo(_PAGE_FLAGS_HARDWARE_MASK) + mtsr SYSREG_TLBELO, r0 + + /* MMUCR[DRP] is updated automatically, so let's go... */ + tlbw + + popm r0-r3 + rete + +do_fpe_ll: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + unmask_interrupts + mov r12, 26 + mov r11, sp + rcall do_fpe + rjmp ret_from_exception + +ret_from_exception: + mask_interrupts + lddsp r4, sp[REG_SR] + andh r4, (MODE_MASK >> 16), COH + brne fault_resume_kernel + + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_WORK_MASK, COH + brne fault_exit_work + +fault_resume_user: + popm r8-r9 + mask_exceptions + mtsr SYSREG_RAR_EX, r8 + mtsr SYSREG_RSR_EX, r9 + ldmts sp++, r0-lr + sub sp, -4 + rete + +fault_resume_kernel: +#ifdef CONFIG_PREEMPT + get_thread_info r0 + ld.w r2, r0[TI_preempt_count] + cp.w r2, 0 + brne 1f + ld.w r1, r0[TI_flags] + bld r1, TIF_NEED_RESCHED + brcc 1f + lddsp r4, sp[REG_SR] + bld r4, SYSREG_GM_OFFSET + brcs 1f + rcall preempt_schedule_irq +1: +#endif + + popm r8-r9 + mask_exceptions + mfsr r1, SYSREG_SR + mtsr SYSREG_RAR_EX, r8 + mtsr SYSREG_RSR_EX, r9 + popm lr + sub sp, -4 /* ignore SP */ + popm r0-r12 + sub sp, -4 /* ignore r12_orig */ + rete + +irq_exit_work: + /* Switch to exception mode so that we can share the same code. */ + mfsr r8, SYSREG_SR + cbr r8, SYSREG_M0_OFFSET + orh r8, hi(SYSREG_BIT(M1) | SYSREG_BIT(M2)) + mtsr SYSREG_SR, r8 + sub pc, -2 + get_thread_info r0 + ld.w r1, r0[TI_flags] + +fault_exit_work: + bld r1, TIF_NEED_RESCHED + brcc 1f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp fault_exit_work + +1: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 2f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp fault_exit_work + +2: bld r1, TIF_BREAKPOINT + brcc fault_resume_user + mfsr r3, SYSREG_TLBEHI + lddsp r2, sp[REG_PC] + andl r3, 0xff, COH + lsl r3, 1 + sbr r3, 30 + sbr r3, 0 + mtdr DBGREG_BWA2A, r2 + mtdr DBGREG_BWC2A, r3 + rjmp fault_resume_user + + /* If we get a debug trap from privileged context we end up here */ +handle_debug_priv: + /* Fix up LR and SP in regs. r11 contains the mode we came from */ + mfsr r8, SYSREG_SR + mov r9, r8 + andh r8, hi(~MODE_MASK) + or r8, r11 + mtsr SYSREG_SR, r8 + sub pc, -2 + stdsp sp[REG_LR], lr + mtsr SYSREG_SR, r9 + sub pc, -2 + sub r10, sp, -FRAME_SIZE_FULL + stdsp sp[REG_SP], r10 + mov r12, sp + rcall do_debug_priv + + /* Now, put everything back */ + ssrf SR_EM_BIT + popm r10, r11 + mtsr SYSREG_RAR_DBG, r10 + mtsr SYSREG_RSR_DBG, r11 + mfsr r8, SYSREG_SR + mov r9, r8 + andh r8, hi(~MODE_MASK) + andh r11, hi(MODE_MASK) + or r8, r11 + mtsr SYSREG_SR, r8 + sub pc, -2 + popm lr + mtsr SYSREG_SR, r9 + sub pc, -2 + sub sp, -4 /* skip SP */ + popm r0-r12 + sub sp, -4 + retd + + /* + * At this point, everything is masked, that is, interrupts, + * exceptions and debugging traps. We might get called from + * interrupt or exception context in some rare cases, but this + * will be taken care of by do_debug(), so we're not going to + * do a 100% correct context save here. + */ +handle_debug: + sub sp, 4 /* r12_orig */ + stmts --sp, r0-lr + mfsr r10, SYSREG_RAR_DBG + mfsr r11, SYSREG_RSR_DBG + unmask_exceptions + pushm r10,r11 + andh r11, (MODE_MASK >> 16), COH + brne handle_debug_priv + + mov r12, sp + rcall do_debug + + lddsp r10, sp[REG_SR] + andh r10, (MODE_MASK >> 16), COH + breq debug_resume_user + +debug_restore_all: + popm r10,r11 + mask_exceptions + mtsr SYSREG_RSR_DBG, r11 + mtsr SYSREG_RAR_DBG, r10 + ldmts sp++, r0-lr + sub sp, -4 + retd + +debug_resume_user: + get_thread_info r0 + mask_interrupts + + ld.w r1, r0[TI_flags] + andl r1, _TIF_DBGWORK_MASK, COH + breq debug_restore_all + +1: bld r1, TIF_NEED_RESCHED + brcc 2f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 3f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +3: bld r1, TIF_SINGLE_STEP + brcc debug_restore_all + mfdr r2, DBGREG_DC + sbr r2, DC_SS_BIT + mtdr DBGREG_DC, r2 + rjmp debug_restore_all + + .set rsr_int0, SYSREG_RSR_INT0 + .set rsr_int1, SYSREG_RSR_INT1 + .set rsr_int2, SYSREG_RSR_INT2 + .set rsr_int3, SYSREG_RSR_INT3 + .set rar_int0, SYSREG_RAR_INT0 + .set rar_int1, SYSREG_RAR_INT1 + .set rar_int2, SYSREG_RAR_INT2 + .set rar_int3, SYSREG_RAR_INT3 + + .macro IRQ_LEVEL level + .type irq_level\level, @function +irq_level\level: + sub sp, 4 /* r12_orig */ + stmts --sp,r0-lr + mfsr r8, rar_int\level + mfsr r9, rsr_int\level + pushm r8-r9 + + mov r11, sp + mov r12, \level + + rcall do_IRQ + + lddsp r4, sp[REG_SR] + andh r4, (MODE_MASK >> 16), COH +#ifdef CONFIG_PREEMPT + brne 2f +#else + brne 1f +#endif + + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_WORK_MASK, COH + brne irq_exit_work + +1: popm r8-r9 + mtsr rar_int\level, r8 + mtsr rsr_int\level, r9 + ldmts sp++,r0-lr + sub sp, -4 /* ignore r12_orig */ + rete + +#ifdef CONFIG_PREEMPT +2: + get_thread_info r0 + ld.w r2, r0[TI_preempt_count] + cp.w r2, 0 + brne 1b + ld.w r1, r0[TI_flags] + bld r1, TIF_NEED_RESCHED + brcc 1b + lddsp r4, sp[REG_SR] + bld r4, SYSREG_GM_OFFSET + brcs 1b + rcall preempt_schedule_irq + rjmp 1b +#endif + .endm + + .section .irq.text,"ax",@progbits + + .global irq_level0 + .global irq_level1 + .global irq_level2 + .global irq_level3 + IRQ_LEVEL 0 + IRQ_LEVEL 1 + IRQ_LEVEL 2 + IRQ_LEVEL 3 diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S new file mode 100644 index 000000000000..773b7ad87be9 --- /dev/null +++ b/arch/avr32/kernel/head.S @@ -0,0 +1,45 @@ +/* + * Non-board-specific low-level startup code + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include +#include +#include + + .section .init.text,"ax" + .global kernel_entry +kernel_entry: + /* Initialize status register */ + lddpc r0, init_sr + mtsr SYSREG_SR, r0 + + /* Set initial stack pointer */ + lddpc sp, stack_addr + sub sp, -THREAD_SIZE + +#ifdef CONFIG_FRAME_POINTER + /* Mark last stack frame */ + mov lr, 0 + mov r7, 0 +#endif + + /* Set up the PIO, SDRAM controller, early printk, etc. */ + rcall board_early_init + + /* Start the show */ + lddpc pc, kernel_start_addr + + .align 2 +init_sr: + .long 0x007f0000 /* Supervisor mode, everything masked */ +stack_addr: + .long init_thread_union +kernel_start_addr: + .long start_kernel diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c new file mode 100644 index 000000000000..effcacf9d1a2 --- /dev/null +++ b/arch/avr32/kernel/init_task.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. Must be aligned on an 8192-byte boundary. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c new file mode 100644 index 000000000000..856f3548e664 --- /dev/null +++ b/arch/avr32/kernel/irq.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on arch/i386/kernel/irq.c + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * IRQ's are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include +#include +#include +#include +#include +#include + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ %u\n", irq); +} + +#ifdef CONFIG_PROC_FS +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *)v, cpu; + struct irqaction *action; + unsigned long flags; + + if (i == 0) { + seq_puts(p, " "); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%d ", cpu); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto unlock; + + seq_printf(p, "%3d: ", i); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); + unlock: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } + + return 0; +} +#endif diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c new file mode 100644 index 000000000000..6caf9e8d8080 --- /dev/null +++ b/arch/avr32/kernel/kprobes.c @@ -0,0 +1,270 @@ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * Based on arch/ppc64/kernel/kprobes.c + * Copyright (C) IBM Corporation, 2002, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +static unsigned long kprobe_status; +static struct pt_regs jprobe_saved_regs; + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + + if ((unsigned long)p->addr & 0x01) { + printk("Attempt to register kprobe at an unaligned address\n"); + ret = -EINVAL; + } + + /* XXX: Might be a good idea to check if p->addr is a valid + * kernel address as well... */ + + if (!ret) { + pr_debug("copy kprobe at %p\n", p->addr); + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + } + + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + pr_debug("arming kprobe at %p\n", p->addr); + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + pr_debug("disarming kprobe at %p\n", p->addr); + *p->addr = p->opcode; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long dc; + + pr_debug("preparing to singlestep over %p (PC=%08lx)\n", + p->addr, regs->pc); + + BUG_ON(!(sysreg_read(SR) & SYSREG_BIT(SR_D))); + + dc = __mfdr(DBGREG_DC); + dc |= DC_SS; + __mtdr(DBGREG_DC, dc); + + /* + * We must run the instruction from its original location + * since it may actually reference PC. + * + * TODO: Do the instruction replacement directly in icache. + */ + *p->addr = p->opcode; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long dc; + + pr_debug("resuming execution at PC=%08lx\n", regs->pc); + + dc = __mfdr(DBGREG_DC); + dc &= ~DC_SS; + __mtdr(DBGREG_DC, dc); + + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __get_cpu_var(current_kprobe) = p; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + void *addr = (void *)regs->pc; + int ret = 0; + + pr_debug("kprobe_handler: kprobe_running=%d\n", + kprobe_running()); + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + + /* Check that we're not recursing */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + printk("FIXME: kprobe hit while single-stepping!\n"); + goto no_kprobe; + } + + printk("FIXME: kprobe hit while handling another kprobe\n"); + goto no_kprobe; + } else { + p = kprobe_running(); + if (p->break_handler && p->break_handler(p, regs)) + goto ss_probe; + } + /* If it's not ours, can't be delete race, (we hold lock). */ + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) + goto no_kprobe; + + kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p); + if (p->pre_handler && p->pre_handler(p, regs)) + /* handler has already set things up, so skip ss setup */ + return 1; + +ss_probe: + prepare_singlestep(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + return ret; +} + +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + pr_debug("post_kprobe_handler, cur=%p\n", cur); + + if (!cur) + return 0; + + if (cur->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs); + reset_current_kprobe(); + preempt_enable_no_resched(); + + return 1; +} + +static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + + pr_debug("kprobe_fault_handler: trapnr=%d\n", trapnr); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine to for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + pr_debug("kprobe_exceptions_notify: val=%lu, data=%p\n", + val, data); + + switch (val) { + case DIE_BREAKPOINT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_FAULT: + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + + memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* + * TODO: We should probably save some of the stack here as + * well, since gcc may pass arguments on the stack for certain + * functions (lots of arguments, large aggregates, varargs) + */ + + /* setup return addr to the jprobe handler routine */ + regs->pc = (unsigned long)jp->entry; + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile("breakpoint" ::: "memory"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + /* + * FIXME - we should ideally be validating that we got here 'cos + * of the "trap" in jprobe_return() above, before restoring the + * saved regs... + */ + memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); + return 1; +} + +int __init arch_init_kprobes(void) +{ + printk("KPROBES: Enabling monitor mode (MM|DBE)...\n"); + __mtdr(DBGREG_DC, DC_MM | DC_DBE); + + /* TODO: Register kretprobe trampoline */ + return 0; +} diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c new file mode 100644 index 000000000000..dfc32f2817b6 --- /dev/null +++ b/arch/avr32/kernel/module.c @@ -0,0 +1,324 @@ +/* + * AVR32-specific kernel module loader + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * GOT initialization parts are based on the s390 version + * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, + * IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc(size); +} + +void module_free(struct module *mod, void *module_region) +{ + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; + + vfree(module_region); + /* FIXME: if module_region == mod->init_region, trim exception + * table entries. */ +} + +static inline int check_rela(Elf32_Rela *rela, struct module *module, + char *strings, Elf32_Sym *symbols) +{ + struct mod_arch_syminfo *info; + + info = module->arch.syminfo + ELF32_R_SYM(rela->r_info); + switch (ELF32_R_TYPE(rela->r_info)) { + case R_AVR32_GOT32: + case R_AVR32_GOT16: + case R_AVR32_GOT8: + case R_AVR32_GOT21S: + case R_AVR32_GOT18SW: /* mcall */ + case R_AVR32_GOT16S: /* ld.w */ + if (rela->r_addend != 0) { + printk(KERN_ERR + "GOT relocation against %s at offset %u with addend\n", + strings + symbols[ELF32_R_SYM(rela->r_info)].st_name, + rela->r_offset); + return -ENOEXEC; + } + if (info->got_offset == -1UL) { + info->got_offset = module->arch.got_size; + module->arch.got_size += sizeof(void *); + } + pr_debug("GOT[%3lu] %s\n", info->got_offset, + strings + symbols[ELF32_R_SYM(rela->r_info)].st_name); + break; + } + + return 0; +} + +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *module) +{ + Elf32_Shdr *symtab; + Elf32_Sym *symbols; + Elf32_Rela *rela; + char *strings; + int nrela, i, j; + int ret; + + /* Find the symbol table */ + symtab = NULL; + for (i = 0; i < hdr->e_shnum; i++) + switch (sechdrs[i].sh_type) { + case SHT_SYMTAB: + symtab = &sechdrs[i]; + break; + } + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", module->name); + return -ENOEXEC; + } + + /* Allocate room for one syminfo structure per symbol. */ + module->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym); + module->arch.syminfo = vmalloc(module->arch.nsyms + * sizeof(struct mod_arch_syminfo)); + if (!module->arch.syminfo) + return -ENOMEM; + + symbols = (void *)hdr + symtab->sh_offset; + strings = (void *)hdr + sechdrs[symtab->sh_link].sh_offset; + for (i = 0; i < module->arch.nsyms; i++) { + if (symbols[i].st_shndx == SHN_UNDEF && + strcmp(strings + symbols[i].st_name, + "_GLOBAL_OFFSET_TABLE_") == 0) + /* "Define" it as absolute. */ + symbols[i].st_shndx = SHN_ABS; + module->arch.syminfo[i].got_offset = -1UL; + module->arch.syminfo[i].got_initialized = 0; + } + + /* Allocate GOT entries for symbols that need it. */ + module->arch.got_size = 0; + for (i = 0; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_RELA) + continue; + nrela = sechdrs[i].sh_size / sizeof(Elf32_Rela); + rela = (void *)hdr + sechdrs[i].sh_offset; + for (j = 0; j < nrela; j++) { + ret = check_rela(rela + j, module, + strings, symbols); + if (ret) + goto out_free_syminfo; + } + } + + /* + * Increase core size to make room for GOT and set start + * offset for GOT. + */ + module->core_size = ALIGN(module->core_size, 4); + module->arch.got_offset = module->core_size; + module->core_size += module->arch.got_size; + + return 0; + +out_free_syminfo: + vfree(module->arch.syminfo); + module->arch.syminfo = NULL; + + return ret; +} + +static inline int reloc_overflow(struct module *module, const char *reloc_name, + Elf32_Addr relocation) +{ + printk(KERN_ERR "module %s: Value %lx does not fit relocation %s\n", + module->name, (unsigned long)relocation, reloc_name); + return -ENOEXEC; +} + +#define get_u16(loc) (*((uint16_t *)loc)) +#define put_u16(loc, val) (*((uint16_t *)loc) = (val)) + +int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relindex, + struct module *module) +{ + Elf32_Shdr *symsec = sechdrs + symindex; + Elf32_Shdr *relsec = sechdrs + relindex; + Elf32_Shdr *dstsec = sechdrs + relsec->sh_info; + Elf32_Rela *rel = (void *)relsec->sh_addr; + unsigned int i; + int ret = 0; + + for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) { + struct mod_arch_syminfo *info; + Elf32_Sym *sym; + Elf32_Addr relocation; + uint32_t *location; + uint32_t value; + + location = (void *)dstsec->sh_addr + rel->r_offset; + sym = (Elf32_Sym *)symsec->sh_addr + ELF32_R_SYM(rel->r_info); + relocation = sym->st_value + rel->r_addend; + + info = module->arch.syminfo + ELF32_R_SYM(rel->r_info); + + /* Initialize GOT entry if necessary */ + switch (ELF32_R_TYPE(rel->r_info)) { + case R_AVR32_GOT32: + case R_AVR32_GOT16: + case R_AVR32_GOT8: + case R_AVR32_GOT21S: + case R_AVR32_GOT18SW: + case R_AVR32_GOT16S: + if (!info->got_initialized) { + Elf32_Addr *gotent; + + gotent = (module->module_core + + module->arch.got_offset + + info->got_offset); + *gotent = relocation; + info->got_initialized = 1; + } + + relocation = info->got_offset; + break; + } + + switch (ELF32_R_TYPE(rel->r_info)) { + case R_AVR32_32: + case R_AVR32_32_CPENT: + *location = relocation; + break; + case R_AVR32_22H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xffe00001) != 0 + && (relocation & 0xffc00001) != 0xffc00000) + return reloc_overflow(module, + "R_AVR32_22H_PCREL", + relocation); + relocation >>= 1; + + value = *location; + value = ((value & 0xe1ef0000) + | (relocation & 0xffff) + | ((relocation & 0x10000) << 4) + | ((relocation & 0x1e0000) << 8)); + *location = value; + break; + case R_AVR32_11H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xfffffc01) != 0 + && (relocation & 0xfffff801) != 0xfffff800) + return reloc_overflow(module, + "R_AVR32_11H_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf00c) + | ((relocation & 0x1fe) << 3) + | ((relocation & 0x600) >> 9)); + put_u16(location, value); + break; + case R_AVR32_9H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xffffff01) != 0 + && (relocation & 0xfffffe01) != 0xfffffe00) + return reloc_overflow(module, + "R_AVR32_9H_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf00f) + | ((relocation & 0x1fe) << 3)); + put_u16(location, value); + break; + case R_AVR32_9UW_PCREL: + relocation -= ((Elf32_Addr)location) & 0xfffffffc; + if ((relocation & 0xfffffc03) != 0) + return reloc_overflow(module, + "R_AVR32_9UW_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf80f) + | ((relocation & 0x1fc) << 2)); + put_u16(location, value); + break; + case R_AVR32_GOTPC: + /* + * R6 = PC - (PC - GOT) + * + * At this point, relocation contains the + * value of PC. Just subtract the value of + * GOT, and we're done. + */ + pr_debug("GOTPC: PC=0x%lx, got_offset=0x%lx, core=0x%p\n", + relocation, module->arch.got_offset, + module->module_core); + relocation -= ((unsigned long)module->module_core + + module->arch.got_offset); + *location = relocation; + break; + case R_AVR32_GOT18SW: + if ((relocation & 0xfffe0003) != 0 + && (relocation & 0xfffc0003) != 0xffff0000) + return reloc_overflow(module, "R_AVR32_GOT18SW", + relocation); + relocation >>= 2; + /* fall through */ + case R_AVR32_GOT16S: + if ((relocation & 0xffff8000) != 0 + && (relocation & 0xffff0000) != 0xffff0000) + return reloc_overflow(module, "R_AVR32_GOT16S", + relocation); + pr_debug("GOT reloc @ 0x%lx -> %lu\n", + rel->r_offset, relocation); + value = *location; + value = ((value & 0xffff0000) + | (relocation & 0xffff)); + *location = value; + break; + + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", + module->name, ELF32_R_TYPE(rel->r_info)); + return -ENOEXEC; + } + } + + return ret; +} + +int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relindex, + struct module *module) +{ + printk(KERN_ERR "module %s: REL relocations are not supported\n", + module->name); + return -ENOEXEC; +} + +int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *module) +{ + vfree(module->arch.syminfo); + module->arch.syminfo = NULL; + + return 0; +} + +void module_arch_cleanup(struct module *module) +{ + +} diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c new file mode 100644 index 000000000000..317dc50945f2 --- /dev/null +++ b/arch/avr32/kernel/process.c @@ -0,0 +1,276 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +void cpu_idle(void) +{ + /* endless idle loop with no priority at all */ + while (1) { + /* TODO: Enter sleep mode */ + while (!need_resched()) + cpu_relax(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } +} + +void machine_halt(void) +{ +} + +void machine_power_off(void) +{ +} + +void machine_restart(char *cmd) +{ + __mtdr(DBGREG_DC, DC_DBE); + __mtdr(DBGREG_DC, DC_RES); + while (1) ; +} + +/* + * PC is actually discarded when returning from a system call -- the + * return address must be stored in LR. This function will make sure + * LR points to do_exit before starting the thread. + * + * Also, when returning from fork(), r12 is 0, so we must copy the + * argument as well. + * + * r0 : The argument to the main thread function + * r1 : The address of do_exit + * r2 : The address of the main thread function + */ +asmlinkage extern void kernel_thread_helper(void); +__asm__(" .type kernel_thread_helper, @function\n" + "kernel_thread_helper:\n" + " mov r12, r0\n" + " mov lr, r2\n" + " mov pc, r1\n" + " .size kernel_thread_helper, . - kernel_thread_helper"); + +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.r0 = (unsigned long)arg; + regs.r1 = (unsigned long)fn; + regs.r2 = (unsigned long)do_exit; + regs.lr = (unsigned long)kernel_thread_helper; + regs.pc = (unsigned long)kernel_thread_helper; + regs.sr = MODE_SUPERVISOR; + + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* + * Free current thread data structures etc + */ +void exit_thread(void) +{ + /* nothing to do */ +} + +void flush_thread(void) +{ + /* nothing to do */ +} + +void release_thread(struct task_struct *dead_task) +{ + /* do nothing */ +} + +static const char *cpu_modes[] = { + "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1", + "Interrupt level 2", "Interrupt level 3", "Exception", "NMI" +}; + +void show_regs(struct pt_regs *regs) +{ + unsigned long sp = regs->sp; + unsigned long lr = regs->lr; + unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT; + + if (!user_mode(regs)) + sp = (unsigned long)regs + FRAME_SIZE_FULL; + + print_symbol("PC is at %s\n", instruction_pointer(regs)); + print_symbol("LR is at %s\n", lr); + printk("pc : [<%08lx>] lr : [<%08lx>] %s\n" + "sp : %08lx r12: %08lx r11: %08lx\n", + instruction_pointer(regs), + lr, print_tainted(), sp, regs->r12, regs->r11); + printk("r10: %08lx r9 : %08lx r8 : %08lx\n", + regs->r10, regs->r9, regs->r8); + printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n", + regs->r7, regs->r6, regs->r5, regs->r4); + printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n", + regs->r3, regs->r2, regs->r1, regs->r0); + printk("Flags: %c%c%c%c%c\n", + regs->sr & SR_Q ? 'Q' : 'q', + regs->sr & SR_V ? 'V' : 'v', + regs->sr & SR_N ? 'N' : 'n', + regs->sr & SR_Z ? 'Z' : 'z', + regs->sr & SR_C ? 'C' : 'c'); + printk("Mode bits: %c%c%c%c%c%c%c%c%c\n", + regs->sr & SR_H ? 'H' : 'h', + regs->sr & SR_R ? 'R' : 'r', + regs->sr & SR_J ? 'J' : 'j', + regs->sr & SR_EM ? 'E' : 'e', + regs->sr & SR_I3M ? '3' : '.', + regs->sr & SR_I2M ? '2' : '.', + regs->sr & SR_I1M ? '1' : '.', + regs->sr & SR_I0M ? '0' : '.', + regs->sr & SR_GM ? 'G' : 'g'); + printk("CPU Mode: %s\n", cpu_modes[mode]); + + show_trace(NULL, (unsigned long *)sp, regs); +} +EXPORT_SYMBOL(show_regs); + +/* Fill in the fpu structure for a core dump. This is easy -- we don't have any */ +int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) +{ + /* Not valid */ + return 0; +} + +asmlinkage void ret_from_fork(void); + +int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs; + + childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1; + *childregs = *regs; + + if (user_mode(regs)) + childregs->sp = usp; + else + childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE; + + childregs->r12 = 0; /* Set return value for child */ + + p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM; + p->thread.cpu_context.ksp = (unsigned long)childregs; + p->thread.cpu_context.pc = (unsigned long)ret_from_fork; + + return 0; +} + +/* r12-r8 are dummy parameters to force the compiler to use the stack */ +asmlinkage int sys_fork(struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); +} + +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, + unsigned long parent_tidptr, + unsigned long child_tidptr, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, + (int __user *)parent_tidptr, + (int __user *)child_tidptr); +} + +asmlinkage int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, + 0, NULL, NULL); +} + +asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv, + char __user *__user *uenvp, struct pt_regs *regs) +{ + int error; + char *filename; + + filename = getname(ufilename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + + error = do_execve(filename, uargv, uenvp, regs); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + +out: + return error; +} + + +/* + * This function is supposed to answer the question "who called + * schedule()?" + */ +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long pc; + unsigned long stack_page; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + stack_page = (unsigned long)p->thread_info; + BUG_ON(!stack_page); + + /* + * The stored value of PC is either the address right after + * the call to __switch_to() or ret_from_fork. + */ + pc = thread_saved_pc(p); + if (in_sched_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER + unsigned long fp = p->thread.cpu_context.r7; + BUG_ON(fp < stack_page || fp > (THREAD_SIZE + stack_page)); + pc = *(unsigned long *)fp; +#else + /* + * We depend on the frame size of schedule here, which + * is actually quite ugly. It might be possible to + * determine the frame size automatically at build + * time by doing this: + * - compile sched.c + * - disassemble the resulting sched.o + * - look for 'sub sp,??' shortly after ':' + */ + unsigned long sp = p->thread.cpu_context.ksp + 16; + BUG_ON(sp < stack_page || sp > (THREAD_SIZE + stack_page)); + pc = *(unsigned long *)sp; +#endif + } + + return pc; +} diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c new file mode 100644 index 000000000000..3c89e59029ab --- /dev/null +++ b/arch/avr32/kernel/ptrace.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static struct pt_regs *get_user_regs(struct task_struct *tsk) +{ + return (struct pt_regs *)((unsigned long) tsk->thread_info + + THREAD_SIZE - sizeof(struct pt_regs)); +} + +static void ptrace_single_step(struct task_struct *tsk) +{ + pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n", + tsk->pid, tsk->thread.cpu_context.sr); + if (!(tsk->thread.cpu_context.sr & SR_D)) { + /* + * Set a breakpoint at the current pc to force the + * process into debug mode. The syscall/exception + * exit code will set a breakpoint at the return + * address when this flag is set. + */ + pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n"); + set_tsk_thread_flag(tsk, TIF_BREAKPOINT); + } + + /* The monitor code will do the actual step for us */ + set_tsk_thread_flag(tsk, TIF_SINGLE_STEP); +} + +/* + * Called by kernel/ptrace.c when detaching + * + * Make sure any single step bits, etc. are not set + */ +void ptrace_disable(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLE_STEP); +} + +/* + * Handle hitting a breakpoint + */ +static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) +{ + siginfo_t info; + + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *)instruction_pointer(regs); + + pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n", + tsk->pid, info.si_addr); + force_sig_info(SIGTRAP, &info, tsk); +} + +/* + * Read the word at offset "offset" into the task's "struct user". We + * actually access the pt_regs struct stored on the kernel stack. + */ +static int ptrace_read_user(struct task_struct *tsk, unsigned long offset, + unsigned long __user *data) +{ + unsigned long *regs; + unsigned long value; + + pr_debug("ptrace_read_user(%p, %#lx, %p)\n", + tsk, offset, data); + + if (offset & 3 || offset >= sizeof(struct user)) { + printk("ptrace_read_user: invalid offset 0x%08lx\n", offset); + return -EIO; + } + + regs = (unsigned long *)get_user_regs(tsk); + + value = 0; + if (offset < sizeof(struct pt_regs)) + value = regs[offset / sizeof(regs[0])]; + + return put_user(value, data); +} + +/* + * Write the word "value" to offset "offset" into the task's "struct + * user". We actually access the pt_regs struct stored on the kernel + * stack. + */ +static int ptrace_write_user(struct task_struct *tsk, unsigned long offset, + unsigned long value) +{ + unsigned long *regs; + + if (offset & 3 || offset >= sizeof(struct user)) { + printk("ptrace_write_user: invalid offset 0x%08lx\n", offset); + return -EIO; + } + + if (offset >= sizeof(struct pt_regs)) + return 0; + + regs = (unsigned long *)get_user_regs(tsk); + regs[offset / sizeof(regs[0])] = value; + + return 0; +} + +static int ptrace_getregs(struct task_struct *tsk, void __user *uregs) +{ + struct pt_regs *regs = get_user_regs(tsk); + + return copy_to_user(uregs, regs, sizeof(*regs)) ? -EFAULT : 0; +} + +static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs) +{ + struct pt_regs newregs; + int ret; + + ret = -EFAULT; + if (copy_from_user(&newregs, uregs, sizeof(newregs)) == 0) { + struct pt_regs *regs = get_user_regs(tsk); + + ret = -EINVAL; + if (valid_user_regs(&newregs)) { + *regs = newregs; + ret = 0; + } + } + + return ret; +} + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + unsigned long tmp; + int ret; + + pr_debug("arch_ptrace(%ld, %ld, %#lx, %#lx)\n", + request, child->pid, addr, data); + + pr_debug("ptrace: Enabling monitor mode...\n"); + __mtdr(DBGREG_DC, __mfdr(DBGREG_DC) | DC_MM | DC_DBE); + + switch (request) { + /* Read the word at location addr in the child process */ + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + ret = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + if (ret == sizeof(tmp)) + ret = put_user(tmp, (unsigned long __user *)data); + else + ret = -EIO; + break; + + case PTRACE_PEEKUSR: + ret = ptrace_read_user(child, addr, + (unsigned long __user *)data); + break; + + /* Write the word in data at location addr */ + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + ret = access_process_vm(child, addr, &data, sizeof(data), 1); + if (ret == sizeof(data)) + ret = 0; + else + ret = -EIO; + break; + + case PTRACE_POKEUSR: + ret = ptrace_write_user(child, addr, data); + break; + + /* continue and stop at next (return from) syscall */ + case PTRACE_SYSCALL: + /* restart after signal */ + case PTRACE_CONT: + ret = -EIO; + if (!valid_signal(data)) + break; + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + /* XXX: Are we sure no breakpoints are active here? */ + wake_up_process(child); + ret = 0; + break; + + /* + * Make the child exit. Best I can do is send it a + * SIGKILL. Perhaps it should be put in the status that it + * wants to exit. + */ + case PTRACE_KILL: + ret = 0; + if (child->exit_state == EXIT_ZOMBIE) + break; + child->exit_code = SIGKILL; + wake_up_process(child); + break; + + /* + * execute single instruction. + */ + case PTRACE_SINGLESTEP: + ret = -EIO; + if (!valid_signal(data)) + break; + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + ptrace_single_step(child); + child->exit_code = data; + wake_up_process(child); + ret = 0; + break; + + /* Detach a process that was attached */ + case PTRACE_DETACH: + ret = ptrace_detach(child, data); + break; + + case PTRACE_GETREGS: + ret = ptrace_getregs(child, (void __user *)data); + break; + + case PTRACE_SETREGS: + ret = ptrace_setregs(child, (const void __user *)data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n", ret, __mfdr(DBGREG_DC)); + return ret; +} + +asmlinkage void syscall_trace(void) +{ + pr_debug("syscall_trace called\n"); + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + + pr_debug("syscall_trace: notifying parent\n"); + /* The 0x80 provides a way for the tracing parent to + * distinguish between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it + * will do for normal use. strace only continues with a + * signal if the stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + pr_debug("syscall_trace: sending signal %d to PID %u\n", + current->exit_code, current->pid); + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +asmlinkage void do_debug_priv(struct pt_regs *regs) +{ + unsigned long dc, ds; + unsigned long die_val; + + ds = __mfdr(DBGREG_DS); + + pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds); + + if (ds & DS_SSS) + die_val = DIE_SSTEP; + else + die_val = DIE_BREAKPOINT; + + if (notify_die(die_val, regs, 0, SIGTRAP) == NOTIFY_STOP) + return; + + if (likely(ds & DS_SSS)) { + extern void itlb_miss(void); + extern void tlb_miss_common(void); + struct thread_info *ti; + + dc = __mfdr(DBGREG_DC); + dc &= ~DC_SS; + __mtdr(DBGREG_DC, dc); + + ti = current_thread_info(); + ti->flags |= _TIF_BREAKPOINT; + + /* The TLB miss handlers don't check thread flags */ + if ((regs->pc >= (unsigned long)&itlb_miss) + && (regs->pc <= (unsigned long)&tlb_miss_common)) { + __mtdr(DBGREG_BWA2A, sysreg_read(RAR_EX)); + __mtdr(DBGREG_BWC2A, 0x40000001 | (get_asid() << 1)); + } + + /* + * If we're running in supervisor mode, the breakpoint + * will take us where we want directly, no need to + * single step. + */ + if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR) + ti->flags |= TIF_SINGLE_STEP; + } else { + panic("Unable to handle debug trap at pc = %08lx\n", + regs->pc); + } +} + +/* + * Handle breakpoints, single steps and other debuggy things. To keep + * things simple initially, we run with interrupts and exceptions + * disabled all the time. + */ +asmlinkage void do_debug(struct pt_regs *regs) +{ + unsigned long dc, ds; + + ds = __mfdr(DBGREG_DS); + pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds); + + if (test_thread_flag(TIF_BREAKPOINT)) { + pr_debug("TIF_BREAKPOINT set\n"); + /* We're taking care of it */ + clear_thread_flag(TIF_BREAKPOINT); + __mtdr(DBGREG_BWC2A, 0); + } + + if (test_thread_flag(TIF_SINGLE_STEP)) { + pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds); + if (ds & DS_SSS) { + dc = __mfdr(DBGREG_DC); + dc &= ~DC_SS; + __mtdr(DBGREG_DC, dc); + + clear_thread_flag(TIF_SINGLE_STEP); + ptrace_break(current, regs); + } + } else { + /* regular breakpoint */ + ptrace_break(current, regs); + } +} diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c new file mode 100644 index 000000000000..1e2705a05016 --- /dev/null +++ b/arch/avr32/kernel/semaphore.c @@ -0,0 +1,148 @@ +/* + * AVR32 sempahore implementation. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/i386/kernel/semaphore.c + * Copyright (C) 1999 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +/* + * Semaphores are implemented using a two-way counter: + * The "count" variable is decremented for each process + * that tries to acquire the semaphore, while the "sleeping" + * variable is a count of such acquires. + * + * Notably, the inline "up()" and "down()" functions can + * efficiently test if they need to do any extra work (up + * needs to do something only if count was negative before + * the increment operation. + * + * "sleeping" and the contention routine ordering is protected + * by the spinlock in the semaphore's waitqueue head. + * + * Note that these functions are only called when there is + * contention on the lock, and as such all this is the + * "non-critical" part of the whole semaphore business. The + * critical part is the inline stuff in + * where we want to avoid any extra jumps and calls. + */ + +/* + * Logic: + * - only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - when we go from a non-negative count to a negative do we + * (a) synchronize with the "sleeper" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void __up(struct semaphore *sem) +{ + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__up); + +void __sched __down(struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + + tsk->state = TASK_UNINTERRUPTIBLE; + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); + + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock in + * the wait_queue_head. + */ + if (atomic_add_return(sleepers - 1, &sem->count) >= 0) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); + tsk->state = TASK_UNINTERRUPTIBLE; + } + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + tsk->state = TASK_RUNNING; +} +EXPORT_SYMBOL(__down); + +int __sched __down_interruptible(struct semaphore *sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + + tsk->state = TASK_INTERRUPTIBLE; + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); + + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into the trylock + * failure case - we won't be sleeping, and we can't + * get the lock as it has contention. Just correct the + * count and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock in + * the wait_queue_head. + */ + if (atomic_add_return(sleepers - 1, &sem->count) >= 0) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); + tsk->state = TASK_INTERRUPTIBLE; + } + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + + tsk->state = TASK_RUNNING; + return retval; +} +EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c new file mode 100644 index 000000000000..5d68f3c6990b --- /dev/null +++ b/arch/avr32/kernel/setup.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +extern int root_mountflags; + +/* + * Bootloader-provided information about physical memory + */ +struct tag_mem_range *mem_phys; +struct tag_mem_range *mem_reserved; +struct tag_mem_range *mem_ramdisk; + +/* + * Initialize loops_per_jiffy as 5000000 (500MIPS). + * Better make it too large than too small... + */ +struct avr32_cpuinfo boot_cpu_data = { + .loops_per_jiffy = 5000000 +}; +EXPORT_SYMBOL(boot_cpu_data); + +static char command_line[COMMAND_LINE_SIZE]; + +/* + * Should be more than enough, but if you have a _really_ complex + * setup, you might need to increase the size of this... + */ +static struct tag_mem_range __initdata mem_range_cache[32]; +static unsigned mem_range_next_free; + +/* + * Standard memory resources + */ +static struct resource mem_res[] = { + { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM + }, + { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM, + }, +}; + +#define kernel_code mem_res[0] +#define kernel_data mem_res[1] + +/* + * Early framebuffer allocation. Works as follows: + * - If fbmem_size is zero, nothing will be allocated or reserved. + * - If fbmem_start is zero when setup_bootmem() is called, + * fbmem_size bytes will be allocated from the bootmem allocator. + * - If fbmem_start is nonzero, an area of size fbmem_size will be + * reserved at the physical address fbmem_start if necessary. If + * the area isn't in a memory region known to the kernel, it will + * be left alone. + * + * Board-specific code may use these variables to set up platform data + * for the framebuffer driver if fbmem_size is nonzero. + */ +static unsigned long __initdata fbmem_start; +static unsigned long __initdata fbmem_size; + +/* + * "fbmem=xxx[kKmM]" allocates the specified amount of boot memory for + * use as framebuffer. + * + * "fbmem=xxx[kKmM]@yyy[kKmM]" defines a memory region of size xxx and + * starting at yyy to be reserved for use as framebuffer. + * + * The kernel won't verify that the memory region starting at yyy + * actually contains usable RAM. + */ +static int __init early_parse_fbmem(char *p) +{ + fbmem_size = memparse(p, &p); + if (*p == '@') + fbmem_start = memparse(p, &p); + return 0; +} +early_param("fbmem", early_parse_fbmem); + +static inline void __init resource_init(void) +{ + struct tag_mem_range *region; + + kernel_code.start = __pa(init_mm.start_code); + kernel_code.end = __pa(init_mm.end_code - 1); + kernel_data.start = __pa(init_mm.end_code); + kernel_data.end = __pa(init_mm.brk - 1); + + for (region = mem_phys; region; region = region->next) { + struct resource *res; + unsigned long phys_start, phys_end; + + if (region->size == 0) + continue; + + phys_start = region->addr; + phys_end = phys_start + region->size - 1; + + res = alloc_bootmem_low(sizeof(*res)); + res->name = "System RAM"; + res->start = phys_start; + res->end = phys_end; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + request_resource (&iomem_resource, res); + + if (kernel_code.start >= res->start && + kernel_code.end <= res->end) + request_resource (res, &kernel_code); + if (kernel_data.start >= res->start && + kernel_data.end <= res->end) + request_resource (res, &kernel_data); + } +} + +static int __init parse_tag_core(struct tag *tag) +{ + if (tag->hdr.size > 2) { + if ((tag->u.core.flags & 1) == 0) + root_mountflags &= ~MS_RDONLY; + ROOT_DEV = new_decode_dev(tag->u.core.rootdev); + } + return 0; +} +__tagtable(ATAG_CORE, parse_tag_core); + +static int __init parse_tag_mem_range(struct tag *tag, + struct tag_mem_range **root) +{ + struct tag_mem_range *cur, **pprev; + struct tag_mem_range *new; + + /* + * Ignore zero-sized entries. If we're running standalone, the + * SDRAM code may emit such entries if something goes + * wrong... + */ + if (tag->u.mem_range.size == 0) + return 0; + + /* + * Copy the data so the bootmem init code doesn't need to care + * about it. + */ + if (mem_range_next_free >= + (sizeof(mem_range_cache) / sizeof(mem_range_cache[0]))) + panic("Physical memory map too complex!\n"); + + new = &mem_range_cache[mem_range_next_free++]; + *new = tag->u.mem_range; + + pprev = root; + cur = *root; + while (cur) { + pprev = &cur->next; + cur = cur->next; + } + + *pprev = new; + new->next = NULL; + + return 0; +} + +static int __init parse_tag_mem(struct tag *tag) +{ + return parse_tag_mem_range(tag, &mem_phys); +} +__tagtable(ATAG_MEM, parse_tag_mem); + +static int __init parse_tag_cmdline(struct tag *tag) +{ + strlcpy(saved_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); + return 0; +} +__tagtable(ATAG_CMDLINE, parse_tag_cmdline); + +static int __init parse_tag_rdimg(struct tag *tag) +{ + return parse_tag_mem_range(tag, &mem_ramdisk); +} +__tagtable(ATAG_RDIMG, parse_tag_rdimg); + +static int __init parse_tag_clock(struct tag *tag) +{ + /* + * We'll figure out the clocks by peeking at the system + * manager regs directly. + */ + return 0; +} +__tagtable(ATAG_CLOCK, parse_tag_clock); + +static int __init parse_tag_rsvd_mem(struct tag *tag) +{ + return parse_tag_mem_range(tag, &mem_reserved); +} +__tagtable(ATAG_RSVD_MEM, parse_tag_rsvd_mem); + +static int __init parse_tag_ethernet(struct tag *tag) +{ +#if 0 + const struct platform_device *pdev; + + /* + * We really need a bus type that supports "classes"...this + * will do for now (until we must handle other kinds of + * ethernet controllers) + */ + pdev = platform_get_device("macb", tag->u.ethernet.mac_index); + if (pdev && pdev->dev.platform_data) { + struct eth_platform_data *data = pdev->dev.platform_data; + + data->valid = 1; + data->mii_phy_addr = tag->u.ethernet.mii_phy_addr; + memcpy(data->hw_addr, tag->u.ethernet.hw_address, + sizeof(data->hw_addr)); + } +#endif + return 0; +} +__tagtable(ATAG_ETHERNET, parse_tag_ethernet); + +/* + * Scan the tag table for this tag, and call its parse function. The + * tag table is built by the linker from all the __tagtable + * declarations. + */ +static int __init parse_tag(struct tag *tag) +{ + extern struct tagtable __tagtable_begin, __tagtable_end; + struct tagtable *t; + + for (t = &__tagtable_begin; t < &__tagtable_end; t++) + if (tag->hdr.tag == t->tag) { + t->parse(tag); + break; + } + + return t < &__tagtable_end; +} + +/* + * Parse all tags in the list we got from the boot loader + */ +static void __init parse_tags(struct tag *t) +{ + for (; t->hdr.tag != ATAG_NONE; t = tag_next(t)) + if (!parse_tag(t)) + printk(KERN_WARNING + "Ignoring unrecognised tag 0x%08x\n", + t->hdr.tag); +} + +void __init setup_arch (char **cmdline_p) +{ + struct clk *cpu_clk; + + parse_tags(bootloader_tags); + + setup_processor(); + setup_platform(); + + cpu_clk = clk_get(NULL, "cpu"); + if (IS_ERR(cpu_clk)) { + printk(KERN_WARNING "Warning: Unable to get CPU clock\n"); + } else { + unsigned long cpu_hz = clk_get_rate(cpu_clk); + + /* + * Well, duh, but it's probably a good idea to + * increment the use count. + */ + clk_enable(cpu_clk); + + boot_cpu_data.clk = cpu_clk; + boot_cpu_data.loops_per_jiffy = cpu_hz * 4; + printk("CPU: Running at %lu.%03lu MHz\n", + ((cpu_hz + 500) / 1000) / 1000, + ((cpu_hz + 500) / 1000) % 1000); + } + + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + parse_early_param(); + + setup_bootmem(); + + board_setup_fbmem(fbmem_start, fbmem_size); + +#ifdef CONFIG_VT + conswitchp = &dummy_con; +#endif + + paging_init(); + + resource_init(); +} diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c new file mode 100644 index 000000000000..33096651c24f --- /dev/null +++ b/arch/avr32/kernel/signal.c @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/sh/kernel/signal.c + * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + struct pt_regs *regs) +{ + return do_sigaltstack(uss, uoss, regs->sp); +} + +struct rt_sigframe +{ + struct siginfo info; + struct ucontext uc; + unsigned long retcode; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +{ + int err = 0; + +#define COPY(x) err |= __get_user(regs->x, &sc->x) + COPY(sr); + COPY(pc); + COPY(lr); + COPY(sp); + COPY(r12); + COPY(r11); + COPY(r10); + COPY(r9); + COPY(r8); + COPY(r7); + COPY(r6); + COPY(r5); + COPY(r4); + COPY(r3); + COPY(r2); + COPY(r1); + COPY(r0); +#undef COPY + + /* + * Don't allow anyone to pretend they're running in supervisor + * mode or something... + */ + err |= !valid_user_regs(regs); + + return err; +} + + +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + sigset_t set; + + frame = (struct rt_sigframe __user *)regs->sp; + pr_debug("SIG return: frame = %p\n", frame); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + pr_debug("Context restored: pc = %08lx, lr = %08lx, sp = %08lx\n", + regs->pc, regs->lr, regs->sp); + + return regs->r12; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +static int +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) +{ + int err = 0; + +#define COPY(x) err |= __put_user(regs->x, &sc->x) + COPY(sr); + COPY(pc); + COPY(lr); + COPY(sp); + COPY(r12); + COPY(r11); + COPY(r10); + COPY(r9); + COPY(r8); + COPY(r7); + COPY(r6); + COPY(r5); + COPY(r4); + COPY(r3); + COPY(r2); + COPY(r1); + COPY(r0); +#undef COPY + + return err; +} + +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize) +{ + unsigned long sp = regs->sp; + + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + return (void __user *)((sp - framesize) & ~3); +} + +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + err = -EFAULT; + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) + goto out; + + /* + * Set up the return code: + * + * mov r8, __NR_rt_sigreturn + * scall + * + * Note: This will blow up since we're using a non-executable + * stack. Better use SA_RESTORER. + */ +#if __NR_rt_sigreturn > 127 +# error __NR_rt_sigreturn must be < 127 to fit in a short mov +#endif + err = __put_user(0x3008d733 | (__NR_rt_sigreturn << 20), + &frame->retcode); + + err |= copy_siginfo_to_user(&frame->info, info); + + /* Set up the ucontext */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __put_user((void __user *)current->sas_ss_sp, + &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, + &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (err) + goto out; + + regs->r12 = sig; + regs->r11 = (unsigned long) &frame->info; + regs->r10 = (unsigned long) &frame->uc; + regs->sp = (unsigned long) frame; + if (ka->sa.sa_flags & SA_RESTORER) + regs->lr = (unsigned long)ka->sa.sa_restorer; + else { + printk(KERN_NOTICE "[%s:%d] did not set SA_RESTORER\n", + current->comm, current->pid); + regs->lr = (unsigned long) &frame->retcode; + } + + pr_debug("SIG deliver [%s:%d]: sig=%d sp=0x%lx pc=0x%lx->0x%p lr=0x%lx\n", + current->comm, current->pid, sig, regs->sp, + regs->pc, ka->sa.sa_handler, regs->lr); + + regs->pc = (unsigned long) ka->sa.sa_handler; + +out: + return err; +} + +static inline void restart_syscall(struct pt_regs *regs) +{ + if (regs->r12 == -ERESTART_RESTARTBLOCK) + regs->r8 = __NR_restart_syscall; + else + regs->r12 = regs->r12_orig; + regs->pc -= 2; +} + +static inline void +handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *oldset, struct pt_regs *regs, int syscall) +{ + int ret; + + /* + * Set up the stack frame + */ + ret = setup_rt_frame(sig, ka, info, oldset, regs); + + /* + * Check that the resulting registers are sane + */ + ret |= !valid_user_regs(regs); + + /* + * Block the signal if we were unsuccessful. + */ + if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked, ¤t->blocked, + &ka->sa.sa_mask); + sigaddset(¤t->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + } + + if (ret == 0) + return; + + force_sigsegv(sig, current); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it + * doesn't want to handle. Thus you cannot kill init even with a + * SIGKILL even by mistake. + */ +int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + + /* + * We want the common case to go fast, which is why we may in + * certain cases get here from kernel mode. Just return + * without doing anything if so. + */ + if (!user_mode(regs)) + return 0; + + if (try_to_freeze()) { + signr = 0; + if (!signal_pending(current)) + goto no_signal; + } + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else if (!oldset) + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); +no_signal: + if (syscall) { + switch (regs->r12) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + if (signr > 0) { + regs->r12 = -EINTR; + break; + } + /* fall through */ + case -ERESTARTSYS: + if (signr > 0 && !(ka.sa.sa_flags & SA_RESTART)) { + regs->r12 = -EINTR; + break; + } + /* fall through */ + case -ERESTARTNOINTR: + restart_syscall(regs); + } + } + + if (signr == 0) { + /* No signal to deliver -- put the saved sigmask back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } + return 0; + } + + handle_signal(signr, &ka, &info, oldset, regs, syscall); + return 1; +} + +asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) +{ + int syscall = 0; + + if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR) + syscall = 1; + + if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + do_signal(regs, ¤t->blocked, syscall); +} diff --git a/arch/avr32/kernel/switch_to.S b/arch/avr32/kernel/switch_to.S new file mode 100644 index 000000000000..a48d046723c5 --- /dev/null +++ b/arch/avr32/kernel/switch_to.S @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + + .text + .global __switch_to + .type __switch_to, @function + + /* Switch thread context from "prev" to "next", returning "last" + * r12 : prev + * r11 : &prev->thread + 1 + * r10 : &next->thread + */ +__switch_to: + stm --r11, r0,r1,r2,r3,r4,r5,r6,r7,sp,lr + mfsr r9, SYSREG_SR + st.w --r11, r9 + ld.w r8, r10++ + /* + * schedule() may have been called from a mode with a different + * set of registers. Make sure we don't lose anything here. + */ + pushm r10,r12 + mtsr SYSREG_SR, r8 + frs /* flush the return stack */ + sub pc, -2 /* flush the pipeline */ + popm r10,r12 + ldm r10++, r0,r1,r2,r3,r4,r5,r6,r7,sp,pc + .size __switch_to, . - __switch_to diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c new file mode 100644 index 000000000000..6ec5693da448 --- /dev/null +++ b/arch/avr32/kernel/sys_avr32.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include +#include + +asmlinkage int sys_pipe(unsigned long __user *filedes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(filedes, fd, sizeof(fd))) + error = -EFAULT; + } + return error; +} + +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset) +{ + int error = -EBADF; + struct file *file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return error; + } + + down_write(¤t->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, offset); + up_write(¤t->mm->mmap_sem); + + if (file) + fput(file); + return error; +} diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S new file mode 100644 index 000000000000..7589a9b426cb --- /dev/null +++ b/arch/avr32/kernel/syscall-stubs.S @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Stubs for syscalls that require access to pt_regs or that take more + * than five parameters. + */ + +#define ARG6 r3 + + .text + .global __sys_rt_sigsuspend + .type __sys_rt_sigsuspend,@function +__sys_rt_sigsuspend: + mov r10, sp + rjmp sys_rt_sigsuspend + + .global __sys_sigaltstack + .type __sys_sigaltstack,@function +__sys_sigaltstack: + mov r10, sp + rjmp sys_sigaltstack + + .global __sys_rt_sigreturn + .type __sys_rt_sigreturn,@function +__sys_rt_sigreturn: + mov r12, sp + rjmp sys_rt_sigreturn + + .global __sys_fork + .type __sys_fork,@function +__sys_fork: + mov r12, sp + rjmp sys_fork + + .global __sys_clone + .type __sys_clone,@function +__sys_clone: + mov r8, sp + rjmp sys_clone + + .global __sys_vfork + .type __sys_vfork,@function +__sys_vfork: + mov r12, sp + rjmp sys_vfork + + .global __sys_execve + .type __sys_execve,@function +__sys_execve: + mov r9, sp + rjmp sys_execve + + .global __sys_mmap2 + .type __sys_mmap2,@function +__sys_mmap2: + pushm lr + st.w --sp, ARG6 + rcall sys_mmap2 + sub sp, -4 + popm pc + + .global __sys_sendto + .type __sys_sendto,@function +__sys_sendto: + pushm lr + st.w --sp, ARG6 + rcall sys_sendto + sub sp, -4 + popm pc + + .global __sys_recvfrom + .type __sys_recvfrom,@function +__sys_recvfrom: + pushm lr + st.w --sp, ARG6 + rcall sys_recvfrom + sub sp, -4 + popm pc + + .global __sys_pselect6 + .type __sys_pselect6,@function +__sys_pselect6: + pushm lr + st.w --sp, ARG6 + rcall sys_pselect6 + sub sp, -4 + popm pc + + .global __sys_splice + .type __sys_splice,@function +__sys_splice: + pushm lr + st.w --sp, ARG6 + rcall sys_splice + sub sp, -4 + popm pc diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S new file mode 100644 index 000000000000..63b206965d05 --- /dev/null +++ b/arch/avr32/kernel/syscall_table.S @@ -0,0 +1,289 @@ +/* + * AVR32 system call table + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#if !defined(CONFIG_NFSD) && !defined(CONFIG_NFSD_MODULE) +#define sys_nfsservctl sys_ni_syscall +#endif + +#if !defined(CONFIG_SYSV_IPC) +# define sys_ipc sys_ni_syscall +#endif + + .section .rodata,"a",@progbits + .type sys_call_table,@object + .global sys_call_table + .align 2 +sys_call_table: + .long sys_restart_syscall + .long sys_exit + .long __sys_fork + .long sys_read + .long sys_write + .long sys_open /* 5 */ + .long sys_close + .long sys_umask + .long sys_creat + .long sys_link + .long sys_unlink /* 10 */ + .long __sys_execve + .long sys_chdir + .long sys_time + .long sys_mknod + .long sys_chmod /* 15 */ + .long sys_chown + .long sys_lchown + .long sys_lseek + .long sys_llseek + .long sys_getpid /* 20 */ + .long sys_mount + .long sys_umount + .long sys_setuid + .long sys_getuid + .long sys_stime /* 25 */ + .long sys_ptrace + .long sys_alarm + .long sys_pause + .long sys_utime + .long sys_newstat /* 30 */ + .long sys_newfstat + .long sys_newlstat + .long sys_access + .long sys_chroot + .long sys_sync /* 35 */ + .long sys_fsync + .long sys_kill + .long sys_rename + .long sys_mkdir + .long sys_rmdir /* 40 */ + .long sys_dup + .long sys_pipe + .long sys_times + .long __sys_clone + .long sys_brk /* 45 */ + .long sys_setgid + .long sys_getgid + .long sys_getcwd + .long sys_geteuid + .long sys_getegid /* 50 */ + .long sys_acct + .long sys_setfsuid + .long sys_setfsgid + .long sys_ioctl + .long sys_fcntl /* 55 */ + .long sys_setpgid + .long sys_mremap + .long sys_setresuid + .long sys_getresuid + .long sys_setreuid /* 60 */ + .long sys_setregid + .long sys_ustat + .long sys_dup2 + .long sys_getppid + .long sys_getpgrp /* 65 */ + .long sys_setsid + .long sys_rt_sigaction + .long __sys_rt_sigreturn + .long sys_rt_sigprocmask + .long sys_rt_sigpending /* 70 */ + .long sys_rt_sigtimedwait + .long sys_rt_sigqueueinfo + .long __sys_rt_sigsuspend + .long sys_sethostname + .long sys_setrlimit /* 75 */ + .long sys_getrlimit + .long sys_getrusage + .long sys_gettimeofday + .long sys_settimeofday + .long sys_getgroups /* 80 */ + .long sys_setgroups + .long sys_select + .long sys_symlink + .long sys_fchdir + .long sys_readlink /* 85 */ + .long sys_pread64 + .long sys_pwrite64 + .long sys_swapon + .long sys_reboot + .long __sys_mmap2 /* 90 */ + .long sys_munmap + .long sys_truncate + .long sys_ftruncate + .long sys_fchmod + .long sys_fchown /* 95 */ + .long sys_getpriority + .long sys_setpriority + .long sys_wait4 + .long sys_statfs + .long sys_fstatfs /* 100 */ + .long sys_vhangup + .long __sys_sigaltstack + .long sys_syslog + .long sys_setitimer + .long sys_getitimer /* 105 */ + .long sys_swapoff + .long sys_sysinfo + .long sys_ipc + .long sys_sendfile + .long sys_setdomainname /* 110 */ + .long sys_newuname + .long sys_adjtimex + .long sys_mprotect + .long __sys_vfork + .long sys_init_module /* 115 */ + .long sys_delete_module + .long sys_quotactl + .long sys_getpgid + .long sys_bdflush + .long sys_sysfs /* 120 */ + .long sys_personality + .long sys_ni_syscall /* reserved for afs_syscall */ + .long sys_getdents + .long sys_flock + .long sys_msync /* 125 */ + .long sys_readv + .long sys_writev + .long sys_getsid + .long sys_fdatasync + .long sys_sysctl /* 130 */ + .long sys_mlock + .long sys_munlock + .long sys_mlockall + .long sys_munlockall + .long sys_sched_setparam /* 135 */ + .long sys_sched_getparam + .long sys_sched_setscheduler + .long sys_sched_getscheduler + .long sys_sched_yield + .long sys_sched_get_priority_max /* 140 */ + .long sys_sched_get_priority_min + .long sys_sched_rr_get_interval + .long sys_nanosleep + .long sys_poll + .long sys_nfsservctl /* 145 */ + .long sys_setresgid + .long sys_getresgid + .long sys_prctl + .long sys_socket + .long sys_bind /* 150 */ + .long sys_connect + .long sys_listen + .long sys_accept + .long sys_getsockname + .long sys_getpeername /* 155 */ + .long sys_socketpair + .long sys_send + .long sys_recv + .long __sys_sendto + .long __sys_recvfrom /* 160 */ + .long sys_shutdown + .long sys_setsockopt + .long sys_getsockopt + .long sys_sendmsg + .long sys_recvmsg /* 165 */ + .long sys_truncate64 + .long sys_ftruncate64 + .long sys_stat64 + .long sys_lstat64 + .long sys_fstat64 /* 170 */ + .long sys_pivot_root + .long sys_mincore + .long sys_madvise + .long sys_getdents64 + .long sys_fcntl64 /* 175 */ + .long sys_gettid + .long sys_readahead + .long sys_setxattr + .long sys_lsetxattr + .long sys_fsetxattr /* 180 */ + .long sys_getxattr + .long sys_lgetxattr + .long sys_fgetxattr + .long sys_listxattr + .long sys_llistxattr /* 185 */ + .long sys_flistxattr + .long sys_removexattr + .long sys_lremovexattr + .long sys_fremovexattr + .long sys_tkill /* 190 */ + .long sys_sendfile64 + .long sys_futex + .long sys_sched_setaffinity + .long sys_sched_getaffinity + .long sys_capget /* 195 */ + .long sys_capset + .long sys_io_setup + .long sys_io_destroy + .long sys_io_getevents + .long sys_io_submit /* 200 */ + .long sys_io_cancel + .long sys_fadvise64 + .long sys_exit_group + .long sys_lookup_dcookie + .long sys_epoll_create /* 205 */ + .long sys_epoll_ctl + .long sys_epoll_wait + .long sys_remap_file_pages + .long sys_set_tid_address + .long sys_timer_create /* 210 */ + .long sys_timer_settime + .long sys_timer_gettime + .long sys_timer_getoverrun + .long sys_timer_delete + .long sys_clock_settime /* 215 */ + .long sys_clock_gettime + .long sys_clock_getres + .long sys_clock_nanosleep + .long sys_statfs64 + .long sys_fstatfs64 /* 220 */ + .long sys_tgkill + .long sys_ni_syscall /* reserved for TUX */ + .long sys_utimes + .long sys_fadvise64_64 + .long sys_cacheflush /* 225 */ + .long sys_ni_syscall /* sys_vserver */ + .long sys_mq_open + .long sys_mq_unlink + .long sys_mq_timedsend + .long sys_mq_timedreceive /* 230 */ + .long sys_mq_notify + .long sys_mq_getsetattr + .long sys_kexec_load + .long sys_waitid + .long sys_add_key /* 235 */ + .long sys_request_key + .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get + .long sys_inotify_init /* 240 */ + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_openat + .long sys_mkdirat + .long sys_mknodat /* 245 */ + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 + .long sys_unlinkat + .long sys_renameat /* 250 */ + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat + .long sys_fchmodat + .long sys_faccessat /* 255 */ + .long __sys_pselect6 + .long sys_ppoll + .long sys_unshare + .long sys_set_robust_list + .long sys_get_robust_list /* 260 */ + .long __sys_splice + .long sys_sync_file_range + .long sys_tee + .long sys_vmsplice + .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c new file mode 100644 index 000000000000..b0e6b5855a38 --- /dev/null +++ b/arch/avr32/kernel/time.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on MIPS implementation arch/mips/kernel/time.c + * Copyright 2001 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static cycle_t read_cycle_count(void) +{ + return (cycle_t)sysreg_read(COUNT); +} + +static struct clocksource clocksource_avr32 = { + .name = "avr32", + .rating = 350, + .read = read_cycle_count, + .mask = CLOCKSOURCE_MASK(32), + .shift = 16, + .is_continuous = 1, +}; + +/* + * By default we provide the null RTC ops + */ +static unsigned long null_rtc_get_time(void) +{ + return mktime(2004, 1, 1, 0, 0, 0); +} + +static int null_rtc_set_time(unsigned long sec) +{ + return 0; +} + +static unsigned long (*rtc_get_time)(void) = null_rtc_get_time; +static int (*rtc_set_time)(unsigned long) = null_rtc_set_time; + +/* how many counter cycles in a jiffy? */ +static unsigned long cycles_per_jiffy; + +/* cycle counter value at the previous timer interrupt */ +static unsigned int timerhi, timerlo; + +/* the count value for the next timer interrupt */ +static unsigned int expirelo; + +static void avr32_timer_ack(void) +{ + unsigned int count; + + /* Ack this timer interrupt and set the next one */ + expirelo += cycles_per_jiffy; + if (expirelo == 0) { + printk(KERN_DEBUG "expirelo == 0\n"); + sysreg_write(COMPARE, expirelo + 1); + } else { + sysreg_write(COMPARE, expirelo); + } + + /* Check to see if we have missed any timer interrupts */ + count = sysreg_read(COUNT); + if ((count - expirelo) < 0x7fffffff) { + expirelo = count + cycles_per_jiffy; + sysreg_write(COMPARE, expirelo); + } +} + +static unsigned int avr32_hpt_read(void) +{ + return sysreg_read(COUNT); +} + +/* + * Taken from MIPS c0_hpt_timer_init(). + * + * Why is it so complicated, and what is "count"? My assumption is + * that `count' specifies the "reference cycle", i.e. the cycle since + * reset that should mean "zero". The reason COUNT is written twice is + * probably to make sure we don't get any timer interrupts while we + * are messing with the counter. + */ +static void avr32_hpt_init(unsigned int count) +{ + count = sysreg_read(COUNT) - count; + expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy; + sysreg_write(COUNT, expirelo - cycles_per_jiffy); + sysreg_write(COMPARE, expirelo); + sysreg_write(COUNT, count); +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + /* There must be better ways...? */ + return (unsigned long long)jiffies * (1000000000 / HZ); +} + +/* + * local_timer_interrupt() does profiling and process accounting on a + * per-CPU basis. + * + * In UP mode, it is invoked from the (global) timer_interrupt. + */ +static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + if (current->pid) + profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(regs)); +} + +static irqreturn_t +timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned int count; + + /* ack timer interrupt and try to set next interrupt */ + count = avr32_hpt_read(); + avr32_timer_ack(); + + /* Update timerhi/timerlo for intra-jiffy calibration */ + timerhi += count < timerlo; /* Wrap around */ + timerlo = count; + + /* + * Call the generic timer interrupt handler + */ + write_seqlock(&xtime_lock); + do_timer(regs); + write_sequnlock(&xtime_lock); + + /* + * In UP mode, we call local_timer_interrupt() to do profiling + * and process accounting. + * + * SMP is not supported yet. + */ + local_timer_interrupt(irq, dev_id, regs); + + return IRQ_HANDLED; +} + +static struct irqaction timer_irqaction = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED, + .name = "timer", +}; + +void __init time_init(void) +{ + unsigned long mult, shift, count_hz; + int ret; + + xtime.tv_sec = rtc_get_time(); + xtime.tv_nsec = 0; + + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + + printk("Before time_init: count=%08lx, compare=%08lx\n", + (unsigned long)sysreg_read(COUNT), + (unsigned long)sysreg_read(COMPARE)); + + count_hz = clk_get_rate(boot_cpu_data.clk); + shift = clocksource_avr32.shift; + mult = clocksource_hz2mult(count_hz, shift); + clocksource_avr32.mult = mult; + + printk("Cycle counter: mult=%lu, shift=%lu\n", mult, shift); + + { + u64 tmp; + + tmp = TICK_NSEC; + tmp <<= shift; + tmp += mult / 2; + do_div(tmp, mult); + + cycles_per_jiffy = tmp; + } + + /* This sets up the high precision timer for the first interrupt. */ + avr32_hpt_init(avr32_hpt_read()); + + printk("After time_init: count=%08lx, compare=%08lx\n", + (unsigned long)sysreg_read(COUNT), + (unsigned long)sysreg_read(COMPARE)); + + ret = clocksource_register(&clocksource_avr32); + if (ret) + printk(KERN_ERR + "timer: could not register clocksource: %d\n", ret); + + ret = setup_irq(0, &timer_irqaction); + if (ret) + printk("timer: could not request IRQ 0: %d\n", ret); +} + +static struct sysdev_class timer_class = { + set_kset_name("timer"), +}; + +static struct sys_device timer_device = { + .id = 0, + .cls = &timer_class, +}; + +static int __init init_timer_sysfs(void) +{ + int err = sysdev_class_register(&timer_class); + if (!err) + err = sysdev_register(&timer_device); + return err; +} + +device_initcall(init_timer_sysfs); diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c new file mode 100644 index 000000000000..7e803f4d7a12 --- /dev/null +++ b/arch/avr32/kernel/traps.c @@ -0,0 +1,425 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static void dump_mem(const char *str, unsigned long bottom, unsigned long top) +{ + unsigned long p; + int i; + + printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top); + + for (p = bottom & ~31; p < top; ) { + printk("%04lx: ", p & 0xffff); + + for (i = 0; i < 8; i++, p += 4) { + unsigned int val; + + if (p < bottom || p >= top) + printk(" "); + else { + if (__get_user(val, (unsigned int __user *)p)) { + printk("\n"); + goto out; + } + printk("%08x ", val); + } + } + printk("\n"); + } + +out: + return; +} + +#ifdef CONFIG_FRAME_POINTER +static inline void __show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long __user *fp; + unsigned long __user *last_fp = NULL; + + if (regs) { + fp = (unsigned long __user *)regs->r7; + } else if (tsk == current) { + register unsigned long __user *real_fp __asm__("r7"); + fp = real_fp; + } else { + fp = (unsigned long __user *)tsk->thread.cpu_context.r7; + } + + /* + * Walk the stack until (a) we get an exception, (b) the frame + * pointer becomes zero, or (c) the frame pointer gets stuck + * at the same value. + */ + while (fp && fp != last_fp) { + unsigned long lr, new_fp = 0; + + last_fp = fp; + if (__get_user(lr, fp)) + break; + if (fp && __get_user(new_fp, fp + 1)) + break; + fp = (unsigned long __user *)new_fp; + + printk(" [<%08lx>] ", lr); + print_symbol("%s\n", lr); + } + printk("\n"); +} +#else +static inline void __show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long addr; + + while (!kstack_end(sp)) { + addr = *sp++; + if (kernel_text_address(addr)) { + printk(" [<%08lx>] ", addr); + print_symbol("%s\n", addr); + } + } +} +#endif + +void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + if (regs && + (((regs->sr & MODE_MASK) == MODE_EXCEPTION) || + ((regs->sr & MODE_MASK) == MODE_USER))) + return; + + printk ("Call trace:"); +#ifdef CONFIG_KALLSYMS + printk("\n"); +#endif + + __show_trace(tsk, sp, regs); + printk("\n"); +} + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ + unsigned long stack; + + if (!tsk) + tsk = current; + if (sp == 0) { + if (tsk == current) { + register unsigned long *real_sp __asm__("sp"); + sp = real_sp; + } else { + sp = (unsigned long *)tsk->thread.cpu_context.ksp; + } + } + + stack = (unsigned long)sp; + dump_mem("Stack: ", stack, + THREAD_SIZE + (unsigned long)tsk->thread_info); + show_trace(tsk, sp, NULL); +} + +void dump_stack(void) +{ + show_stack(NULL, NULL); +} +EXPORT_SYMBOL(dump_stack); + +ATOMIC_NOTIFIER_HEAD(avr32_die_chain); + +int register_die_notifier(struct notifier_block *nb) +{ + pr_debug("register_die_notifier: %p\n", nb); + + return atomic_notifier_chain_register(&avr32_die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&avr32_die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + +static DEFINE_SPINLOCK(die_lock); + +void __die(const char *str, struct pt_regs *regs, unsigned long err, + const char *file, const char *func, unsigned long line) +{ + struct task_struct *tsk = current; + static int die_counter; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + + printk(KERN_ALERT "%s", str); + if (file && func) + printk(" in %s:%s, line %ld", file, func, line); + printk("[#%d]:\n", ++die_counter); + print_modules(); + show_regs(regs); + printk("Process %s (pid: %d, stack limit = 0x%p)\n", + tsk->comm, tsk->pid, tsk->thread_info + 1); + + if (!user_mode(regs) || in_interrupt()) { + dump_mem("Stack: ", regs->sp, + THREAD_SIZE + (unsigned long)tsk->thread_info); + } + + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +void __die_if_kernel(const char *str, struct pt_regs *regs, unsigned long err, + const char *file, const char *func, unsigned long line) +{ + if (!user_mode(regs)) + __die(str, regs, err, file, func, line); +} + +asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs) +{ +#ifdef CONFIG_SUBARCH_AVR32B + /* + * The exception entry always saves RSR_EX. For NMI, this is + * wrong; it should be RSR_NMI + */ + regs->sr = sysreg_read(RSR_NMI); +#endif + + printk("NMI taken!!!!\n"); + die("NMI", regs, ecr); + BUG(); +} + +asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs) +{ + printk("Unable to handle critical exception %lu at pc = %08lx!\n", + ecr, regs->pc); + die("Oops", regs, ecr); + BUG(); +} + +asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs) +{ + siginfo_t info; + + die_if_kernel("Oops: Address exception in kernel mode", regs, ecr); + +#ifdef DEBUG + if (ecr == ECR_ADDR_ALIGN_X) + pr_debug("Instruction Address Exception at pc = %08lx\n", + regs->pc); + else if (ecr == ECR_ADDR_ALIGN_R) + pr_debug("Data Address Exception (Read) at pc = %08lx\n", + regs->pc); + else if (ecr == ECR_ADDR_ALIGN_W) + pr_debug("Data Address Exception (Write) at pc = %08lx\n", + regs->pc); + else + BUG(); + + show_regs(regs); +#endif + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)regs->pc; + + force_sig_info(SIGBUS, &info, current); +} + +/* This way of handling undefined instructions is stolen from ARM */ +static LIST_HEAD(undef_hook); +static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED; + +void register_undef_hook(struct undef_hook *hook) +{ + spin_lock_irq(&undef_lock); + list_add(&hook->node, &undef_hook); + spin_unlock_irq(&undef_lock); +} + +void unregister_undef_hook(struct undef_hook *hook) +{ + spin_lock_irq(&undef_lock); + list_del(&hook->node); + spin_unlock_irq(&undef_lock); +} + +static int do_cop_absent(u32 insn) +{ + int cop_nr; + u32 cpucr; + if ( (insn & 0xfdf00000) == 0xf1900000 ) + /* LDC0 */ + cop_nr = 0; + else + cop_nr = (insn >> 13) & 0x7; + + /* Try enabling the coprocessor */ + cpucr = sysreg_read(CPUCR); + cpucr |= (1 << (24 + cop_nr)); + sysreg_write(CPUCR, cpucr); + + cpucr = sysreg_read(CPUCR); + if ( !(cpucr & (1 << (24 + cop_nr))) ){ + printk("Coprocessor #%i not found!\n", cop_nr); + return -1; + } + + return 0; +} + +#ifdef CONFIG_BUG +#ifdef CONFIG_DEBUG_BUGVERBOSE +static inline void do_bug_verbose(struct pt_regs *regs, u32 insn) +{ + char *file; + u16 line; + char c; + + if (__get_user(line, (u16 __user *)(regs->pc + 2))) + return; + if (__get_user(file, (char * __user *)(regs->pc + 4)) + || (unsigned long)file < PAGE_OFFSET + || __get_user(c, file)) + file = ""; + + printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); +} +#else +static inline void do_bug_verbose(struct pt_regs *regs, u32 insn) +{ + +} +#endif +#endif + +asmlinkage void do_illegal_opcode(unsigned long ecr, struct pt_regs *regs) +{ + u32 insn; + struct undef_hook *hook; + siginfo_t info; + void __user *pc; + + if (!user_mode(regs)) + goto kernel_trap; + + local_irq_enable(); + + pc = (void __user *)instruction_pointer(regs); + if (__get_user(insn, (u32 __user *)pc)) + goto invalid_area; + + if (ecr == ECR_COPROC_ABSENT) { + if (do_cop_absent(insn) == 0) + return; + } + + spin_lock_irq(&undef_lock); + list_for_each_entry(hook, &undef_hook, node) { + if ((insn & hook->insn_mask) == hook->insn_val) { + if (hook->fn(regs, insn) == 0) { + spin_unlock_irq(&undef_lock); + return; + } + } + } + spin_unlock_irq(&undef_lock); + +invalid_area: + +#ifdef DEBUG + printk("Illegal instruction at pc = %08lx\n", regs->pc); + if (regs->pc < TASK_SIZE) { + unsigned long ptbr, pgd, pte, *p; + + ptbr = sysreg_read(PTBR); + p = (unsigned long *)ptbr; + pgd = p[regs->pc >> 22]; + p = (unsigned long *)((pgd & 0x1ffff000) | 0x80000000); + pte = p[(regs->pc >> 12) & 0x3ff]; + printk("page table: 0x%08lx -> 0x%08lx -> 0x%08lx\n", ptbr, pgd, pte); + } +#endif + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_addr = (void __user *)regs->pc; + switch (ecr) { + case ECR_ILLEGAL_OPCODE: + case ECR_UNIMPL_INSTRUCTION: + info.si_code = ILL_ILLOPC; + break; + case ECR_PRIVILEGE_VIOLATION: + info.si_code = ILL_PRVOPC; + break; + case ECR_COPROC_ABSENT: + info.si_code = ILL_COPROC; + break; + default: + BUG(); + } + + force_sig_info(SIGILL, &info, current); + return; + +kernel_trap: +#ifdef CONFIG_BUG + if (__kernel_text_address(instruction_pointer(regs))) { + insn = *(u16 *)instruction_pointer(regs); + if (insn == AVR32_BUG_OPCODE) { + do_bug_verbose(regs, insn); + die("Kernel BUG", regs, 0); + return; + } + } +#endif + + die("Oops: Illegal instruction in kernel code", regs, ecr); +} + +asmlinkage void do_fpe(unsigned long ecr, struct pt_regs *regs) +{ + siginfo_t info; + + printk("Floating-point exception at pc = %08lx\n", regs->pc); + + /* We have no FPU... */ + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_addr = (void __user *)regs->pc; + info.si_code = ILL_COPROC; + + force_sig_info(SIGILL, &info, current); +} + + +void __init trap_init(void) +{ + +} diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c new file mode 100644 index 000000000000..cdd627c6b7dc --- /dev/null +++ b/arch/avr32/kernel/vmlinux.lds.c @@ -0,0 +1,139 @@ +/* + * AVR32 linker script for the Linux kernel + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LOAD_OFFSET 0x00000000 +#include + +OUTPUT_FORMAT("elf32-avr32", "elf32-avr32", "elf32-avr32") +OUTPUT_ARCH(avr32) +ENTRY(_start) + +/* Big endian */ +jiffies = jiffies_64 + 4; + +SECTIONS +{ + . = CONFIG_ENTRY_ADDRESS; + .init : AT(ADDR(.init) - LOAD_OFFSET) { + _stext = .; + __init_begin = .; + _sinittext = .; + *(.text.reset) + *(.init.text) + _einittext = .; + . = ALIGN(4); + __tagtable_begin = .; + *(.taglist) + __tagtable_end = .; + *(.init.data) + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + . = ALIGN(4); + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + __security_initcall_start = .; + *(.security_initcall.init) + __security_initcall_end = .; + . = ALIGN(32); + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + . = ALIGN(4096); + __init_end = .; + } + + . = ALIGN(8192); + .text : AT(ADDR(.text) - LOAD_OFFSET) { + _evba = .; + _text = .; + *(.ex.text) + . = 0x50; + *(.tlbx.ex.text) + . = 0x60; + *(.tlbr.ex.text) + . = 0x70; + *(.tlbw.ex.text) + . = 0x100; + *(.scall.text) + *(.irq.text) + *(.text) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) + _etext = .; + } = 0xd703d703 + + . = ALIGN(4); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + RODATA + + . = ALIGN(8192); + + .data : AT(ADDR(.data) - LOAD_OFFSET) { + _data = .; + _sdata = .; + /* + * First, the init task union, aligned to an 8K boundary. + */ + *(.data.init_task) + + /* Then, the cacheline aligned data */ + . = ALIGN(32); + *(.data.cacheline_aligned) + + /* And the rest... */ + *(.data.rel*) + *(.data) + CONSTRUCTORS + + _edata = .; + } + + + . = ALIGN(8); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss) + *(COMMON) + . = ALIGN(8); + __bss_stop = .; + _end = .; + } + + /* When something in the kernel is NOT compiled as a module, the module + * cleanup code and data are put into these segments. Both can then be + * thrown away, as cleanup code is never called unless it's a module. + */ + /DISCARD/ : { + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + } + + DWARF_DEBUG +} diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile new file mode 100644 index 000000000000..09ac43e40522 --- /dev/null +++ b/arch/avr32/lib/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for AVR32-specific library files +# + +lib-y := copy_user.o clear_user.o +lib-y += strncpy_from_user.o strnlen_user.o +lib-y += delay.o memset.o memcpy.o findbit.o +lib-y += csum_partial.o csum_partial_copy_generic.o +lib-y += io-readsw.o io-readsl.o io-writesw.o io-writesl.o +lib-y += __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S new file mode 100644 index 000000000000..368b6bca4c76 --- /dev/null +++ b/arch/avr32/lib/__avr32_asr64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_asr64(DWtype u, word_type b) + */ + .text + .global __avr32_asr64 + .type __avr32_asr64,@function +__avr32_asr64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsl r8, r11, r9 + lsr r10, r10, r12 + asr r11, r11, r12 + or r10, r8 + retal r12 + +1: neg r9 + asr r10, r11, r9 + asr r11, 31 + retal r12 diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S new file mode 100644 index 000000000000..f1dbc2b36257 --- /dev/null +++ b/arch/avr32/lib/__avr32_lsl64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_lsl64(DWtype u, word_type b) + */ + .text + .global __avr32_lsl64 + .type __avr32_lsl64,@function +__avr32_lsl64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsr r8, r10, r9 + lsl r10, r10, r12 + lsl r11, r11, r12 + or r11, r8 + retal r12 + +1: neg r9 + lsl r11, r10, r9 + mov r10, 0 + retal r12 diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S new file mode 100644 index 000000000000..e65bb7f0d24c --- /dev/null +++ b/arch/avr32/lib/__avr32_lsr64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_lsr64(DWtype u, word_type b) + */ + .text + .global __avr32_lsr64 + .type __avr32_lsr64,@function +__avr32_lsr64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsl r8, r11, r9 + lsr r11, r11, r12 + lsr r10, r10, r12 + or r10, r8 + retal r12 + +1: neg r9 + lsr r10, r11, r9 + mov r11, 0 + retal r12 diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S new file mode 100644 index 000000000000..d8991b6f8eb7 --- /dev/null +++ b/arch/avr32/lib/clear_user.S @@ -0,0 +1,76 @@ +/* + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + + .text + .align 1 + .global clear_user + .type clear_user, "function" +clear_user: + branch_if_kernel r8, __clear_user + ret_if_privileged r8, r12, r11, r11 + + .global __clear_user + .type __clear_user, "function" +__clear_user: + mov r9, r12 + mov r8, 0 + andl r9, 3, COH + brne 5f + +1: sub r11, 4 + brlt 2f + +10: st.w r12++, r8 + sub r11, 4 + brge 10b + +2: sub r11, -4 + reteq 0 + + /* Unaligned count or address */ + bld r11, 1 + brcc 12f +11: st.h r12++, r8 + sub r11, 2 + reteq 0 +12: st.b r12++, r8 + retal 0 + + /* Unaligned address */ +5: cp.w r11, 4 + brlt 2b + + lsl r9, 2 + add pc, pc, r9 +13: st.b r12++, r8 + sub r11, 1 +14: st.b r12++, r8 + sub r11, 1 +15: st.b r12++, r8 + sub r11, 1 + rjmp 1b + + .size clear_user, . - clear_user + .size __clear_user, . - __clear_user + + .section .fixup, "ax" + .align 1 +18: sub r11, -4 +19: retal r11 + + .section __ex_table, "a" + .align 2 + .long 10b, 18b + .long 11b, 19b + .long 12b, 19b + .long 13b, 19b + .long 14b, 19b + .long 15b, 19b diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S new file mode 100644 index 000000000000..ea59c04b07de --- /dev/null +++ b/arch/avr32/lib/copy_user.S @@ -0,0 +1,119 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + + /* + * __kernel_size_t + * __copy_user(void *to, const void *from, __kernel_size_t n) + * + * Returns the number of bytes not copied. Might be off by + * max 3 bytes if we get a fault in the main loop. + * + * The address-space checking functions simply fall through to + * the non-checking version. + */ + .text + .align 1 + .global copy_from_user + .type copy_from_user, @function +copy_from_user: + branch_if_kernel r8, __copy_user + ret_if_privileged r8, r11, r10, r10 + rjmp __copy_user + .size copy_from_user, . - copy_from_user + + .global copy_to_user + .type copy_to_user, @function +copy_to_user: + branch_if_kernel r8, __copy_user + ret_if_privileged r8, r12, r10, r10 + .size copy_to_user, . - copy_to_user + + .global __copy_user + .type __copy_user, @function +__copy_user: + mov r9, r11 + andl r9, 3, COH + brne 6f + + /* At this point, from is word-aligned */ +1: sub r10, 4 + brlt 3f + +2: +10: ld.w r8, r11++ +11: st.w r12++, r8 + sub r10, 4 + brge 2b + +3: sub r10, -4 + reteq 0 + + /* + * Handle unaligned count. Need to be careful with r10 here so + * that we return the correct value even if we get a fault + */ +4: +20: ld.ub r8, r11++ +21: st.b r12++, r8 + sub r10, 1 + reteq 0 +22: ld.ub r8, r11++ +23: st.b r12++, r8 + sub r10, 1 + reteq 0 +24: ld.ub r8, r11++ +25: st.b r12++, r8 + retal 0 + + /* Handle unaligned from-pointer */ +6: cp.w r10, 4 + brlt 4b + rsub r9, r9, 4 + +30: ld.ub r8, r11++ +31: st.b r12++, r8 + sub r10, 1 + sub r9, 1 + breq 1b +32: ld.ub r8, r11++ +33: st.b r12++, r8 + sub r10, 1 + sub r9, 1 + breq 1b +34: ld.ub r8, r11++ +35: st.b r12++, r8 + sub r10, 1 + rjmp 1b + .size __copy_user, . - __copy_user + + .section .fixup,"ax" + .align 1 +19: sub r10, -4 +29: retal r10 + + .section __ex_table,"a" + .align 2 + .long 10b, 19b + .long 11b, 19b + .long 20b, 29b + .long 21b, 29b + .long 22b, 29b + .long 23b, 29b + .long 24b, 29b + .long 25b, 29b + .long 30b, 29b + .long 31b, 29b + .long 32b, 29b + .long 33b, 29b + .long 34b, 29b + .long 35b, 29b diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S new file mode 100644 index 000000000000..6a262b528eb7 --- /dev/null +++ b/arch/avr32/lib/csum_partial.S @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * unsigned int csum_partial(const unsigned char *buff, + * int len, unsigned int sum) + */ + .text + .global csum_partial + .type csum_partial,"function" + .align 1 +csum_partial: + /* checksum complete words, aligned or not */ +3: sub r11, 4 + brlt 5f +4: ld.w r9, r12++ + add r10, r9 + acr r10 + sub r11, 4 + brge 4b + + /* return if we had a whole number of words */ +5: sub r11, -4 + reteq r10 + + /* checksum any remaining bytes at the end */ + mov r9, 0 + mov r8, 0 + cp r11, 2 + brlt 6f + ld.uh r9, r12++ + sub r11, 2 + breq 7f + lsl r9, 16 +6: ld.ub r8, r12++ + lsl r8, 8 +7: or r9, r8 + add r10, r9 + acr r10 + + retal r10 + .size csum_partial, . - csum_partial diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S new file mode 100644 index 000000000000..a3a0f9b8929c --- /dev/null +++ b/arch/avr32/lib/csum_partial_copy_generic.S @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + /* + * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len + * int sum, int *src_err_ptr, + * int *dst_err_ptr) + * + * Copy src to dst while checksumming, otherwise like csum_partial. + */ + + .macro ld_src size, reg, ptr +9999: ld.\size \reg, \ptr + .section __ex_table, "a" + .long 9999b, fixup_ld_src + .previous + .endm + + .macro st_dst size, ptr, reg +9999: st.\size \ptr, \reg + .section __ex_table, "a" + .long 9999b, fixup_st_dst + .previous + .endm + + .text + .global csum_partial_copy_generic + .type csum_partial_copy_generic,"function" + .align 1 +csum_partial_copy_generic: + pushm r4-r7,lr + + /* The inner loop */ +1: sub r10, 4 + brlt 5f +2: ld_src w, r5, r12++ + st_dst w, r11++, r5 + add r9, r5 + acr r9 + sub r10, 4 + brge 2b + + /* return if we had a whole number of words */ +5: sub r10, -4 + brne 7f + +6: mov r12, r9 + popm r4-r7,pc + + /* handle additional bytes at the tail */ +7: mov r5, 0 + mov r4, 32 +8: ld_src ub, r6, r12++ + st_dst b, r11++, r6 + lsl r5, 8 + sub r4, 8 + bfins r5, r6, 0, 8 + sub r10, 1 + brne 8b + + lsl r5, r5, r4 + add r9, r5 + acr r9 + rjmp 6b + + /* Exception handler */ + .section .fixup,"ax" + .align 1 +fixup_ld_src: + mov r9, -EFAULT + cp.w r8, 0 + breq 1f + st.w r8[0], r9 + +1: /* + * TODO: zero the complete destination - computing the rest + * is too much work + */ + + mov r9, 0 + rjmp 6b + +fixup_st_dst: + mov r9, -EFAULT + lddsp r8, sp[20] + cp.w r8, 0 + breq 1f + st.w r8[0], r9 +1: mov r9, 0 + rjmp 6b + + .previous diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c new file mode 100644 index 000000000000..462c8307b680 --- /dev/null +++ b/arch/avr32/lib/delay.c @@ -0,0 +1,55 @@ +/* + * Precise Delay Loops for avr32 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include + +int read_current_timer(unsigned long *timer_value) +{ + *timer_value = sysreg_read(COUNT); + return 0; +} + +void __delay(unsigned long loops) +{ + unsigned bclock, now; + + bclock = sysreg_read(COUNT); + do { + now = sysreg_read(COUNT); + } while ((now - bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + unsigned long long loops; + + asm("mulu.d %0, %1, %2" + : "=r"(loops) + : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops)); + __delay(loops >> 32); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S new file mode 100644 index 000000000000..2b4856f4bf7c --- /dev/null +++ b/arch/avr32/lib/findbit.S @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + + .text + /* + * unsigned long find_first_zero_bit(const unsigned long *addr, + * unsigned long size) + */ +ENTRY(find_first_zero_bit) + cp.w r11, 0 + reteq r11 + mov r9, r11 +1: ld.w r8, r12[0] + com r8 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + + /* + * unsigned long find_next_zero_bit(const unsigned long *addr, + * unsigned long size, + * unsigned long offset) + */ +ENTRY(find_next_zero_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ld.w r8, r12[0] + com r8 + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. offset must be word-aligned */ +1: ld.w r8, r12[0] + com r8 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + + /* Common return path for when a bit is actually found. */ +.L_found: + brev r8 + clz r10, r8 + rsub r9, r11 + add r10, r9 + + /* XXX: If we don't have to return exactly "size" when the bit + is not found, we may drop this "min" thing */ + min r12, r11, r10 + retal r12 + + /* + * unsigned long find_first_bit(const unsigned long *addr, + * unsigned long size) + */ +ENTRY(find_first_bit) + cp.w r11, 0 + reteq r11 + mov r9, r11 +1: ld.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + + /* + * unsigned long find_next_bit(const unsigned long *addr, + * unsigned long size, + * unsigned long offset) + */ +ENTRY(find_next_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ld.w r8, r12[0] + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. offset must be word-aligned */ +1: ld.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + +ENTRY(generic_find_next_zero_le_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ldswp.w r8, r12[0] + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. offset must be word-aligned */ +1: ldswp.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S new file mode 100644 index 000000000000..b103511ed6c4 --- /dev/null +++ b/arch/avr32/lib/io-readsl.S @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .global __raw_readsl + .type __raw_readsl,@function +__raw_readsl: + cp.w r10, 0 + reteq r12 + + /* + * If r11 isn't properly aligned, we might get an exception on + * some implementations. But there's not much we can do about it. + */ +1: ld.w r8, r12[0] + sub r10, 1 + st.w r11++, r8 + brne 1b + + retal r12 diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S new file mode 100644 index 000000000000..456be9909027 --- /dev/null +++ b/arch/avr32/lib/io-readsw.S @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +.Lnot_word_aligned: + /* + * Bad alignment will cause a hardware exception, which is as + * good as anything. No need for us to check for proper alignment. + */ + ld.uh r8, r12[0] + sub r10, 1 + st.h r11++, r8 + + /* fall through */ + + .global __raw_readsw + .type __raw_readsw,@function +__raw_readsw: + cp.w r10, 0 + reteq r12 + mov r9, 3 + tst r11, r9 + brne .Lnot_word_aligned + + sub r10, 2 + brlt 2f + +1: ldins.h r8:t, r12[0] + ldins.h r8:b, r12[0] + st.w r11++, r8 + sub r10, 2 + brge 1b + +2: sub r10, -2 + reteq r12 + + ld.uh r8, r12[0] + st.h r11++, r8 + retal r12 diff --git a/arch/avr32/lib/io-writesl.S b/arch/avr32/lib/io-writesl.S new file mode 100644 index 000000000000..22138b3a16e5 --- /dev/null +++ b/arch/avr32/lib/io-writesl.S @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .global __raw_writesl + .type __raw_writesl,@function +__raw_writesl: + cp.w r10, 0 + reteq r12 + +1: ld.w r8, r11++ + sub r10, 1 + st.w r12[0], r8 + brne 1b + + retal r12 diff --git a/arch/avr32/lib/io-writesw.S b/arch/avr32/lib/io-writesw.S new file mode 100644 index 000000000000..8c4a53f1c52a --- /dev/null +++ b/arch/avr32/lib/io-writesw.S @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +.Lnot_word_aligned: + ld.uh r8, r11++ + sub r10, 1 + st.h r12[0], r8 + + .global __raw_writesw + .type __raw_writesw,@function +__raw_writesw: + cp.w r10, 0 + mov r9, 3 + reteq r12 + tst r11, r9 + brne .Lnot_word_aligned + + sub r10, 2 + brlt 2f + +1: ld.w r8, r11++ + bfextu r9, r8, 16, 16 + st.h r12[0], r9 + st.h r12[0], r8 + sub r10, 2 + brge 1b + +2: sub r10, -2 + reteq r12 + + ld.uh r8, r11++ + st.h r12[0], r8 + retal r12 diff --git a/arch/avr32/lib/libgcc.h b/arch/avr32/lib/libgcc.h new file mode 100644 index 000000000000..5a091b5e3618 --- /dev/null +++ b/arch/avr32/lib/libgcc.h @@ -0,0 +1,33 @@ +/* Definitions for various functions 'borrowed' from gcc-3.4.3 */ + +#define BITS_PER_UNIT 8 + +typedef int QItype __attribute__ ((mode (QI))); +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int HItype __attribute__ ((mode (HI))); +typedef unsigned int UHItype __attribute__ ((mode (HI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +typedef float SFtype __attribute__ ((mode (SF))); +typedef float DFtype __attribute__ ((mode (DF))); +typedef int word_type __attribute__ ((mode (__word__))); + +#define W_TYPE_SIZE (4 * BITS_PER_UNIT) +#define Wtype SItype +#define UWtype USItype +#define HWtype SItype +#define UHWtype USItype +#define DWtype DItype +#define UDWtype UDItype +#define __NW(a,b) __ ## a ## si ## b +#define __NDW(a,b) __ ## a ## di ## b + +struct DWstruct {Wtype high, low;}; + +typedef union +{ + struct DWstruct s; + DWtype ll; +} DWunion; diff --git a/arch/avr32/lib/longlong.h b/arch/avr32/lib/longlong.h new file mode 100644 index 000000000000..cd5e369ac437 --- /dev/null +++ b/arch/avr32/lib/longlong.h @@ -0,0 +1,98 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000 + Free Software Foundation, Inc. + + This definition file is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2, or (at your option) any later version. + + This definition file is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Borrowed from gcc-3.4.3 */ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +#define count_leading_zeros(count, x) ((count) = __builtin_clz(x)) + +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0; \ + UWtype __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +#define udiv_qrnnd __udiv_qrnnd_c + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) + +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart (u); \ + __uh = __ll_highpart (u); \ + __vl = __ll_lowpart (v); \ + __vh = __ll_highpart (v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ + } while (0) diff --git a/arch/avr32/lib/memcpy.S b/arch/avr32/lib/memcpy.S new file mode 100644 index 000000000000..0abb26142b64 --- /dev/null +++ b/arch/avr32/lib/memcpy.S @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * void *memcpy(void *to, const void *from, unsigned long n) + * + * This implementation does word-aligned loads in the main loop, + * possibly sacrificing alignment of stores. + * + * Hopefully, in most cases, both "to" and "from" will be + * word-aligned to begin with. + */ + .text + .global memcpy + .type memcpy, @function +memcpy: + mov r9, r11 + andl r9, 3, COH + brne 1f + + /* At this point, "from" is word-aligned */ +2: sub r10, 4 + mov r9, r12 + brlt 4f + +3: ld.w r8, r11++ + sub r10, 4 + st.w r12++, r8 + brge 3b + +4: neg r10 + reteq r9 + + /* Handle unaligned count */ + lsl r10, 2 + add pc, pc, r10 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + retal r9 + + /* Handle unaligned "from" pointer */ +1: sub r10, 4 + brlt 4b + add r10, r9 + lsl r9, 2 + add pc, pc, r9 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + rjmp 2b diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S new file mode 100644 index 000000000000..40da32c0480c --- /dev/null +++ b/arch/avr32/lib/memset.S @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/arm/lib/memset.S + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include + + /* + * r12: void *b + * r11: int c + * r10: size_t len + * + * Returns b in r12 + */ + .text + .global memset + .type memset, @function + .align 5 +memset: + mov r9, r12 + mov r8, r12 + or r11, r11, r11 << 8 + andl r9, 3, COH + brne 1f + +2: or r11, r11, r11 << 16 + sub r10, 4 + brlt 5f + + /* Let's do some real work */ +4: st.w r8++, r11 + sub r10, 4 + brge 4b + + /* + * When we get here, we've got less than 4 bytes to set. r10 + * might be negative. + */ +5: sub r10, -4 + reteq r12 + + /* Fastpath ends here, exactly 32 bytes from memset */ + + /* Handle unaligned count or pointer */ + bld r10, 1 + brcc 6f + st.b r8++, r11 + st.b r8++, r11 + bld r10, 0 + retcc r12 +6: st.b r8++, r11 + retal r12 + + /* Handle unaligned pointer */ +1: sub r10, 4 + brlt 5b + add r10, r9 + lsl r9, 1 + add pc, r9 + st.b r8++, r11 + st.b r8++, r11 + st.b r8++, r11 + rjmp 2b + + .size memset, . - memset diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S new file mode 100644 index 000000000000..72bd50599ec6 --- /dev/null +++ b/arch/avr32/lib/strncpy_from_user.S @@ -0,0 +1,60 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include +#include +#include + + /* + * long strncpy_from_user(char *dst, const char *src, long count) + * + * On success, returns the length of the string, not including + * the terminating NUL. + * + * If the string is longer than count, returns count + * + * If userspace access fails, returns -EFAULT + */ + .text + .align 1 + .global strncpy_from_user + .type strncpy_from_user, "function" +strncpy_from_user: + mov r9, -EFAULT + branch_if_kernel r8, __strncpy_from_user + ret_if_privileged r8, r11, r10, r9 + + .global __strncpy_from_user + .type __strncpy_from_user, "function" +__strncpy_from_user: + cp.w r10, 0 + reteq 0 + + mov r9, r10 + +1: ld.ub r8, r11++ + st.b r12++, r8 + cp.w r8, 0 + breq 2f + sub r9, 1 + brne 1b + +2: sub r10, r9 + retal r10 + + .section .fixup, "ax" + .align 1 +3: mov r12, -EFAULT + retal r12 + + .section __ex_table, "a" + .align 2 + .long 1b, 3b diff --git a/arch/avr32/lib/strnlen_user.S b/arch/avr32/lib/strnlen_user.S new file mode 100644 index 000000000000..65ce11afa66a --- /dev/null +++ b/arch/avr32/lib/strnlen_user.S @@ -0,0 +1,67 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + + .text + .align 1 + .global strnlen_user + .type strnlen_user, "function" +strnlen_user: + branch_if_kernel r8, __strnlen_user + sub r8, r11, 1 + add r8, r12 + retcs 0 + brmi adjust_length /* do a closer inspection */ + + .global __strnlen_user + .type __strnlen_user, "function" +__strnlen_user: + mov r10, r12 + +10: ld.ub r8, r12++ + cp.w r8, 0 + breq 2f + sub r11, 1 + brne 10b + + sub r12, -1 +2: sub r12, r10 + retal r12 + + + .type adjust_length, "function" +adjust_length: + cp.w r12, 0 /* addr must always be < TASK_SIZE */ + retmi 0 + + pushm lr + lddpc lr, _task_size + sub r11, lr, r12 + mov r9, r11 + rcall __strnlen_user + cp.w r12, r9 + brgt 1f + popm pc +1: popm pc, r12=0 + + .align 2 +_task_size: + .long TASK_SIZE + + .section .fixup, "ax" + .align 1 +19: retal 0 + + .section __ex_table, "a" + .align 2 + .long 10b, 19b diff --git a/arch/avr32/mach-at32ap/Makefile b/arch/avr32/mach-at32ap/Makefile new file mode 100644 index 000000000000..4b10853eb614 --- /dev/null +++ b/arch/avr32/mach-at32ap/Makefile @@ -0,0 +1,2 @@ +obj-y += at32ap.o clock.o pio.o intc.o extint.o +obj-$(CONFIG_CPU_AT32AP7000) += at32ap7000.o diff --git a/arch/avr32/mach-at32ap/at32ap.c b/arch/avr32/mach-at32ap/at32ap.c new file mode 100644 index 000000000000..f7cedf5aabea --- /dev/null +++ b/arch/avr32/mach-at32ap/at32ap.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include + +#include +#include + +struct at32_sm system_manager; + +static int __init at32_sm_init(void) +{ + struct resource *regs; + struct at32_sm *sm = &system_manager; + int ret = -ENXIO; + + regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); + if (!regs) + goto fail; + + spin_lock_init(&sm->lock); + sm->pdev = &at32_sm_device; + + ret = -ENOMEM; + sm->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!sm->regs) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); + return ret; +} + +void __init setup_platform(void) +{ + at32_sm_init(); + at32_clock_init(); + at32_portmux_init(); + + /* FIXME: This doesn't belong here */ + at32_setup_serial_console(1); +} + +static int __init pdc_probe(struct platform_device *pdev) +{ + struct clk *pclk, *hclk; + + pclk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) { + dev_err(&pdev->dev, "no pclk defined\n"); + return PTR_ERR(pclk); + } + hclk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(hclk)) { + dev_err(&pdev->dev, "no hclk defined\n"); + clk_put(pclk); + return PTR_ERR(hclk); + } + + clk_enable(pclk); + clk_enable(hclk); + + dev_info(&pdev->dev, "Atmel Peripheral DMA Controller enabled\n"); + return 0; +} + +static struct platform_driver pdc_driver = { + .probe = pdc_probe, + .driver = { + .name = "pdc", + }, +}; + +static int __init pdc_init(void) +{ + return platform_driver_register(&pdc_driver); +} +arch_initcall(pdc_init); diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c new file mode 100644 index 000000000000..e8c6893a1c23 --- /dev/null +++ b/arch/avr32/mach-at32ap/at32ap7000.c @@ -0,0 +1,866 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#include + +#include +#include +#include + +#include "clock.h" +#include "pio.h" +#include "sm.h" + +#define PBMEM(base) \ + { \ + .start = base, \ + .end = base + 0x3ff, \ + .flags = IORESOURCE_MEM, \ + } +#define IRQ(num) \ + { \ + .start = num, \ + .end = num, \ + .flags = IORESOURCE_IRQ, \ + } +#define NAMED_IRQ(num, _name) \ + { \ + .start = num, \ + .end = num, \ + .name = _name, \ + .flags = IORESOURCE_IRQ, \ + } + +#define DEFINE_DEV(_name, _id) \ +static struct platform_device _name##_id##_device = { \ + .name = #_name, \ + .id = _id, \ + .resource = _name##_id##_resource, \ + .num_resources = ARRAY_SIZE(_name##_id##_resource), \ +} +#define DEFINE_DEV_DATA(_name, _id) \ +static struct platform_device _name##_id##_device = { \ + .name = #_name, \ + .id = _id, \ + .dev = { \ + .platform_data = &_name##_id##_data, \ + }, \ + .resource = _name##_id##_resource, \ + .num_resources = ARRAY_SIZE(_name##_id##_resource), \ +} + +#define DEV_CLK(_name, devname, bus, _index) \ +static struct clk devname##_##_name = { \ + .name = #_name, \ + .dev = &devname##_device.dev, \ + .parent = &bus##_clk, \ + .mode = bus##_clk_mode, \ + .get_rate = bus##_clk_get_rate, \ + .index = _index, \ +} + +enum { + PIOA, + PIOB, + PIOC, + PIOD, +}; + +enum { + FUNC_A, + FUNC_B, +}; + +unsigned long at32ap7000_osc_rates[3] = { + [0] = 32768, + /* FIXME: these are ATSTK1002-specific */ + [1] = 20000000, + [2] = 12000000, +}; + +static unsigned long osc_get_rate(struct clk *clk) +{ + return at32ap7000_osc_rates[clk->index]; +} + +static unsigned long pll_get_rate(struct clk *clk, unsigned long control) +{ + unsigned long div, mul, rate; + + if (!(control & SM_BIT(PLLEN))) + return 0; + + div = SM_BFEXT(PLLDIV, control) + 1; + mul = SM_BFEXT(PLLMUL, control) + 1; + + rate = clk->parent->get_rate(clk->parent); + rate = (rate + div / 2) / div; + rate *= mul; + + return rate; +} + +static unsigned long pll0_get_rate(struct clk *clk) +{ + u32 control; + + control = sm_readl(&system_manager, PM_PLL0); + + return pll_get_rate(clk, control); +} + +static unsigned long pll1_get_rate(struct clk *clk) +{ + u32 control; + + control = sm_readl(&system_manager, PM_PLL1); + + return pll_get_rate(clk, control); +} + +/* + * The AT32AP7000 has five primary clock sources: One 32kHz + * oscillator, two crystal oscillators and two PLLs. + */ +static struct clk osc32k = { + .name = "osc32k", + .get_rate = osc_get_rate, + .users = 1, + .index = 0, +}; +static struct clk osc0 = { + .name = "osc0", + .get_rate = osc_get_rate, + .users = 1, + .index = 1, +}; +static struct clk osc1 = { + .name = "osc1", + .get_rate = osc_get_rate, + .index = 2, +}; +static struct clk pll0 = { + .name = "pll0", + .get_rate = pll0_get_rate, + .parent = &osc0, +}; +static struct clk pll1 = { + .name = "pll1", + .get_rate = pll1_get_rate, + .parent = &osc0, +}; + +/* + * The main clock can be either osc0 or pll0. The boot loader may + * have chosen one for us, so we don't really know which one until we + * have a look at the SM. + */ +static struct clk *main_clock; + +/* + * Synchronous clocks are generated from the main clock. The clocks + * must satisfy the constraint + * fCPU >= fHSB >= fPB + * i.e. each clock must not be faster than its parent. + */ +static unsigned long bus_clk_get_rate(struct clk *clk, unsigned int shift) +{ + return main_clock->get_rate(main_clock) >> shift; +}; + +static void cpu_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_CPU_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_CPU_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long cpu_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(CPUDIV)) + shift = SM_BFEXT(CPUSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void hsb_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_HSB_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_HSB_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long hsb_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(HSBDIV)) + shift = SM_BFEXT(HSBSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void pba_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_PBA_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_PBA_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long pba_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(PBADIV)) + shift = SM_BFEXT(PBASEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void pbb_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_PBB_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_PBB_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long pbb_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(PBBDIV)) + shift = SM_BFEXT(PBBSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static struct clk cpu_clk = { + .name = "cpu", + .get_rate = cpu_clk_get_rate, + .users = 1, +}; +static struct clk hsb_clk = { + .name = "hsb", + .parent = &cpu_clk, + .get_rate = hsb_clk_get_rate, +}; +static struct clk pba_clk = { + .name = "pba", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = pba_clk_get_rate, + .index = 1, +}; +static struct clk pbb_clk = { + .name = "pbb", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = pbb_clk_get_rate, + .users = 1, + .index = 2, +}; + +/* -------------------------------------------------------------------- + * Generic Clock operations + * -------------------------------------------------------------------- */ + +static void genclk_mode(struct clk *clk, int enabled) +{ + u32 control; + + BUG_ON(clk->index > 7); + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + if (enabled) + control |= SM_BIT(CEN); + else + control &= ~SM_BIT(CEN); + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control); +} + +static unsigned long genclk_get_rate(struct clk *clk) +{ + u32 control; + unsigned long div = 1; + + BUG_ON(clk->index > 7); + + if (!clk->parent) + return 0; + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + if (control & SM_BIT(DIVEN)) + div = 2 * (SM_BFEXT(DIV, control) + 1); + + return clk->parent->get_rate(clk->parent) / div; +} + +static long genclk_set_rate(struct clk *clk, unsigned long rate, int apply) +{ + u32 control; + unsigned long parent_rate, actual_rate, div; + + BUG_ON(clk->index > 7); + + if (!clk->parent) + return 0; + + parent_rate = clk->parent->get_rate(clk->parent); + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + + if (rate > 3 * parent_rate / 4) { + actual_rate = parent_rate; + control &= ~SM_BIT(DIVEN); + } else { + div = (parent_rate + rate) / (2 * rate) - 1; + control = SM_BFINS(DIV, div, control) | SM_BIT(DIVEN); + actual_rate = parent_rate / (2 * (div + 1)); + } + + printk("clk %s: new rate %lu (actual rate %lu)\n", + clk->name, rate, actual_rate); + + if (apply) + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, + control); + + return actual_rate; +} + +int genclk_set_parent(struct clk *clk, struct clk *parent) +{ + u32 control; + + BUG_ON(clk->index > 7); + + printk("clk %s: new parent %s (was %s)\n", + clk->name, parent->name, + clk->parent ? clk->parent->name : "(null)"); + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + + if (parent == &osc1 || parent == &pll1) + control |= SM_BIT(OSCSEL); + else if (parent == &osc0 || parent == &pll0) + control &= ~SM_BIT(OSCSEL); + else + return -EINVAL; + + if (parent == &pll0 || parent == &pll1) + control |= SM_BIT(PLLSEL); + else + control &= ~SM_BIT(PLLSEL); + + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control); + clk->parent = parent; + + return 0; +} + +/* -------------------------------------------------------------------- + * System peripherals + * -------------------------------------------------------------------- */ +static struct resource sm_resource[] = { + PBMEM(0xfff00000), + NAMED_IRQ(19, "eim"), + NAMED_IRQ(20, "pm"), + NAMED_IRQ(21, "rtc"), +}; +struct platform_device at32_sm_device = { + .name = "sm", + .id = 0, + .resource = sm_resource, + .num_resources = ARRAY_SIZE(sm_resource), +}; +DEV_CLK(pclk, at32_sm, pbb, 0); + +static struct resource intc0_resource[] = { + PBMEM(0xfff00400), +}; +struct platform_device at32_intc0_device = { + .name = "intc", + .id = 0, + .resource = intc0_resource, + .num_resources = ARRAY_SIZE(intc0_resource), +}; +DEV_CLK(pclk, at32_intc0, pbb, 1); + +static struct clk ebi_clk = { + .name = "ebi", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = hsb_clk_get_rate, + .users = 1, +}; +static struct clk hramc_clk = { + .name = "hramc", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = hsb_clk_get_rate, + .users = 1, +}; + +static struct platform_device pdc_device = { + .name = "pdc", + .id = 0, +}; +DEV_CLK(hclk, pdc, hsb, 4); +DEV_CLK(pclk, pdc, pba, 16); + +static struct clk pico_clk = { + .name = "pico", + .parent = &cpu_clk, + .mode = cpu_clk_mode, + .get_rate = cpu_clk_get_rate, + .users = 1, +}; + +/* -------------------------------------------------------------------- + * PIO + * -------------------------------------------------------------------- */ + +static struct resource pio0_resource[] = { + PBMEM(0xffe02800), + IRQ(13), +}; +DEFINE_DEV(pio, 0); +DEV_CLK(mck, pio0, pba, 10); + +static struct resource pio1_resource[] = { + PBMEM(0xffe02c00), + IRQ(14), +}; +DEFINE_DEV(pio, 1); +DEV_CLK(mck, pio1, pba, 11); + +static struct resource pio2_resource[] = { + PBMEM(0xffe03000), + IRQ(15), +}; +DEFINE_DEV(pio, 2); +DEV_CLK(mck, pio2, pba, 12); + +static struct resource pio3_resource[] = { + PBMEM(0xffe03400), + IRQ(16), +}; +DEFINE_DEV(pio, 3); +DEV_CLK(mck, pio3, pba, 13); + +void __init at32_add_system_devices(void) +{ + system_manager.eim_first_irq = NR_INTERNAL_IRQS; + + platform_device_register(&at32_sm_device); + platform_device_register(&at32_intc0_device); + platform_device_register(&pdc_device); + + platform_device_register(&pio0_device); + platform_device_register(&pio1_device); + platform_device_register(&pio2_device); + platform_device_register(&pio3_device); +} + +/* -------------------------------------------------------------------- + * USART + * -------------------------------------------------------------------- */ + +static struct resource usart0_resource[] = { + PBMEM(0xffe00c00), + IRQ(7), +}; +DEFINE_DEV(usart, 0); +DEV_CLK(usart, usart0, pba, 4); + +static struct resource usart1_resource[] = { + PBMEM(0xffe01000), + IRQ(7), +}; +DEFINE_DEV(usart, 1); +DEV_CLK(usart, usart1, pba, 4); + +static struct resource usart2_resource[] = { + PBMEM(0xffe01400), + IRQ(8), +}; +DEFINE_DEV(usart, 2); +DEV_CLK(usart, usart2, pba, 5); + +static struct resource usart3_resource[] = { + PBMEM(0xffe01800), + IRQ(9), +}; +DEFINE_DEV(usart, 3); +DEV_CLK(usart, usart3, pba, 6); + +static inline void configure_usart0_pins(void) +{ + portmux_set_func(PIOA, 8, FUNC_B); /* RXD */ + portmux_set_func(PIOA, 9, FUNC_B); /* TXD */ +} + +static inline void configure_usart1_pins(void) +{ + portmux_set_func(PIOA, 17, FUNC_A); /* RXD */ + portmux_set_func(PIOA, 18, FUNC_A); /* TXD */ +} + +static inline void configure_usart2_pins(void) +{ + portmux_set_func(PIOB, 26, FUNC_B); /* RXD */ + portmux_set_func(PIOB, 27, FUNC_B); /* TXD */ +} + +static inline void configure_usart3_pins(void) +{ + portmux_set_func(PIOB, 18, FUNC_B); /* RXD */ + portmux_set_func(PIOB, 17, FUNC_B); /* TXD */ +} + +static struct platform_device *setup_usart(unsigned int id) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &usart0_device; + configure_usart0_pins(); + break; + case 1: + pdev = &usart1_device; + configure_usart1_pins(); + break; + case 2: + pdev = &usart2_device; + configure_usart2_pins(); + break; + case 3: + pdev = &usart3_device; + configure_usart3_pins(); + break; + default: + pdev = NULL; + break; + } + + return pdev; +} + +struct platform_device *__init at32_add_device_usart(unsigned int id) +{ + struct platform_device *pdev; + + pdev = setup_usart(id); + if (pdev) + platform_device_register(pdev); + + return pdev; +} + +struct platform_device *at91_default_console_device; + +void __init at32_setup_serial_console(unsigned int usart_id) +{ + at91_default_console_device = setup_usart(usart_id); +} + +/* -------------------------------------------------------------------- + * Ethernet + * -------------------------------------------------------------------- */ + +static struct eth_platform_data macb0_data; +static struct resource macb0_resource[] = { + PBMEM(0xfff01800), + IRQ(25), +}; +DEFINE_DEV_DATA(macb, 0); +DEV_CLK(hclk, macb0, hsb, 8); +DEV_CLK(pclk, macb0, pbb, 6); + +struct platform_device *__init +at32_add_device_eth(unsigned int id, struct eth_platform_data *data) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &macb0_device; + + portmux_set_func(PIOC, 3, FUNC_A); /* TXD0 */ + portmux_set_func(PIOC, 4, FUNC_A); /* TXD1 */ + portmux_set_func(PIOC, 7, FUNC_A); /* TXEN */ + portmux_set_func(PIOC, 8, FUNC_A); /* TXCK */ + portmux_set_func(PIOC, 9, FUNC_A); /* RXD0 */ + portmux_set_func(PIOC, 10, FUNC_A); /* RXD1 */ + portmux_set_func(PIOC, 13, FUNC_A); /* RXER */ + portmux_set_func(PIOC, 15, FUNC_A); /* RXDV */ + portmux_set_func(PIOC, 16, FUNC_A); /* MDC */ + portmux_set_func(PIOC, 17, FUNC_A); /* MDIO */ + + if (!data->is_rmii) { + portmux_set_func(PIOC, 0, FUNC_A); /* COL */ + portmux_set_func(PIOC, 1, FUNC_A); /* CRS */ + portmux_set_func(PIOC, 2, FUNC_A); /* TXER */ + portmux_set_func(PIOC, 5, FUNC_A); /* TXD2 */ + portmux_set_func(PIOC, 6, FUNC_A); /* TXD3 */ + portmux_set_func(PIOC, 11, FUNC_A); /* RXD2 */ + portmux_set_func(PIOC, 12, FUNC_A); /* RXD3 */ + portmux_set_func(PIOC, 14, FUNC_A); /* RXCK */ + portmux_set_func(PIOC, 18, FUNC_A); /* SPD */ + } + break; + + default: + return NULL; + } + + memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data)); + platform_device_register(pdev); + + return pdev; +} + +/* -------------------------------------------------------------------- + * SPI + * -------------------------------------------------------------------- */ +static struct resource spi0_resource[] = { + PBMEM(0xffe00000), + IRQ(3), +}; +DEFINE_DEV(spi, 0); +DEV_CLK(mck, spi0, pba, 0); + +struct platform_device *__init at32_add_device_spi(unsigned int id) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &spi0_device; + portmux_set_func(PIOA, 0, FUNC_A); /* MISO */ + portmux_set_func(PIOA, 1, FUNC_A); /* MOSI */ + portmux_set_func(PIOA, 2, FUNC_A); /* SCK */ + portmux_set_func(PIOA, 3, FUNC_A); /* NPCS0 */ + portmux_set_func(PIOA, 4, FUNC_A); /* NPCS1 */ + portmux_set_func(PIOA, 5, FUNC_A); /* NPCS2 */ + break; + + default: + return NULL; + } + + platform_device_register(pdev); + return pdev; +} + +/* -------------------------------------------------------------------- + * LCDC + * -------------------------------------------------------------------- */ +static struct lcdc_platform_data lcdc0_data; +static struct resource lcdc0_resource[] = { + { + .start = 0xff000000, + .end = 0xff000fff, + .flags = IORESOURCE_MEM, + }, + IRQ(1), +}; +DEFINE_DEV_DATA(lcdc, 0); +DEV_CLK(hclk, lcdc0, hsb, 7); +static struct clk lcdc0_pixclk = { + .name = "pixclk", + .dev = &lcdc0_device.dev, + .mode = genclk_mode, + .get_rate = genclk_get_rate, + .set_rate = genclk_set_rate, + .set_parent = genclk_set_parent, + .index = 7, +}; + +struct platform_device *__init +at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &lcdc0_device; + portmux_set_func(PIOC, 19, FUNC_A); /* CC */ + portmux_set_func(PIOC, 20, FUNC_A); /* HSYNC */ + portmux_set_func(PIOC, 21, FUNC_A); /* PCLK */ + portmux_set_func(PIOC, 22, FUNC_A); /* VSYNC */ + portmux_set_func(PIOC, 23, FUNC_A); /* DVAL */ + portmux_set_func(PIOC, 24, FUNC_A); /* MODE */ + portmux_set_func(PIOC, 25, FUNC_A); /* PWR */ + portmux_set_func(PIOC, 26, FUNC_A); /* DATA0 */ + portmux_set_func(PIOC, 27, FUNC_A); /* DATA1 */ + portmux_set_func(PIOC, 28, FUNC_A); /* DATA2 */ + portmux_set_func(PIOC, 29, FUNC_A); /* DATA3 */ + portmux_set_func(PIOC, 30, FUNC_A); /* DATA4 */ + portmux_set_func(PIOC, 31, FUNC_A); /* DATA5 */ + portmux_set_func(PIOD, 0, FUNC_A); /* DATA6 */ + portmux_set_func(PIOD, 1, FUNC_A); /* DATA7 */ + portmux_set_func(PIOD, 2, FUNC_A); /* DATA8 */ + portmux_set_func(PIOD, 3, FUNC_A); /* DATA9 */ + portmux_set_func(PIOD, 4, FUNC_A); /* DATA10 */ + portmux_set_func(PIOD, 5, FUNC_A); /* DATA11 */ + portmux_set_func(PIOD, 6, FUNC_A); /* DATA12 */ + portmux_set_func(PIOD, 7, FUNC_A); /* DATA13 */ + portmux_set_func(PIOD, 8, FUNC_A); /* DATA14 */ + portmux_set_func(PIOD, 9, FUNC_A); /* DATA15 */ + portmux_set_func(PIOD, 10, FUNC_A); /* DATA16 */ + portmux_set_func(PIOD, 11, FUNC_A); /* DATA17 */ + portmux_set_func(PIOD, 12, FUNC_A); /* DATA18 */ + portmux_set_func(PIOD, 13, FUNC_A); /* DATA19 */ + portmux_set_func(PIOD, 14, FUNC_A); /* DATA20 */ + portmux_set_func(PIOD, 15, FUNC_A); /* DATA21 */ + portmux_set_func(PIOD, 16, FUNC_A); /* DATA22 */ + portmux_set_func(PIOD, 17, FUNC_A); /* DATA23 */ + + clk_set_parent(&lcdc0_pixclk, &pll0); + clk_set_rate(&lcdc0_pixclk, clk_get_rate(&pll0)); + break; + + default: + return NULL; + } + + memcpy(pdev->dev.platform_data, data, + sizeof(struct lcdc_platform_data)); + + platform_device_register(pdev); + return pdev; +} + +struct clk *at32_clock_list[] = { + &osc32k, + &osc0, + &osc1, + &pll0, + &pll1, + &cpu_clk, + &hsb_clk, + &pba_clk, + &pbb_clk, + &at32_sm_pclk, + &at32_intc0_pclk, + &ebi_clk, + &hramc_clk, + &pdc_hclk, + &pdc_pclk, + &pico_clk, + &pio0_mck, + &pio1_mck, + &pio2_mck, + &pio3_mck, + &usart0_usart, + &usart1_usart, + &usart2_usart, + &usart3_usart, + &macb0_hclk, + &macb0_pclk, + &spi0_mck, + &lcdc0_hclk, + &lcdc0_pixclk, +}; +unsigned int at32_nr_clocks = ARRAY_SIZE(at32_clock_list); + +void __init at32_portmux_init(void) +{ + at32_init_pio(&pio0_device); + at32_init_pio(&pio1_device); + at32_init_pio(&pio2_device); + at32_init_pio(&pio3_device); +} + +void __init at32_clock_init(void) +{ + struct at32_sm *sm = &system_manager; + u32 cpu_mask = 0, hsb_mask = 0, pba_mask = 0, pbb_mask = 0; + int i; + + if (sm_readl(sm, PM_MCCTRL) & SM_BIT(PLLSEL)) + main_clock = &pll0; + else + main_clock = &osc0; + + if (sm_readl(sm, PM_PLL0) & SM_BIT(PLLOSC)) + pll0.parent = &osc1; + if (sm_readl(sm, PM_PLL1) & SM_BIT(PLLOSC)) + pll1.parent = &osc1; + + /* + * Turn on all clocks that have at least one user already, and + * turn off everything else. We only do this for module + * clocks, and even though it isn't particularly pretty to + * check the address of the mode function, it should do the + * trick... + */ + for (i = 0; i < ARRAY_SIZE(at32_clock_list); i++) { + struct clk *clk = at32_clock_list[i]; + + if (clk->mode == &cpu_clk_mode) + cpu_mask |= 1 << clk->index; + else if (clk->mode == &hsb_clk_mode) + hsb_mask |= 1 << clk->index; + else if (clk->mode == &pba_clk_mode) + pba_mask |= 1 << clk->index; + else if (clk->mode == &pbb_clk_mode) + pbb_mask |= 1 << clk->index; + } + + sm_writel(sm, PM_CPU_MASK, cpu_mask); + sm_writel(sm, PM_HSB_MASK, hsb_mask); + sm_writel(sm, PM_PBA_MASK, pba_mask); + sm_writel(sm, PM_PBB_MASK, pbb_mask); +} diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c new file mode 100644 index 000000000000..3d0d1097389f --- /dev/null +++ b/arch/avr32/mach-at32ap/clock.c @@ -0,0 +1,148 @@ +/* + * Clock management for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on arch/arm/mach-at91rm9200/clock.c + * Copyright (C) 2005 David Brownell + * Copyright (C) 2005 Ivan Kokshaysky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +#include "clock.h" + +static spinlock_t clk_lock = SPIN_LOCK_UNLOCKED; + +struct clk *clk_get(struct device *dev, const char *id) +{ + int i; + + for (i = 0; i < at32_nr_clocks; i++) { + struct clk *clk = at32_clock_list[i]; + + if (clk->dev == dev && strcmp(id, clk->name) == 0) + return clk; + } + + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + /* clocks are static for now, we can't free them */ +} +EXPORT_SYMBOL(clk_put); + +static void __clk_enable(struct clk *clk) +{ + if (clk->parent) + __clk_enable(clk->parent); + if (clk->users++ == 0 && clk->mode) + clk->mode(clk, 1); +} + +int clk_enable(struct clk *clk) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_lock, flags); + __clk_enable(clk); + spin_unlock_irqrestore(&clk_lock, flags); + + return 0; +} +EXPORT_SYMBOL(clk_enable); + +static void __clk_disable(struct clk *clk) +{ + BUG_ON(clk->users == 0); + + if (--clk->users == 0 && clk->mode) + clk->mode(clk, 0); + if (clk->parent) + __clk_disable(clk->parent); +} + +void clk_disable(struct clk *clk) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_lock, flags); + __clk_disable(clk); + spin_unlock_irqrestore(&clk_lock, flags); +} +EXPORT_SYMBOL(clk_disable); + +unsigned long clk_get_rate(struct clk *clk) +{ + unsigned long flags; + unsigned long rate; + + spin_lock_irqsave(&clk_lock, flags); + rate = clk->get_rate(clk); + spin_unlock_irqrestore(&clk_lock, flags); + + return rate; +} +EXPORT_SYMBOL(clk_get_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + unsigned long flags, actual_rate; + + if (!clk->set_rate) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + actual_rate = clk->set_rate(clk, rate, 0); + spin_unlock_irqrestore(&clk_lock, flags); + + return actual_rate; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + unsigned long flags; + long ret; + + if (!clk->set_rate) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + ret = clk->set_rate(clk, rate, 1); + spin_unlock_irqrestore(&clk_lock, flags); + + return (ret < 0) ? ret : 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + unsigned long flags; + int ret; + + if (!clk->set_parent) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + ret = clk->set_parent(clk, parent); + spin_unlock_irqrestore(&clk_lock, flags); + + return ret; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/avr32/mach-at32ap/clock.h b/arch/avr32/mach-at32ap/clock.h new file mode 100644 index 000000000000..f953f044ba4d --- /dev/null +++ b/arch/avr32/mach-at32ap/clock.h @@ -0,0 +1,30 @@ +/* + * Clock management for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on arch/arm/mach-at91rm9200/clock.c + * Copyright (C) 2005 David Brownell + * Copyright (C) 2005 Ivan Kokshaysky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +struct clk { + const char *name; /* Clock name/function */ + struct device *dev; /* Device the clock is used by */ + struct clk *parent; /* Parent clock, if any */ + void (*mode)(struct clk *clk, int enabled); + unsigned long (*get_rate)(struct clk *clk); + long (*set_rate)(struct clk *clk, unsigned long rate, + int apply); + int (*set_parent)(struct clk *clk, struct clk *parent); + u16 users; /* Enabled if non-zero */ + u16 index; /* Sibling index */ +}; + +extern struct clk *at32_clock_list[]; +extern unsigned int at32_nr_clocks; diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c new file mode 100644 index 000000000000..7da9c5f7a0eb --- /dev/null +++ b/arch/avr32/mach-at32ap/extint.c @@ -0,0 +1,171 @@ +/* + * External interrupt handling for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "sm.h" + +static void eim_ack_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_mask_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_mask_ack_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq)); + sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_unmask_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_IER, 1 << (irq - sm->eim_first_irq)); +} + +static int eim_set_irq_type(unsigned int irq, unsigned int flow_type) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + unsigned int i = irq - sm->eim_first_irq; + u32 mode, edge, level; + unsigned long flags; + int ret = 0; + + flow_type &= IRQ_TYPE_SENSE_MASK; + + spin_lock_irqsave(&sm->lock, flags); + + mode = sm_readl(sm, EIM_MODE); + edge = sm_readl(sm, EIM_EDGE); + level = sm_readl(sm, EIM_LEVEL); + + switch (flow_type) { + case IRQ_TYPE_LEVEL_LOW: + mode |= 1 << i; + level &= ~(1 << i); + break; + case IRQ_TYPE_LEVEL_HIGH: + mode |= 1 << i; + level |= 1 << i; + break; + case IRQ_TYPE_EDGE_RISING: + mode &= ~(1 << i); + edge |= 1 << i; + break; + case IRQ_TYPE_EDGE_FALLING: + mode &= ~(1 << i); + edge &= ~(1 << i); + break; + default: + ret = -EINVAL; + break; + } + + sm_writel(sm, EIM_MODE, mode); + sm_writel(sm, EIM_EDGE, edge); + sm_writel(sm, EIM_LEVEL, level); + + spin_unlock_irqrestore(&sm->lock, flags); + + return ret; +} + +struct irq_chip eim_chip = { + .name = "eim", + .ack = eim_ack_irq, + .mask = eim_mask_irq, + .mask_ack = eim_mask_ack_irq, + .unmask = eim_unmask_irq, + .set_type = eim_set_irq_type, +}; + +static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) +{ + struct at32_sm *sm = desc->handler_data; + struct irq_desc *ext_desc; + unsigned long status, pending; + unsigned int i, ext_irq; + + spin_lock(&sm->lock); + + status = sm_readl(sm, EIM_ISR); + pending = status & sm_readl(sm, EIM_IMR); + + while (pending) { + i = fls(pending) - 1; + pending &= ~(1 << i); + + ext_irq = i + sm->eim_first_irq; + ext_desc = irq_desc + ext_irq; + ext_desc->handle_irq(ext_irq, ext_desc, regs); + } + + spin_unlock(&sm->lock); +} + +static int __init eim_init(void) +{ + struct at32_sm *sm = &system_manager; + unsigned int i; + unsigned int nr_irqs; + unsigned int int_irq; + u32 pattern; + + /* + * The EIM is really the same module as SM, so register + * mapping, etc. has been taken care of already. + */ + + /* + * Find out how many interrupt lines that are actually + * implemented in hardware. + */ + sm_writel(sm, EIM_IDR, ~0UL); + sm_writel(sm, EIM_MODE, ~0UL); + pattern = sm_readl(sm, EIM_MODE); + nr_irqs = fls(pattern); + + sm->eim_chip = &eim_chip; + + for (i = 0; i < nr_irqs; i++) { + set_irq_chip(sm->eim_first_irq + i, &eim_chip); + set_irq_chip_data(sm->eim_first_irq + i, sm); + } + + int_irq = platform_get_irq_byname(sm->pdev, "eim"); + + set_irq_chained_handler(int_irq, demux_eim_irq); + set_irq_data(int_irq, sm); + + printk("EIM: External Interrupt Module at 0x%p, IRQ %u\n", + sm->regs, int_irq); + printk("EIM: Handling %u external IRQs, starting with IRQ %u\n", + nr_irqs, sm->eim_first_irq); + + return 0; +} +arch_initcall(eim_init); diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c new file mode 100644 index 000000000000..74f8c9f2f03d --- /dev/null +++ b/arch/avr32/mach-at32ap/intc.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "intc.h" + +struct intc { + void __iomem *regs; + struct irq_chip chip; +}; + +extern struct platform_device at32_intc0_device; + +/* + * TODO: We may be able to implement mask/unmask by setting IxM flags + * in the status register. + */ +static void intc_mask_irq(unsigned int irq) +{ + +} + +static void intc_unmask_irq(unsigned int irq) +{ + +} + +static struct intc intc0 = { + .chip = { + .name = "intc", + .mask = intc_mask_irq, + .unmask = intc_unmask_irq, + }, +}; + +/* + * All interrupts go via intc at some point. + */ +asmlinkage void do_IRQ(int level, struct pt_regs *regs) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long status_reg; + + local_irq_disable(); + + irq_enter(); + + irq = intc_readl(&intc0, INTCAUSE0 - 4 * level); + desc = irq_desc + irq; + desc->handle_irq(irq, desc, regs); + + /* + * Clear all interrupt level masks so that we may handle + * interrupts during softirq processing. If this is a nested + * interrupt, interrupts must stay globally disabled until we + * return. + */ + status_reg = sysreg_read(SR); + status_reg &= ~(SYSREG_BIT(I0M) | SYSREG_BIT(I1M) + | SYSREG_BIT(I2M) | SYSREG_BIT(I3M)); + sysreg_write(SR, status_reg); + + irq_exit(); +} + +void __init init_IRQ(void) +{ + extern void _evba(void); + extern void irq_level0(void); + struct resource *regs; + struct clk *pclk; + unsigned int i; + u32 offset, readback; + + regs = platform_get_resource(&at32_intc0_device, IORESOURCE_MEM, 0); + if (!regs) { + printk(KERN_EMERG "intc: no mmio resource defined\n"); + goto fail; + } + pclk = clk_get(&at32_intc0_device.dev, "pclk"); + if (IS_ERR(pclk)) { + printk(KERN_EMERG "intc: no clock defined\n"); + goto fail; + } + + clk_enable(pclk); + + intc0.regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!intc0.regs) { + printk(KERN_EMERG "intc: failed to map registers (0x%08lx)\n", + (unsigned long)regs->start); + goto fail; + } + + /* + * Initialize all interrupts to level 0 (lowest priority). The + * priority level may be changed by calling + * irq_set_priority(). + * + */ + offset = (unsigned long)&irq_level0 - (unsigned long)&_evba; + for (i = 0; i < NR_INTERNAL_IRQS; i++) { + intc_writel(&intc0, INTPR0 + 4 * i, offset); + readback = intc_readl(&intc0, INTPR0 + 4 * i); + if (readback == offset) + set_irq_chip_and_handler(i, &intc0.chip, + handle_simple_irq); + } + + /* Unmask all interrupt levels */ + sysreg_write(SR, (sysreg_read(SR) + & ~(SR_I3M | SR_I2M | SR_I1M | SR_I0M))); + + return; + +fail: + panic("Interrupt controller initialization failed!\n"); +} + diff --git a/arch/avr32/mach-at32ap/intc.h b/arch/avr32/mach-at32ap/intc.h new file mode 100644 index 000000000000..d289ca2fff13 --- /dev/null +++ b/arch/avr32/mach-at32ap/intc.h @@ -0,0 +1,327 @@ +/* + * Automatically generated by gen-header.xsl + */ +#ifndef __ASM_AVR32_PERIHP_INTC_H__ +#define __ASM_AVR32_PERIHP_INTC_H__ + +#define INTC_NUM_INT_GRPS 33 + +#define INTC_INTPR0 0x0 +# define INTC_INTPR0_INTLEV_OFFSET 30 +# define INTC_INTPR0_INTLEV_SIZE 2 +# define INTC_INTPR0_OFFSET_OFFSET 0 +# define INTC_INTPR0_OFFSET_SIZE 24 +#define INTC_INTREQ0 0x100 +# define INTC_INTREQ0_IREQUEST0_OFFSET 0 +# define INTC_INTREQ0_IREQUEST0_SIZE 1 +# define INTC_INTREQ0_IREQUEST1_OFFSET 1 +# define INTC_INTREQ0_IREQUEST1_SIZE 1 +#define INTC_INTPR1 0x4 +# define INTC_INTPR1_INTLEV_OFFSET 30 +# define INTC_INTPR1_INTLEV_SIZE 2 +# define INTC_INTPR1_OFFSET_OFFSET 0 +# define INTC_INTPR1_OFFSET_SIZE 24 +#define INTC_INTREQ1 0x104 +# define INTC_INTREQ1_IREQUEST32_OFFSET 0 +# define INTC_INTREQ1_IREQUEST32_SIZE 1 +# define INTC_INTREQ1_IREQUEST33_OFFSET 1 +# define INTC_INTREQ1_IREQUEST33_SIZE 1 +# define INTC_INTREQ1_IREQUEST34_OFFSET 2 +# define INTC_INTREQ1_IREQUEST34_SIZE 1 +# define INTC_INTREQ1_IREQUEST35_OFFSET 3 +# define INTC_INTREQ1_IREQUEST35_SIZE 1 +# define INTC_INTREQ1_IREQUEST36_OFFSET 4 +# define INTC_INTREQ1_IREQUEST36_SIZE 1 +# define INTC_INTREQ1_IREQUEST37_OFFSET 5 +# define INTC_INTREQ1_IREQUEST37_SIZE 1 +#define INTC_INTPR2 0x8 +# define INTC_INTPR2_INTLEV_OFFSET 30 +# define INTC_INTPR2_INTLEV_SIZE 2 +# define INTC_INTPR2_OFFSET_OFFSET 0 +# define INTC_INTPR2_OFFSET_SIZE 24 +#define INTC_INTREQ2 0x108 +# define INTC_INTREQ2_IREQUEST64_OFFSET 0 +# define INTC_INTREQ2_IREQUEST64_SIZE 1 +# define INTC_INTREQ2_IREQUEST65_OFFSET 1 +# define INTC_INTREQ2_IREQUEST65_SIZE 1 +# define INTC_INTREQ2_IREQUEST66_OFFSET 2 +# define INTC_INTREQ2_IREQUEST66_SIZE 1 +# define INTC_INTREQ2_IREQUEST67_OFFSET 3 +# define INTC_INTREQ2_IREQUEST67_SIZE 1 +# define INTC_INTREQ2_IREQUEST68_OFFSET 4 +# define INTC_INTREQ2_IREQUEST68_SIZE 1 +#define INTC_INTPR3 0xc +# define INTC_INTPR3_INTLEV_OFFSET 30 +# define INTC_INTPR3_INTLEV_SIZE 2 +# define INTC_INTPR3_OFFSET_OFFSET 0 +# define INTC_INTPR3_OFFSET_SIZE 24 +#define INTC_INTREQ3 0x10c +# define INTC_INTREQ3_IREQUEST96_OFFSET 0 +# define INTC_INTREQ3_IREQUEST96_SIZE 1 +#define INTC_INTPR4 0x10 +# define INTC_INTPR4_INTLEV_OFFSET 30 +# define INTC_INTPR4_INTLEV_SIZE 2 +# define INTC_INTPR4_OFFSET_OFFSET 0 +# define INTC_INTPR4_OFFSET_SIZE 24 +#define INTC_INTREQ4 0x110 +# define INTC_INTREQ4_IREQUEST128_OFFSET 0 +# define INTC_INTREQ4_IREQUEST128_SIZE 1 +#define INTC_INTPR5 0x14 +# define INTC_INTPR5_INTLEV_OFFSET 30 +# define INTC_INTPR5_INTLEV_SIZE 2 +# define INTC_INTPR5_OFFSET_OFFSET 0 +# define INTC_INTPR5_OFFSET_SIZE 24 +#define INTC_INTREQ5 0x114 +# define INTC_INTREQ5_IREQUEST160_OFFSET 0 +# define INTC_INTREQ5_IREQUEST160_SIZE 1 +#define INTC_INTPR6 0x18 +# define INTC_INTPR6_INTLEV_OFFSET 30 +# define INTC_INTPR6_INTLEV_SIZE 2 +# define INTC_INTPR6_OFFSET_OFFSET 0 +# define INTC_INTPR6_OFFSET_SIZE 24 +#define INTC_INTREQ6 0x118 +# define INTC_INTREQ6_IREQUEST192_OFFSET 0 +# define INTC_INTREQ6_IREQUEST192_SIZE 1 +#define INTC_INTPR7 0x1c +# define INTC_INTPR7_INTLEV_OFFSET 30 +# define INTC_INTPR7_INTLEV_SIZE 2 +# define INTC_INTPR7_OFFSET_OFFSET 0 +# define INTC_INTPR7_OFFSET_SIZE 24 +#define INTC_INTREQ7 0x11c +# define INTC_INTREQ7_IREQUEST224_OFFSET 0 +# define INTC_INTREQ7_IREQUEST224_SIZE 1 +#define INTC_INTPR8 0x20 +# define INTC_INTPR8_INTLEV_OFFSET 30 +# define INTC_INTPR8_INTLEV_SIZE 2 +# define INTC_INTPR8_OFFSET_OFFSET 0 +# define INTC_INTPR8_OFFSET_SIZE 24 +#define INTC_INTREQ8 0x120 +# define INTC_INTREQ8_IREQUEST256_OFFSET 0 +# define INTC_INTREQ8_IREQUEST256_SIZE 1 +#define INTC_INTPR9 0x24 +# define INTC_INTPR9_INTLEV_OFFSET 30 +# define INTC_INTPR9_INTLEV_SIZE 2 +# define INTC_INTPR9_OFFSET_OFFSET 0 +# define INTC_INTPR9_OFFSET_SIZE 24 +#define INTC_INTREQ9 0x124 +# define INTC_INTREQ9_IREQUEST288_OFFSET 0 +# define INTC_INTREQ9_IREQUEST288_SIZE 1 +#define INTC_INTPR10 0x28 +# define INTC_INTPR10_INTLEV_OFFSET 30 +# define INTC_INTPR10_INTLEV_SIZE 2 +# define INTC_INTPR10_OFFSET_OFFSET 0 +# define INTC_INTPR10_OFFSET_SIZE 24 +#define INTC_INTREQ10 0x128 +# define INTC_INTREQ10_IREQUEST320_OFFSET 0 +# define INTC_INTREQ10_IREQUEST320_SIZE 1 +#define INTC_INTPR11 0x2c +# define INTC_INTPR11_INTLEV_OFFSET 30 +# define INTC_INTPR11_INTLEV_SIZE 2 +# define INTC_INTPR11_OFFSET_OFFSET 0 +# define INTC_INTPR11_OFFSET_SIZE 24 +#define INTC_INTREQ11 0x12c +# define INTC_INTREQ11_IREQUEST352_OFFSET 0 +# define INTC_INTREQ11_IREQUEST352_SIZE 1 +#define INTC_INTPR12 0x30 +# define INTC_INTPR12_INTLEV_OFFSET 30 +# define INTC_INTPR12_INTLEV_SIZE 2 +# define INTC_INTPR12_OFFSET_OFFSET 0 +# define INTC_INTPR12_OFFSET_SIZE 24 +#define INTC_INTREQ12 0x130 +# define INTC_INTREQ12_IREQUEST384_OFFSET 0 +# define INTC_INTREQ12_IREQUEST384_SIZE 1 +#define INTC_INTPR13 0x34 +# define INTC_INTPR13_INTLEV_OFFSET 30 +# define INTC_INTPR13_INTLEV_SIZE 2 +# define INTC_INTPR13_OFFSET_OFFSET 0 +# define INTC_INTPR13_OFFSET_SIZE 24 +#define INTC_INTREQ13 0x134 +# define INTC_INTREQ13_IREQUEST416_OFFSET 0 +# define INTC_INTREQ13_IREQUEST416_SIZE 1 +#define INTC_INTPR14 0x38 +# define INTC_INTPR14_INTLEV_OFFSET 30 +# define INTC_INTPR14_INTLEV_SIZE 2 +# define INTC_INTPR14_OFFSET_OFFSET 0 +# define INTC_INTPR14_OFFSET_SIZE 24 +#define INTC_INTREQ14 0x138 +# define INTC_INTREQ14_IREQUEST448_OFFSET 0 +# define INTC_INTREQ14_IREQUEST448_SIZE 1 +#define INTC_INTPR15 0x3c +# define INTC_INTPR15_INTLEV_OFFSET 30 +# define INTC_INTPR15_INTLEV_SIZE 2 +# define INTC_INTPR15_OFFSET_OFFSET 0 +# define INTC_INTPR15_OFFSET_SIZE 24 +#define INTC_INTREQ15 0x13c +# define INTC_INTREQ15_IREQUEST480_OFFSET 0 +# define INTC_INTREQ15_IREQUEST480_SIZE 1 +#define INTC_INTPR16 0x40 +# define INTC_INTPR16_INTLEV_OFFSET 30 +# define INTC_INTPR16_INTLEV_SIZE 2 +# define INTC_INTPR16_OFFSET_OFFSET 0 +# define INTC_INTPR16_OFFSET_SIZE 24 +#define INTC_INTREQ16 0x140 +# define INTC_INTREQ16_IREQUEST512_OFFSET 0 +# define INTC_INTREQ16_IREQUEST512_SIZE 1 +#define INTC_INTPR17 0x44 +# define INTC_INTPR17_INTLEV_OFFSET 30 +# define INTC_INTPR17_INTLEV_SIZE 2 +# define INTC_INTPR17_OFFSET_OFFSET 0 +# define INTC_INTPR17_OFFSET_SIZE 24 +#define INTC_INTREQ17 0x144 +# define INTC_INTREQ17_IREQUEST544_OFFSET 0 +# define INTC_INTREQ17_IREQUEST544_SIZE 1 +#define INTC_INTPR18 0x48 +# define INTC_INTPR18_INTLEV_OFFSET 30 +# define INTC_INTPR18_INTLEV_SIZE 2 +# define INTC_INTPR18_OFFSET_OFFSET 0 +# define INTC_INTPR18_OFFSET_SIZE 24 +#define INTC_INTREQ18 0x148 +# define INTC_INTREQ18_IREQUEST576_OFFSET 0 +# define INTC_INTREQ18_IREQUEST576_SIZE 1 +#define INTC_INTPR19 0x4c +# define INTC_INTPR19_INTLEV_OFFSET 30 +# define INTC_INTPR19_INTLEV_SIZE 2 +# define INTC_INTPR19_OFFSET_OFFSET 0 +# define INTC_INTPR19_OFFSET_SIZE 24 +#define INTC_INTREQ19 0x14c +# define INTC_INTREQ19_IREQUEST608_OFFSET 0 +# define INTC_INTREQ19_IREQUEST608_SIZE 1 +# define INTC_INTREQ19_IREQUEST609_OFFSET 1 +# define INTC_INTREQ19_IREQUEST609_SIZE 1 +# define INTC_INTREQ19_IREQUEST610_OFFSET 2 +# define INTC_INTREQ19_IREQUEST610_SIZE 1 +# define INTC_INTREQ19_IREQUEST611_OFFSET 3 +# define INTC_INTREQ19_IREQUEST611_SIZE 1 +#define INTC_INTPR20 0x50 +# define INTC_INTPR20_INTLEV_OFFSET 30 +# define INTC_INTPR20_INTLEV_SIZE 2 +# define INTC_INTPR20_OFFSET_OFFSET 0 +# define INTC_INTPR20_OFFSET_SIZE 24 +#define INTC_INTREQ20 0x150 +# define INTC_INTREQ20_IREQUEST640_OFFSET 0 +# define INTC_INTREQ20_IREQUEST640_SIZE 1 +#define INTC_INTPR21 0x54 +# define INTC_INTPR21_INTLEV_OFFSET 30 +# define INTC_INTPR21_INTLEV_SIZE 2 +# define INTC_INTPR21_OFFSET_OFFSET 0 +# define INTC_INTPR21_OFFSET_SIZE 24 +#define INTC_INTREQ21 0x154 +# define INTC_INTREQ21_IREQUEST672_OFFSET 0 +# define INTC_INTREQ21_IREQUEST672_SIZE 1 +#define INTC_INTPR22 0x58 +# define INTC_INTPR22_INTLEV_OFFSET 30 +# define INTC_INTPR22_INTLEV_SIZE 2 +# define INTC_INTPR22_OFFSET_OFFSET 0 +# define INTC_INTPR22_OFFSET_SIZE 24 +#define INTC_INTREQ22 0x158 +# define INTC_INTREQ22_IREQUEST704_OFFSET 0 +# define INTC_INTREQ22_IREQUEST704_SIZE 1 +# define INTC_INTREQ22_IREQUEST705_OFFSET 1 +# define INTC_INTREQ22_IREQUEST705_SIZE 1 +# define INTC_INTREQ22_IREQUEST706_OFFSET 2 +# define INTC_INTREQ22_IREQUEST706_SIZE 1 +#define INTC_INTPR23 0x5c +# define INTC_INTPR23_INTLEV_OFFSET 30 +# define INTC_INTPR23_INTLEV_SIZE 2 +# define INTC_INTPR23_OFFSET_OFFSET 0 +# define INTC_INTPR23_OFFSET_SIZE 24 +#define INTC_INTREQ23 0x15c +# define INTC_INTREQ23_IREQUEST736_OFFSET 0 +# define INTC_INTREQ23_IREQUEST736_SIZE 1 +# define INTC_INTREQ23_IREQUEST737_OFFSET 1 +# define INTC_INTREQ23_IREQUEST737_SIZE 1 +# define INTC_INTREQ23_IREQUEST738_OFFSET 2 +# define INTC_INTREQ23_IREQUEST738_SIZE 1 +#define INTC_INTPR24 0x60 +# define INTC_INTPR24_INTLEV_OFFSET 30 +# define INTC_INTPR24_INTLEV_SIZE 2 +# define INTC_INTPR24_OFFSET_OFFSET 0 +# define INTC_INTPR24_OFFSET_SIZE 24 +#define INTC_INTREQ24 0x160 +# define INTC_INTREQ24_IREQUEST768_OFFSET 0 +# define INTC_INTREQ24_IREQUEST768_SIZE 1 +#define INTC_INTPR25 0x64 +# define INTC_INTPR25_INTLEV_OFFSET 30 +# define INTC_INTPR25_INTLEV_SIZE 2 +# define INTC_INTPR25_OFFSET_OFFSET 0 +# define INTC_INTPR25_OFFSET_SIZE 24 +#define INTC_INTREQ25 0x164 +# define INTC_INTREQ25_IREQUEST800_OFFSET 0 +# define INTC_INTREQ25_IREQUEST800_SIZE 1 +#define INTC_INTPR26 0x68 +# define INTC_INTPR26_INTLEV_OFFSET 30 +# define INTC_INTPR26_INTLEV_SIZE 2 +# define INTC_INTPR26_OFFSET_OFFSET 0 +# define INTC_INTPR26_OFFSET_SIZE 24 +#define INTC_INTREQ26 0x168 +# define INTC_INTREQ26_IREQUEST832_OFFSET 0 +# define INTC_INTREQ26_IREQUEST832_SIZE 1 +#define INTC_INTPR27 0x6c +# define INTC_INTPR27_INTLEV_OFFSET 30 +# define INTC_INTPR27_INTLEV_SIZE 2 +# define INTC_INTPR27_OFFSET_OFFSET 0 +# define INTC_INTPR27_OFFSET_SIZE 24 +#define INTC_INTREQ27 0x16c +# define INTC_INTREQ27_IREQUEST864_OFFSET 0 +# define INTC_INTREQ27_IREQUEST864_SIZE 1 +#define INTC_INTPR28 0x70 +# define INTC_INTPR28_INTLEV_OFFSET 30 +# define INTC_INTPR28_INTLEV_SIZE 2 +# define INTC_INTPR28_OFFSET_OFFSET 0 +# define INTC_INTPR28_OFFSET_SIZE 24 +#define INTC_INTREQ28 0x170 +# define INTC_INTREQ28_IREQUEST896_OFFSET 0 +# define INTC_INTREQ28_IREQUEST896_SIZE 1 +#define INTC_INTPR29 0x74 +# define INTC_INTPR29_INTLEV_OFFSET 30 +# define INTC_INTPR29_INTLEV_SIZE 2 +# define INTC_INTPR29_OFFSET_OFFSET 0 +# define INTC_INTPR29_OFFSET_SIZE 24 +#define INTC_INTREQ29 0x174 +# define INTC_INTREQ29_IREQUEST928_OFFSET 0 +# define INTC_INTREQ29_IREQUEST928_SIZE 1 +#define INTC_INTPR30 0x78 +# define INTC_INTPR30_INTLEV_OFFSET 30 +# define INTC_INTPR30_INTLEV_SIZE 2 +# define INTC_INTPR30_OFFSET_OFFSET 0 +# define INTC_INTPR30_OFFSET_SIZE 24 +#define INTC_INTREQ30 0x178 +# define INTC_INTREQ30_IREQUEST960_OFFSET 0 +# define INTC_INTREQ30_IREQUEST960_SIZE 1 +#define INTC_INTPR31 0x7c +# define INTC_INTPR31_INTLEV_OFFSET 30 +# define INTC_INTPR31_INTLEV_SIZE 2 +# define INTC_INTPR31_OFFSET_OFFSET 0 +# define INTC_INTPR31_OFFSET_SIZE 24 +#define INTC_INTREQ31 0x17c +# define INTC_INTREQ31_IREQUEST992_OFFSET 0 +# define INTC_INTREQ31_IREQUEST992_SIZE 1 +#define INTC_INTPR32 0x80 +# define INTC_INTPR32_INTLEV_OFFSET 30 +# define INTC_INTPR32_INTLEV_SIZE 2 +# define INTC_INTPR32_OFFSET_OFFSET 0 +# define INTC_INTPR32_OFFSET_SIZE 24 +#define INTC_INTREQ32 0x180 +# define INTC_INTREQ32_IREQUEST1024_OFFSET 0 +# define INTC_INTREQ32_IREQUEST1024_SIZE 1 +#define INTC_INTCAUSE0 0x20c +# define INTC_INTCAUSE0_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE0_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE1 0x208 +# define INTC_INTCAUSE1_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE1_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE2 0x204 +# define INTC_INTCAUSE2_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE2_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE3 0x200 +# define INTC_INTCAUSE3_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE3_CAUSEGRP_SIZE 6 + +#define INTC_BIT(name) (1 << INTC_##name##_OFFSET) +#define INTC_MKBF(name, value) (((value) & ((1 << INTC_##name##_SIZE) - 1)) << INTC_##name##_OFFSET) +#define INTC_GETBF(name, value) (((value) >> INTC_##name##_OFFSET) & ((1 << INTC_##name##_SIZE) - 1)) + +#define intc_readl(port,reg) readl((port)->regs + INTC_##reg) +#define intc_writel(port,reg,value) writel((value), (port)->regs + INTC_##reg) + +#endif /* __ASM_AVR32_PERIHP_INTC_H__ */ diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c new file mode 100644 index 000000000000..d3aabfca8598 --- /dev/null +++ b/arch/avr32/mach-at32ap/pio.c @@ -0,0 +1,118 @@ +/* + * Atmel PIO2 Port Multiplexer support + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include + +#include + +#include "pio.h" + +#define MAX_NR_PIO_DEVICES 8 + +struct pio_device { + void __iomem *regs; + const struct platform_device *pdev; + struct clk *clk; + u32 alloc_mask; + char name[32]; +}; + +static struct pio_device pio_dev[MAX_NR_PIO_DEVICES]; + +void portmux_set_func(unsigned int portmux_id, unsigned int pin_id, + unsigned int function_id) +{ + struct pio_device *pio; + u32 mask = 1 << pin_id; + + BUG_ON(portmux_id >= MAX_NR_PIO_DEVICES); + + pio = &pio_dev[portmux_id]; + + if (function_id) + pio_writel(pio, BSR, mask); + else + pio_writel(pio, ASR, mask); + pio_writel(pio, PDR, mask); +} + +static int __init pio_probe(struct platform_device *pdev) +{ + struct pio_device *pio = NULL; + + BUG_ON(pdev->id >= MAX_NR_PIO_DEVICES); + pio = &pio_dev[pdev->id]; + BUG_ON(!pio->regs); + + /* TODO: Interrupts */ + + platform_set_drvdata(pdev, pio); + + printk(KERN_INFO "%s: Atmel Port Multiplexer at 0x%p (irq %d)\n", + pio->name, pio->regs, platform_get_irq(pdev, 0)); + + return 0; +} + +static struct platform_driver pio_driver = { + .probe = pio_probe, + .driver = { + .name = "pio", + }, +}; + +static int __init pio_init(void) +{ + return platform_driver_register(&pio_driver); +} +subsys_initcall(pio_init); + +void __init at32_init_pio(struct platform_device *pdev) +{ + struct resource *regs; + struct pio_device *pio; + + if (pdev->id > MAX_NR_PIO_DEVICES) { + dev_err(&pdev->dev, "only %d PIO devices supported\n", + MAX_NR_PIO_DEVICES); + return; + } + + pio = &pio_dev[pdev->id]; + snprintf(pio->name, sizeof(pio->name), "pio%d", pdev->id); + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) { + dev_err(&pdev->dev, "no mmio resource defined\n"); + return; + } + + pio->clk = clk_get(&pdev->dev, "mck"); + if (IS_ERR(pio->clk)) + /* + * This is a fatal error, but if we continue we might + * be so lucky that we manage to initialize the + * console and display this message... + */ + dev_err(&pdev->dev, "no mck clock defined\n"); + else + clk_enable(pio->clk); + + pio->pdev = pdev; + pio->regs = ioremap(regs->start, regs->end - regs->start + 1); + + pio_writel(pio, ODR, ~0UL); + pio_writel(pio, PER, ~0UL); +} diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h new file mode 100644 index 000000000000..cfea12351599 --- /dev/null +++ b/arch/avr32/mach-at32ap/pio.h @@ -0,0 +1,178 @@ +/* + * Atmel PIO2 Port Multiplexer support + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ARCH_AVR32_AT32AP_PIO_H__ +#define __ARCH_AVR32_AT32AP_PIO_H__ + +/* PIO register offsets */ +#define PIO_PER 0x0000 +#define PIO_PDR 0x0004 +#define PIO_PSR 0x0008 +#define PIO_OER 0x0010 +#define PIO_ODR 0x0014 +#define PIO_OSR 0x0018 +#define PIO_IFER 0x0020 +#define PIO_IFDR 0x0024 +#define PIO_ISFR 0x0028 +#define PIO_SODR 0x0030 +#define PIO_CODR 0x0034 +#define PIO_ODSR 0x0038 +#define PIO_PDSR 0x003c +#define PIO_IER 0x0040 +#define PIO_IDR 0x0044 +#define PIO_IMR 0x0048 +#define PIO_ISR 0x004c +#define PIO_MDER 0x0050 +#define PIO_MDDR 0x0054 +#define PIO_MDSR 0x0058 +#define PIO_PUDR 0x0060 +#define PIO_PUER 0x0064 +#define PIO_PUSR 0x0068 +#define PIO_ASR 0x0070 +#define PIO_BSR 0x0074 +#define PIO_ABSR 0x0078 +#define PIO_OWER 0x00a0 +#define PIO_OWDR 0x00a4 +#define PIO_OWSR 0x00a8 + +/* Bitfields in PER */ + +/* Bitfields in PDR */ + +/* Bitfields in PSR */ + +/* Bitfields in OER */ + +/* Bitfields in ODR */ + +/* Bitfields in OSR */ + +/* Bitfields in IFER */ + +/* Bitfields in IFDR */ + +/* Bitfields in ISFR */ + +/* Bitfields in SODR */ + +/* Bitfields in CODR */ + +/* Bitfields in ODSR */ + +/* Bitfields in PDSR */ + +/* Bitfields in IER */ + +/* Bitfields in IDR */ + +/* Bitfields in IMR */ + +/* Bitfields in ISR */ + +/* Bitfields in MDER */ + +/* Bitfields in MDDR */ + +/* Bitfields in MDSR */ + +/* Bitfields in PUDR */ + +/* Bitfields in PUER */ + +/* Bitfields in PUSR */ + +/* Bitfields in ASR */ + +/* Bitfields in BSR */ + +/* Bitfields in ABSR */ +#define PIO_P0_OFFSET 0 +#define PIO_P0_SIZE 1 +#define PIO_P1_OFFSET 1 +#define PIO_P1_SIZE 1 +#define PIO_P2_OFFSET 2 +#define PIO_P2_SIZE 1 +#define PIO_P3_OFFSET 3 +#define PIO_P3_SIZE 1 +#define PIO_P4_OFFSET 4 +#define PIO_P4_SIZE 1 +#define PIO_P5_OFFSET 5 +#define PIO_P5_SIZE 1 +#define PIO_P6_OFFSET 6 +#define PIO_P6_SIZE 1 +#define PIO_P7_OFFSET 7 +#define PIO_P7_SIZE 1 +#define PIO_P8_OFFSET 8 +#define PIO_P8_SIZE 1 +#define PIO_P9_OFFSET 9 +#define PIO_P9_SIZE 1 +#define PIO_P10_OFFSET 10 +#define PIO_P10_SIZE 1 +#define PIO_P11_OFFSET 11 +#define PIO_P11_SIZE 1 +#define PIO_P12_OFFSET 12 +#define PIO_P12_SIZE 1 +#define PIO_P13_OFFSET 13 +#define PIO_P13_SIZE 1 +#define PIO_P14_OFFSET 14 +#define PIO_P14_SIZE 1 +#define PIO_P15_OFFSET 15 +#define PIO_P15_SIZE 1 +#define PIO_P16_OFFSET 16 +#define PIO_P16_SIZE 1 +#define PIO_P17_OFFSET 17 +#define PIO_P17_SIZE 1 +#define PIO_P18_OFFSET 18 +#define PIO_P18_SIZE 1 +#define PIO_P19_OFFSET 19 +#define PIO_P19_SIZE 1 +#define PIO_P20_OFFSET 20 +#define PIO_P20_SIZE 1 +#define PIO_P21_OFFSET 21 +#define PIO_P21_SIZE 1 +#define PIO_P22_OFFSET 22 +#define PIO_P22_SIZE 1 +#define PIO_P23_OFFSET 23 +#define PIO_P23_SIZE 1 +#define PIO_P24_OFFSET 24 +#define PIO_P24_SIZE 1 +#define PIO_P25_OFFSET 25 +#define PIO_P25_SIZE 1 +#define PIO_P26_OFFSET 26 +#define PIO_P26_SIZE 1 +#define PIO_P27_OFFSET 27 +#define PIO_P27_SIZE 1 +#define PIO_P28_OFFSET 28 +#define PIO_P28_SIZE 1 +#define PIO_P29_OFFSET 29 +#define PIO_P29_SIZE 1 +#define PIO_P30_OFFSET 30 +#define PIO_P30_SIZE 1 +#define PIO_P31_OFFSET 31 +#define PIO_P31_SIZE 1 + +/* Bitfields in OWER */ + +/* Bitfields in OWDR */ + +/* Bitfields in OWSR */ + +/* Bit manipulation macros */ +#define PIO_BIT(name) (1 << PIO_##name##_OFFSET) +#define PIO_BF(name,value) (((value) & ((1 << PIO_##name##_SIZE) - 1)) << PIO_##name##_OFFSET) +#define PIO_BFEXT(name,value) (((value) >> PIO_##name##_OFFSET) & ((1 << PIO_##name##_SIZE) - 1)) +#define PIO_BFINS(name,value,old) (((old) & ~(((1 << PIO_##name##_SIZE) - 1) << PIO_##name##_OFFSET)) | PIO_BF(name,value)) + +/* Register access macros */ +#define pio_readl(port,reg) readl((port)->regs + PIO_##reg) +#define pio_writel(port,reg,value) writel((value), (port)->regs + PIO_##reg) + +void at32_init_pio(struct platform_device *pdev); + +#endif /* __ARCH_AVR32_AT32AP_PIO_H__ */ diff --git a/arch/avr32/mach-at32ap/sm.c b/arch/avr32/mach-at32ap/sm.c new file mode 100644 index 000000000000..03306eb0345e --- /dev/null +++ b/arch/avr32/mach-at32ap/sm.c @@ -0,0 +1,289 @@ +/* + * System Manager driver for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "sm.h" + +#define SM_EIM_IRQ_RESOURCE 1 +#define SM_PM_IRQ_RESOURCE 2 +#define SM_RTC_IRQ_RESOURCE 3 + +#define to_eim(irqc) container_of(irqc, struct at32_sm, irqc) + +struct at32_sm system_manager; + +int __init at32_sm_init(void) +{ + struct resource *regs; + struct at32_sm *sm = &system_manager; + int ret = -ENXIO; + + regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); + if (!regs) + goto fail; + + spin_lock_init(&sm->lock); + sm->pdev = &at32_sm_device; + + ret = -ENOMEM; + sm->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!sm->regs) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); + return ret; +} + +/* + * External Interrupt Module (EIM). + * + * EIM gets level- or edge-triggered interrupts of either polarity + * from the outside and converts it to active-high level-triggered + * interrupts that the internal interrupt controller can handle. EIM + * also provides masking/unmasking of interrupts, as well as + * acknowledging of edge-triggered interrupts. + */ + +static irqreturn_t spurious_eim_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + printk(KERN_WARNING "Spurious EIM interrupt %d\n", irq); + disable_irq(irq); + return IRQ_NONE; +} + +static struct irqaction eim_spurious_action = { + .handler = spurious_eim_interrupt, +}; + +static irqreturn_t eim_handle_irq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct irq_controller * irqc = dev_id; + struct at32_sm *sm = to_eim(irqc); + unsigned long pending; + + /* + * No need to disable interrupts globally. The interrupt + * level relevant to this group must be masked all the time, + * so we know that this particular EIM instance will not be + * re-entered. + */ + spin_lock(&sm->lock); + + pending = intc_get_pending(sm->irqc.irq_group); + if (unlikely(!pending)) { + printk(KERN_ERR "EIM (group %u): No interrupts pending!\n", + sm->irqc.irq_group); + goto unlock; + } + + do { + struct irqaction *action; + unsigned int i; + + i = fls(pending) - 1; + pending &= ~(1 << i); + action = sm->action[i]; + + /* Acknowledge the interrupt */ + sm_writel(sm, EIM_ICR, 1 << i); + + spin_unlock(&sm->lock); + + if (action->flags & SA_INTERRUPT) + local_irq_disable(); + action->handler(sm->irqc.first_irq + i, action->dev_id, regs); + local_irq_enable(); + spin_lock(&sm->lock); + if (action->flags & SA_SAMPLE_RANDOM) + add_interrupt_randomness(sm->irqc.first_irq + i); + } while (pending); + +unlock: + spin_unlock(&sm->lock); + return IRQ_HANDLED; +} + +static void eim_mask(struct irq_controller *irqc, unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned int i; + + i = irq - sm->irqc.first_irq; + sm_writel(sm, EIM_IDR, 1 << i); +} + +static void eim_unmask(struct irq_controller *irqc, unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned int i; + + i = irq - sm->irqc.first_irq; + sm_writel(sm, EIM_IER, 1 << i); +} + +static int eim_setup(struct irq_controller *irqc, unsigned int irq, + struct irqaction *action) +{ + struct at32_sm *sm = to_eim(irqc); + sm->action[irq - sm->irqc.first_irq] = action; + /* Acknowledge earlier interrupts */ + sm_writel(sm, EIM_ICR, (1<<(irq - sm->irqc.first_irq))); + eim_unmask(irqc, irq); + return 0; +} + +static void eim_free(struct irq_controller *irqc, unsigned int irq, + void *dev) +{ + struct at32_sm *sm = to_eim(irqc); + eim_mask(irqc, irq); + sm->action[irq - sm->irqc.first_irq] = &eim_spurious_action; +} + +static int eim_set_type(struct irq_controller *irqc, unsigned int irq, + unsigned int type) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned long flags; + u32 value, pattern; + + spin_lock_irqsave(&sm->lock, flags); + + pattern = 1 << (irq - sm->irqc.first_irq); + + value = sm_readl(sm, EIM_MODE); + if (type & IRQ_TYPE_LEVEL) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_MODE, value); + value = sm_readl(sm, EIM_EDGE); + if (type & IRQ_EDGE_RISING) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_EDGE, value); + value = sm_readl(sm, EIM_LEVEL); + if (type & IRQ_LEVEL_HIGH) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_LEVEL, value); + + spin_unlock_irqrestore(&sm->lock, flags); + + return 0; +} + +static unsigned int eim_get_type(struct irq_controller *irqc, + unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned long flags; + unsigned int type = 0; + u32 mode, edge, level, pattern; + + pattern = 1 << (irq - sm->irqc.first_irq); + + spin_lock_irqsave(&sm->lock, flags); + mode = sm_readl(sm, EIM_MODE); + edge = sm_readl(sm, EIM_EDGE); + level = sm_readl(sm, EIM_LEVEL); + spin_unlock_irqrestore(&sm->lock, flags); + + if (mode & pattern) + type |= IRQ_TYPE_LEVEL; + if (edge & pattern) + type |= IRQ_EDGE_RISING; + if (level & pattern) + type |= IRQ_LEVEL_HIGH; + + return type; +} + +static struct irq_controller_class eim_irq_class = { + .typename = "EIM", + .handle = eim_handle_irq, + .setup = eim_setup, + .free = eim_free, + .mask = eim_mask, + .unmask = eim_unmask, + .set_type = eim_set_type, + .get_type = eim_get_type, +}; + +static int __init eim_init(void) +{ + struct at32_sm *sm = &system_manager; + unsigned int i; + u32 pattern; + int ret; + + /* + * The EIM is really the same module as SM, so register + * mapping, etc. has been taken care of already. + */ + + /* + * Find out how many interrupt lines that are actually + * implemented in hardware. + */ + sm_writel(sm, EIM_IDR, ~0UL); + sm_writel(sm, EIM_MODE, ~0UL); + pattern = sm_readl(sm, EIM_MODE); + sm->irqc.nr_irqs = fls(pattern); + + ret = -ENOMEM; + sm->action = kmalloc(sizeof(*sm->action) * sm->irqc.nr_irqs, + GFP_KERNEL); + if (!sm->action) + goto out; + + for (i = 0; i < sm->irqc.nr_irqs; i++) + sm->action[i] = &eim_spurious_action; + + spin_lock_init(&sm->lock); + sm->irqc.irq_group = sm->pdev->resource[SM_EIM_IRQ_RESOURCE].start; + sm->irqc.class = &eim_irq_class; + + ret = intc_register_controller(&sm->irqc); + if (ret < 0) + goto out_free_actions; + + printk("EIM: External Interrupt Module at 0x%p, IRQ group %u\n", + sm->regs, sm->irqc.irq_group); + printk("EIM: Handling %u external IRQs, starting with IRQ%u\n", + sm->irqc.nr_irqs, sm->irqc.first_irq); + + return 0; + +out_free_actions: + kfree(sm->action); +out: + return ret; +} +arch_initcall(eim_init); diff --git a/arch/avr32/mach-at32ap/sm.h b/arch/avr32/mach-at32ap/sm.h new file mode 100644 index 000000000000..27565822ae2a --- /dev/null +++ b/arch/avr32/mach-at32ap/sm.h @@ -0,0 +1,240 @@ +/* + * Register definitions for SM + * + * System Manager + */ +#ifndef __ASM_AVR32_SM_H__ +#define __ASM_AVR32_SM_H__ + +/* SM register offsets */ +#define SM_PM_MCCTRL 0x0000 +#define SM_PM_CKSEL 0x0004 +#define SM_PM_CPU_MASK 0x0008 +#define SM_PM_HSB_MASK 0x000c +#define SM_PM_PBA_MASK 0x0010 +#define SM_PM_PBB_MASK 0x0014 +#define SM_PM_PLL0 0x0020 +#define SM_PM_PLL1 0x0024 +#define SM_PM_VCTRL 0x0030 +#define SM_PM_VMREF 0x0034 +#define SM_PM_VMV 0x0038 +#define SM_PM_IER 0x0040 +#define SM_PM_IDR 0x0044 +#define SM_PM_IMR 0x0048 +#define SM_PM_ISR 0x004c +#define SM_PM_ICR 0x0050 +#define SM_PM_GCCTRL 0x0060 +#define SM_RTC_CTRL 0x0080 +#define SM_RTC_VAL 0x0084 +#define SM_RTC_TOP 0x0088 +#define SM_RTC_IER 0x0090 +#define SM_RTC_IDR 0x0094 +#define SM_RTC_IMR 0x0098 +#define SM_RTC_ISR 0x009c +#define SM_RTC_ICR 0x00a0 +#define SM_WDT_CTRL 0x00b0 +#define SM_WDT_CLR 0x00b4 +#define SM_WDT_EXT 0x00b8 +#define SM_RC_RCAUSE 0x00c0 +#define SM_EIM_IER 0x0100 +#define SM_EIM_IDR 0x0104 +#define SM_EIM_IMR 0x0108 +#define SM_EIM_ISR 0x010c +#define SM_EIM_ICR 0x0110 +#define SM_EIM_MODE 0x0114 +#define SM_EIM_EDGE 0x0118 +#define SM_EIM_LEVEL 0x011c +#define SM_EIM_TEST 0x0120 +#define SM_EIM_NMIC 0x0124 + +/* Bitfields in PM_MCCTRL */ + +/* Bitfields in PM_CKSEL */ +#define SM_CPUSEL_OFFSET 0 +#define SM_CPUSEL_SIZE 3 +#define SM_CPUDIV_OFFSET 7 +#define SM_CPUDIV_SIZE 1 +#define SM_HSBSEL_OFFSET 8 +#define SM_HSBSEL_SIZE 3 +#define SM_HSBDIV_OFFSET 15 +#define SM_HSBDIV_SIZE 1 +#define SM_PBASEL_OFFSET 16 +#define SM_PBASEL_SIZE 3 +#define SM_PBADIV_OFFSET 23 +#define SM_PBADIV_SIZE 1 +#define SM_PBBSEL_OFFSET 24 +#define SM_PBBSEL_SIZE 3 +#define SM_PBBDIV_OFFSET 31 +#define SM_PBBDIV_SIZE 1 + +/* Bitfields in PM_CPU_MASK */ + +/* Bitfields in PM_HSB_MASK */ + +/* Bitfields in PM_PBA_MASK */ + +/* Bitfields in PM_PBB_MASK */ + +/* Bitfields in PM_PLL0 */ +#define SM_PLLEN_OFFSET 0 +#define SM_PLLEN_SIZE 1 +#define SM_PLLOSC_OFFSET 1 +#define SM_PLLOSC_SIZE 1 +#define SM_PLLOPT_OFFSET 2 +#define SM_PLLOPT_SIZE 3 +#define SM_PLLDIV_OFFSET 8 +#define SM_PLLDIV_SIZE 8 +#define SM_PLLMUL_OFFSET 16 +#define SM_PLLMUL_SIZE 8 +#define SM_PLLCOUNT_OFFSET 24 +#define SM_PLLCOUNT_SIZE 6 +#define SM_PLLTEST_OFFSET 31 +#define SM_PLLTEST_SIZE 1 + +/* Bitfields in PM_PLL1 */ + +/* Bitfields in PM_VCTRL */ +#define SM_VAUTO_OFFSET 0 +#define SM_VAUTO_SIZE 1 +#define SM_PM_VCTRL_VAL_OFFSET 8 +#define SM_PM_VCTRL_VAL_SIZE 7 + +/* Bitfields in PM_VMREF */ +#define SM_REFSEL_OFFSET 0 +#define SM_REFSEL_SIZE 4 + +/* Bitfields in PM_VMV */ +#define SM_PM_VMV_VAL_OFFSET 0 +#define SM_PM_VMV_VAL_SIZE 8 + +/* Bitfields in PM_IER */ + +/* Bitfields in PM_IDR */ + +/* Bitfields in PM_IMR */ + +/* Bitfields in PM_ISR */ + +/* Bitfields in PM_ICR */ +#define SM_LOCK0_OFFSET 0 +#define SM_LOCK0_SIZE 1 +#define SM_LOCK1_OFFSET 1 +#define SM_LOCK1_SIZE 1 +#define SM_WAKE_OFFSET 2 +#define SM_WAKE_SIZE 1 +#define SM_VOK_OFFSET 3 +#define SM_VOK_SIZE 1 +#define SM_VMRDY_OFFSET 4 +#define SM_VMRDY_SIZE 1 +#define SM_CKRDY_OFFSET 5 +#define SM_CKRDY_SIZE 1 + +/* Bitfields in PM_GCCTRL */ +#define SM_OSCSEL_OFFSET 0 +#define SM_OSCSEL_SIZE 1 +#define SM_PLLSEL_OFFSET 1 +#define SM_PLLSEL_SIZE 1 +#define SM_CEN_OFFSET 2 +#define SM_CEN_SIZE 1 +#define SM_CPC_OFFSET 3 +#define SM_CPC_SIZE 1 +#define SM_DIVEN_OFFSET 4 +#define SM_DIVEN_SIZE 1 +#define SM_DIV_OFFSET 8 +#define SM_DIV_SIZE 8 + +/* Bitfields in RTC_CTRL */ +#define SM_PCLR_OFFSET 1 +#define SM_PCLR_SIZE 1 +#define SM_TOPEN_OFFSET 2 +#define SM_TOPEN_SIZE 1 +#define SM_CLKEN_OFFSET 3 +#define SM_CLKEN_SIZE 1 +#define SM_PSEL_OFFSET 8 +#define SM_PSEL_SIZE 16 + +/* Bitfields in RTC_VAL */ +#define SM_RTC_VAL_VAL_OFFSET 0 +#define SM_RTC_VAL_VAL_SIZE 31 + +/* Bitfields in RTC_TOP */ +#define SM_RTC_TOP_VAL_OFFSET 0 +#define SM_RTC_TOP_VAL_SIZE 32 + +/* Bitfields in RTC_IER */ + +/* Bitfields in RTC_IDR */ + +/* Bitfields in RTC_IMR */ + +/* Bitfields in RTC_ISR */ + +/* Bitfields in RTC_ICR */ +#define SM_TOPI_OFFSET 0 +#define SM_TOPI_SIZE 1 + +/* Bitfields in WDT_CTRL */ +#define SM_KEY_OFFSET 24 +#define SM_KEY_SIZE 8 + +/* Bitfields in WDT_CLR */ + +/* Bitfields in WDT_EXT */ + +/* Bitfields in RC_RCAUSE */ +#define SM_POR_OFFSET 0 +#define SM_POR_SIZE 1 +#define SM_BOD_OFFSET 1 +#define SM_BOD_SIZE 1 +#define SM_EXT_OFFSET 2 +#define SM_EXT_SIZE 1 +#define SM_WDT_OFFSET 3 +#define SM_WDT_SIZE 1 +#define SM_NTAE_OFFSET 4 +#define SM_NTAE_SIZE 1 +#define SM_SERP_OFFSET 5 +#define SM_SERP_SIZE 1 + +/* Bitfields in EIM_IER */ + +/* Bitfields in EIM_IDR */ + +/* Bitfields in EIM_IMR */ + +/* Bitfields in EIM_ISR */ + +/* Bitfields in EIM_ICR */ + +/* Bitfields in EIM_MODE */ + +/* Bitfields in EIM_EDGE */ +#define SM_INT0_OFFSET 0 +#define SM_INT0_SIZE 1 +#define SM_INT1_OFFSET 1 +#define SM_INT1_SIZE 1 +#define SM_INT2_OFFSET 2 +#define SM_INT2_SIZE 1 +#define SM_INT3_OFFSET 3 +#define SM_INT3_SIZE 1 + +/* Bitfields in EIM_LEVEL */ + +/* Bitfields in EIM_TEST */ +#define SM_TESTEN_OFFSET 31 +#define SM_TESTEN_SIZE 1 + +/* Bitfields in EIM_NMIC */ +#define SM_EN_OFFSET 0 +#define SM_EN_SIZE 1 + +/* Bit manipulation macros */ +#define SM_BIT(name) (1 << SM_##name##_OFFSET) +#define SM_BF(name,value) (((value) & ((1 << SM_##name##_SIZE) - 1)) << SM_##name##_OFFSET) +#define SM_BFEXT(name,value) (((value) >> SM_##name##_OFFSET) & ((1 << SM_##name##_SIZE) - 1)) +#define SM_BFINS(name,value,old) (((old) & ~(((1 << SM_##name##_SIZE) - 1) << SM_##name##_OFFSET)) | SM_BF(name,value)) + +/* Register access macros */ +#define sm_readl(port,reg) readl((port)->regs + SM_##reg) +#define sm_writel(port,reg,value) writel((value), (port)->regs + SM_##reg) + +#endif /* __ASM_AVR32_SM_H__ */ diff --git a/arch/avr32/mm/Makefile b/arch/avr32/mm/Makefile new file mode 100644 index 000000000000..0066491f90d4 --- /dev/null +++ b/arch/avr32/mm/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the Linux/AVR32 kernel. +# + +obj-y += init.o clear_page.o copy_page.o dma-coherent.o +obj-y += ioremap.o cache.o fault.o tlb.o diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c new file mode 100644 index 000000000000..450515b245a0 --- /dev/null +++ b/arch/avr32/mm/cache.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include +#include + +/* + * If you attempt to flush anything more than this, you need superuser + * privileges. The value is completely arbitrary. + */ +#define CACHEFLUSH_MAX_LEN 1024 + +void invalidate_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + + //printk("invalidate dcache: %p + %u\n", start, size); + + /* You asked for it, you got it */ + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + invalidate_dcache_line((void *)v); +} + +void clean_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + clean_dcache_line((void *)v); + flush_write_buffer(); +} + +void flush_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + flush_dcache_line((void *)v); + flush_write_buffer(); +} + +void invalidate_icache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.icache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + invalidate_icache_line((void *)v); +} + +static inline void __flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long v, linesz; + + linesz = boot_cpu_data.dcache.linesz; + for (v = start; v < end; v += linesz) { + clean_dcache_line((void *)v); + invalidate_icache_line((void *)v); + } + + flush_write_buffer(); +} + +/* + * This one is called after a module has been loaded. + */ +void flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long linesz; + + linesz = boot_cpu_data.dcache.linesz; + __flush_icache_range(start & ~(linesz - 1), + (end + linesz - 1) & ~(linesz - 1)); +} + +/* + * This one is called from do_no_page(), do_swap_page() and install_page(). + */ +void flush_icache_page(struct vm_area_struct *vma, struct page *page) +{ + if (vma->vm_flags & VM_EXEC) { + void *v = kmap(page); + __flush_icache_range((unsigned long)v, (unsigned long)v + PAGE_SIZE); + kunmap(v); + } +} + +/* + * This one is used by copy_to_user_page() + */ +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + if (vma->vm_flags & VM_EXEC) + flush_icache_range(addr, addr + len); +} + +asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len) +{ + int ret; + + if (len > CACHEFLUSH_MAX_LEN) { + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + } + + ret = -EFAULT; + if (!access_ok(VERIFY_WRITE, addr, len)) + goto out; + + switch (operation) { + case CACHE_IFLUSH: + flush_icache_range((unsigned long)addr, + (unsigned long)addr + len); + ret = 0; + break; + default: + ret = -EINVAL; + } + +out: + return ret; +} diff --git a/arch/avr32/mm/clear_page.S b/arch/avr32/mm/clear_page.S new file mode 100644 index 000000000000..5d70dca00699 --- /dev/null +++ b/arch/avr32/mm/clear_page.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +/* + * clear_page + * r12: P1 address (to) + */ + .text + .global clear_page +clear_page: + sub r9, r12, -PAGE_SIZE + mov r10, 0 + mov r11, 0 +0: st.d r12++, r10 + cp r12, r9 + brne 0b + mov pc, lr diff --git a/arch/avr32/mm/copy_page.S b/arch/avr32/mm/copy_page.S new file mode 100644 index 000000000000..c2b3752946b8 --- /dev/null +++ b/arch/avr32/mm/copy_page.S @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +/* + * copy_page + * + * r12 to (P1 address) + * r11 from (P1 address) + * r8-r10 scratch + */ + .text + .global copy_page +copy_page: + sub r10, r11, -(1 << PAGE_SHIFT) + /* pref r11[0] */ +1: /* pref r11[8] */ + ld.d r8, r11++ + st.d r12++, r8 + cp r11, r10 + brlo 1b + mov pc, lr diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c new file mode 100644 index 000000000000..44ab8a7bdae2 --- /dev/null +++ b/arch/avr32/mm/dma-coherent.c @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include +#include + +void dma_cache_sync(void *vaddr, size_t size, int direction) +{ + /* + * No need to sync an uncached area + */ + if (PXSEG(vaddr) == P2SEG) + return; + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + dma_cache_inv(vaddr, size); + break; + case DMA_TO_DEVICE: /* writeback only */ + dma_cache_wback(vaddr, size); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + dma_cache_wback_inv(vaddr, size); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL(dma_cache_sync); + +static struct page *__dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page, *free, *end; + int order; + + size = PAGE_ALIGN(size); + order = get_order(size); + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + split_page(page, order); + + /* + * When accessing physical memory with valid cache data, we + * get a cache hit even if the virtual memory region is marked + * as uncached. + * + * Since the memory is newly allocated, there is no point in + * doing a writeback. If the previous owner cares, he should + * have flushed the cache before releasing the memory. + */ + invalidate_dcache_region(phys_to_virt(page_to_phys(page)), size); + + *handle = page_to_bus(page); + free = page + (size >> PAGE_SHIFT); + end = page + (1 << order); + + /* + * Free any unused pages + */ + while (free < end) { + __free_page(free); + free++; + } + + return page; +} + +static void __dma_free(struct device *dev, size_t size, + struct page *page, dma_addr_t handle) +{ + struct page *end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT); + + while (page < end) + __free_page(page++); +} + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + void *ret = NULL; + + page = __dma_alloc(dev, size, handle, gfp); + if (page) + ret = phys_to_uncached(page_to_phys(page)); + + return ret; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ + void *addr = phys_to_cached(uncached_to_phys(cpu_addr)); + struct page *page; + + pr_debug("dma_free_coherent addr %p (phys %08lx) size %u\n", + cpu_addr, (unsigned long)handle, (unsigned)size); + BUG_ON(!virt_addr_valid(addr)); + page = virt_to_page(addr); + __dma_free(dev, size, page, handle); +} +EXPORT_SYMBOL(dma_free_coherent); + +#if 0 +void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + + page = __dma_alloc(dev, size, handle, gfp); + + /* Now, map the page into P3 with write-combining turned on */ + return __ioremap(page_to_phys(page), size, _PAGE_BUFFER); +} +EXPORT_SYMBOL(dma_alloc_writecombine); + +void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ + struct page *page; + + iounmap(cpu_addr); + + page = bus_to_page(handle); + __dma_free(dev, size, page, handle); +} +EXPORT_SYMBOL(dma_free_writecombine); +#endif diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c new file mode 100644 index 000000000000..678557260a35 --- /dev/null +++ b/arch/avr32/mm/fault.c @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/sh/mm/fault.c: + * Copyright (C) 1999 Niibe Yutaka + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef DEBUG +static void dump_code(unsigned long pc) +{ + char *p = (char *)pc; + char val; + int i; + + + printk(KERN_DEBUG "Code:"); + for (i = 0; i < 16; i++) { + if (__get_user(val, p + i)) + break; + printk(" %02x", val); + } + printk("\n"); +} +#endif + +#ifdef CONFIG_KPROBES +ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + +/* Hook to register for page fault notifications */ +int register_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); +} + +int unregister_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); +} + +static inline int notify_page_fault(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .trapnr = trap, + }; + return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); +} +#else +static inline int notify_page_fault(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + return NOTIFY_DONE; +} +#endif + +/* + * This routine handles page faults. It determines the address and the + * problem, and then passes it off to one of the appropriate routines. + * + * ecr is the Exception Cause Register. Possible values are: + * 5: Page not found (instruction access) + * 6: Protection fault (instruction access) + * 12: Page not found (read access) + * 13: Page not found (write access) + * 14: Protection fault (read access) + * 15: Protection fault (write access) + */ +asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; + const struct exception_table_entry *fixup; + unsigned long address; + unsigned long page; + int writeaccess = 0; + + if (notify_page_fault(DIE_PAGE_FAULT, regs, + ecr, SIGSEGV) == NOTIFY_STOP) + return; + + address = sysreg_read(TLBEAR); + + tsk = current; + mm = tsk->mm; + + /* + * If we're in an interrupt or have no user context, we must + * not take the fault... + */ + if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + goto no_context; + + local_irq_enable(); + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* + * Ok, we have a good vm_area for this memory access, so we + * can handle it... + */ +good_area: + //pr_debug("good area: vm_flags = 0x%lx\n", vma->vm_flags); + switch (ecr) { + case ECR_PROTECTION_X: + case ECR_TLB_MISS_X: + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + break; + case ECR_PROTECTION_R: + case ECR_TLB_MISS_R: + if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + goto bad_area; + break; + case ECR_PROTECTION_W: + case ECR_TLB_MISS_W: + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + writeaccess = 1; + break; + default: + panic("Unhandled case %lu in do_page_fault!", ecr); + } + + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the + * fault. + */ +survive: + switch (handle_mm_fault(mm, vma, address, writeaccess)) { + case VM_FAULT_MINOR: + tsk->min_flt++; + break; + case VM_FAULT_MAJOR: + tsk->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return; + + /* + * Something tried to access memory that isn't in our memory + * map. Fix it, but check if it's kernel or user first... + */ +bad_area: + pr_debug("Bad area [%s:%u]: addr %08lx, ecr %lu\n", + tsk->comm, tsk->pid, address, ecr); + + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { + /* Hmm...we have to pass address and ecr somehow... */ + /* tsk->thread.address = address; + tsk->thread.error_code = ecr; */ +#ifdef DEBUG + show_regs(regs); + dump_code(regs->pc); + + page = sysreg_read(PTBR); + printk("ptbr = %08lx", page); + if (page) { + page = ((unsigned long *)page)[address >> 22]; + printk(" pgd = %08lx", page); + if (page & _PAGE_PRESENT) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT]; + printk(" pte = %08lx\n", page); + } + } +#endif + pr_debug("Sending SIGSEGV to PID %d...\n", + tsk->pid); + force_sig(SIGSEGV, tsk); + return; + } + +no_context: + pr_debug("No context\n"); + + /* Are we prepared to handle this kernel fault? */ + fixup = search_exception_tables(regs->pc); + if (fixup) { + regs->pc = fixup->fixup; + pr_debug("Found fixup at %08lx\n", fixup->fixup); + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have + * to terminate things with extreme prejudice. + */ + if (address < PAGE_SIZE) + printk(KERN_ALERT + "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT + "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n", address); + printk(KERN_ALERT "pc = %08lx\n", regs->pc); + + page = sysreg_read(PTBR); + printk(KERN_ALERT "ptbr = %08lx", page); + if (page) { + page = ((unsigned long *)page)[address >> 22]; + printk(" pgd = %08lx", page); + if (page & _PAGE_PRESENT) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT]; + printk(" pte = %08lx\n", page); + } + } + die("\nOops", regs, ecr); + do_exit(SIGKILL); + + /* + * We ran out of memory, or some other thing happened to us + * that made us unable to handle the page fault gracefully. + */ +out_of_memory: + printk("Out of memory\n"); + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: Killing process %s\n", tsk->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel or + * user mode. + */ + /* address, error_code, trap_no, ... */ +#ifdef DEBUG + show_regs(regs); + dump_code(regs->pc); +#endif + pr_debug("Sending SIGBUS to PID %d...\n", tsk->pid); + force_sig(SIGBUS, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + goto no_context; +} + +asmlinkage void do_bus_error(unsigned long addr, int write_access, + struct pt_regs *regs) +{ + printk(KERN_ALERT + "Bus error at physical address 0x%08lx (%s access)\n", + addr, write_access ? "write" : "read"); + printk(KERN_INFO "DTLB dump:\n"); + dump_dtlb(); + die("Bus Error", regs, write_access); + do_exit(SIGKILL); +} + +/* + * This functionality is currently not possible to implement because + * we're using segmentation to ensure a fixed mapping of the kernel + * virtual address space. + * + * It would be possible to implement this, but it would require us to + * disable segmentation at startup and load the kernel mappings into + * the TLB like any other pages. There will be lots of trickery to + * avoid recursive invocation of the TLB miss handler, though... + */ +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + +} +EXPORT_SYMBOL(kernel_map_pages); +#endif diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c new file mode 100644 index 000000000000..3e6c41039808 --- /dev/null +++ b/arch/avr32/mm/init.c @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +struct page *empty_zero_page; + +/* + * Cache of MMU context last used. + */ +unsigned long mmu_context_cache = NO_CONTEXT; + +#define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT) +#define MAX_LOW_PFN (NODE_DATA(0)->bdata->node_low_pfn) + +void show_mem(void) +{ + int total = 0, reserved = 0, cached = 0; + int slab = 0, free = 0, shared = 0; + pg_data_t *pgdat; + + printk("Mem-info:\n"); + show_free_areas(); + + for_each_online_pgdat(pgdat) { + struct page *page, *end; + + page = pgdat->node_mem_map; + end = page + pgdat->node_spanned_pages; + + do { + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (PageSlab(page)) + slab++; + else if (!page_count(page)) + free++; + else + shared += page_count(page) - 1; + page++; + } while (page < end); + } + + printk ("%d pages of RAM\n", total); + printk ("%d free pages\n", free); + printk ("%d reserved pages\n", reserved); + printk ("%d slab pages\n", slab); + printk ("%d pages shared\n", shared); + printk ("%d pages swap cached\n", cached); +} + +static void __init print_memory_map(const char *what, + struct tag_mem_range *mem) +{ + printk ("%s:\n", what); + for (; mem; mem = mem->next) { + printk (" %08lx - %08lx\n", + (unsigned long)mem->addr, + (unsigned long)(mem->addr + mem->size)); + } +} + +#define MAX_LOWMEM HIGHMEM_START +#define MAX_LOWMEM_PFN PFN_DOWN(MAX_LOWMEM) + +/* + * Sort a list of memory regions in-place by ascending address. + * + * We're using bubble sort because we only have singly linked lists + * with few elements. + */ +static void __init sort_mem_list(struct tag_mem_range **pmem) +{ + int done; + struct tag_mem_range **a, **b; + + if (!*pmem) + return; + + do { + done = 1; + a = pmem, b = &(*pmem)->next; + while (*b) { + if ((*a)->addr > (*b)->addr) { + struct tag_mem_range *tmp; + tmp = (*b)->next; + (*b)->next = *a; + *a = *b; + *b = tmp; + done = 0; + } + a = &(*a)->next; + b = &(*a)->next; + } + } while (!done); +} + +/* + * Find a free memory region large enough for storing the + * bootmem bitmap. + */ +static unsigned long __init +find_bootmap_pfn(const struct tag_mem_range *mem) +{ + unsigned long bootmap_pages, bootmap_len; + unsigned long node_pages = PFN_UP(mem->size); + unsigned long bootmap_addr = mem->addr; + struct tag_mem_range *reserved = mem_reserved; + struct tag_mem_range *ramdisk = mem_ramdisk; + unsigned long kern_start = virt_to_phys(_stext); + unsigned long kern_end = virt_to_phys(_end); + + bootmap_pages = bootmem_bootmap_pages(node_pages); + bootmap_len = bootmap_pages << PAGE_SHIFT; + + /* + * Find a large enough region without reserved pages for + * storing the bootmem bitmap. We can take advantage of the + * fact that all lists have been sorted. + * + * We have to check explicitly reserved regions as well as the + * kernel image and any RAMDISK images... + * + * Oh, and we have to make sure we don't overwrite the taglist + * since we're going to use it until the bootmem allocator is + * fully up and running. + */ + while (1) { + if ((bootmap_addr < kern_end) && + ((bootmap_addr + bootmap_len) > kern_start)) + bootmap_addr = kern_end; + + while (reserved && + (bootmap_addr >= (reserved->addr + reserved->size))) + reserved = reserved->next; + + if (reserved && + ((bootmap_addr + bootmap_len) >= reserved->addr)) { + bootmap_addr = reserved->addr + reserved->size; + continue; + } + + while (ramdisk && + (bootmap_addr >= (ramdisk->addr + ramdisk->size))) + ramdisk = ramdisk->next; + + if (!ramdisk || + ((bootmap_addr + bootmap_len) < ramdisk->addr)) + break; + + bootmap_addr = ramdisk->addr + ramdisk->size; + } + + if ((PFN_UP(bootmap_addr) + bootmap_len) >= (mem->addr + mem->size)) + return ~0UL; + + return PFN_UP(bootmap_addr); +} + +void __init setup_bootmem(void) +{ + unsigned bootmap_size; + unsigned long first_pfn, bootmap_pfn, pages; + unsigned long max_pfn, max_low_pfn; + unsigned long kern_start = virt_to_phys(_stext); + unsigned long kern_end = virt_to_phys(_end); + unsigned node = 0; + struct tag_mem_range *bank, *res; + + sort_mem_list(&mem_phys); + sort_mem_list(&mem_reserved); + + print_memory_map("Physical memory", mem_phys); + print_memory_map("Reserved memory", mem_reserved); + + nodes_clear(node_online_map); + + if (mem_ramdisk) { +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = __va(mem_ramdisk->addr); + initrd_end = initrd_start + mem_ramdisk->size; + + print_memory_map("RAMDISK images", mem_ramdisk); + if (mem_ramdisk->next) + printk(KERN_WARNING + "Warning: Only the first RAMDISK image " + "will be used\n"); + sort_mem_list(&mem_ramdisk); +#else + printk(KERN_WARNING "RAM disk image present, but " + "no initrd support in kernel!\n"); +#endif + } + + if (mem_phys->next) + printk(KERN_WARNING "Only using first memory bank\n"); + + for (bank = mem_phys; bank; bank = NULL) { + first_pfn = PFN_UP(bank->addr); + max_low_pfn = max_pfn = PFN_DOWN(bank->addr + bank->size); + bootmap_pfn = find_bootmap_pfn(bank); + if (bootmap_pfn > max_pfn) + panic("No space for bootmem bitmap!\n"); + + if (max_low_pfn > MAX_LOWMEM_PFN) { + max_low_pfn = MAX_LOWMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* + * Lowmem is memory that can be addressed + * directly through P1/P2 + */ + printk(KERN_WARNING + "Node %u: Only %ld MiB of memory will be used.\n", + node, MAX_LOWMEM >> 20); + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else +#error HIGHMEM is not supported by AVR32 yet +#endif + } + + /* Initialize the boot-time allocator with low memory only. */ + bootmap_size = init_bootmem_node(NODE_DATA(node), bootmap_pfn, + first_pfn, max_low_pfn); + + printk("Node %u: bdata = %p, bdata->node_bootmem_map = %p\n", + node, NODE_DATA(node)->bdata, + NODE_DATA(node)->bdata->node_bootmem_map); + + /* + * Register fully available RAM pages with the bootmem + * allocator. + */ + pages = max_low_pfn - first_pfn; + free_bootmem_node (NODE_DATA(node), PFN_PHYS(first_pfn), + PFN_PHYS(pages)); + + /* + * Reserve space for the kernel image (if present in + * this node)... + */ + if ((kern_start >= PFN_PHYS(first_pfn)) && + (kern_start < PFN_PHYS(max_pfn))) { + printk("Node %u: Kernel image %08lx - %08lx\n", + node, kern_start, kern_end); + reserve_bootmem_node(NODE_DATA(node), kern_start, + kern_end - kern_start); + } + + /* ...the bootmem bitmap... */ + reserve_bootmem_node(NODE_DATA(node), + PFN_PHYS(bootmap_pfn), + bootmap_size); + + /* ...any RAMDISK images... */ + for (res = mem_ramdisk; res; res = res->next) { + if (res->addr > PFN_PHYS(max_pfn)) + break; + + if (res->addr >= PFN_PHYS(first_pfn)) { + printk("Node %u: RAMDISK %08lx - %08lx\n", + node, + (unsigned long)res->addr, + (unsigned long)(res->addr + res->size)); + reserve_bootmem_node(NODE_DATA(node), + res->addr, res->size); + } + } + + /* ...and any other reserved regions. */ + for (res = mem_reserved; res; res = res->next) { + if (res->addr > PFN_PHYS(max_pfn)) + break; + + if (res->addr >= PFN_PHYS(first_pfn)) { + printk("Node %u: Reserved %08lx - %08lx\n", + node, + (unsigned long)res->addr, + (unsigned long)(res->addr + res->size)); + reserve_bootmem_node(NODE_DATA(node), + res->addr, res->size); + } + } + + node_set_online(node); + } +} + +/* + * paging_init() sets up the page tables + * + * This routine also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + extern unsigned long _evba; + void *zero_page; + int nid; + + /* + * Make sure we can handle exceptions before enabling + * paging. Not that we should ever _get_ any exceptions this + * early, but you never know... + */ + printk("Exception vectors start at %p\n", &_evba); + sysreg_write(EVBA, (unsigned long)&_evba); + + /* + * Since we are ready to handle exceptions now, we should let + * the CPU generate them... + */ + __asm__ __volatile__ ("csrf %0" : : "i"(SR_EM_BIT)); + + /* + * Allocate the zero page. The allocator will panic if it + * can't satisfy the request, so no need to check. + */ + zero_page = alloc_bootmem_low_pages_node(NODE_DATA(0), + PAGE_SIZE); + + { + pgd_t *pg_dir; + int i; + + pg_dir = swapper_pg_dir; + sysreg_write(PTBR, (unsigned long)pg_dir); + + for (i = 0; i < PTRS_PER_PGD; i++) + pgd_val(pg_dir[i]) = 0; + + enable_mmu(); + printk ("CPU: Paging enabled\n"); + } + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long low, start_pfn; + + start_pfn = pgdat->bdata->node_boot_start; + start_pfn >>= PAGE_SHIFT; + low = pgdat->bdata->node_low_pfn; + + memset(zones_size, 0, sizeof(zones_size)); + zones_size[ZONE_NORMAL] = low - start_pfn; + + printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n", + nid, start_pfn, low); + + free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL); + + printk("Node %u: mem_map starts at %p\n", + pgdat->node_id, pgdat->node_mem_map); + } + + mem_map = NODE_DATA(0)->node_mem_map; + + memset(zero_page, 0, PAGE_SIZE); + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + int nid, i; + + reservedpages = 0; + high_memory = NULL; + + /* this will put all low memory onto the freelists */ + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long node_pages = 0; + void *node_high_memory; + + num_physpages += pgdat->node_present_pages; + + if (pgdat->node_spanned_pages != 0) + node_pages = free_all_bootmem_node(pgdat); + + totalram_pages += node_pages; + + for (i = 0; i < node_pages; i++) + if (PageReserved(pgdat->node_mem_map + i)) + reservedpages++; + + node_high_memory = (void *)((pgdat->node_start_pfn + + pgdat->node_spanned_pages) + << PAGE_SHIFT); + if (node_high_memory > high_memory) + high_memory = node_high_memory; + } + + max_mapnr = MAP_NR(high_memory); + + codesize = (unsigned long)_etext - (unsigned long)_text; + datasize = (unsigned long)_edata - (unsigned long)_data; + initsize = (unsigned long)__init_end - (unsigned long)__init_begin; + + printk ("Memory: %luk/%luk available (%dk kernel code, " + "%dk reserved, %dk data, %dk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), + totalram_pages << (PAGE_SHIFT - 10), + codesize >> 10, + reservedpages << (PAGE_SHIFT - 10), + datasize >> 10, + initsize >> 10); +} + +static inline void free_area(unsigned long addr, unsigned long end, char *s) +{ + unsigned int size = (end - addr) >> 10; + + for (; addr < end; addr += PAGE_SIZE) { + struct page *page = virt_to_page(addr); + ClearPageReserved(page); + init_page_count(page); + free_page(addr); + totalram_pages++; + } + + if (size && s) + printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n", + s, size, end - (size << 10), end); +} + +void free_initmem(void) +{ + free_area((unsigned long)__init_begin, (unsigned long)__init_end, + "init"); +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (!keep_initrd) + free_area(start, end, "initrd"); +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c new file mode 100644 index 000000000000..536021877df6 --- /dev/null +++ b/arch/avr32/mm/ioremap.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +#include +#include +#include +#include +#include + +static inline int remap_area_pte(pte_t *pte, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long pfn; + + pfn = phys_addr >> PAGE_SHIFT; + do { + WARN_ON(!pte_none(*pte)); + + set_pte(pte, pfn_pte(pfn, prot)); + address += PAGE_SIZE; + pfn++; + pte++; + } while (address && (address < end)); + + return 0; +} + +static inline int remap_area_pmd(pmd_t *pmd, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long next; + + phys_addr -= address; + + do { + pte_t *pte = pte_alloc_kernel(pmd, address); + if (!pte) + return -ENOMEM; + + next = (address + PMD_SIZE) & PMD_MASK; + if (remap_area_pte(pte, address, next, + address + phys_addr, prot)) + return -ENOMEM; + + address = next; + pmd++; + } while (address && (address < end)); + return 0; +} + +static int remap_area_pud(pud_t *pud, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long next; + + phys_addr -= address; + + do { + pmd_t *pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) + return -ENOMEM; + next = (address + PUD_SIZE) & PUD_MASK; + if (remap_area_pmd(pmd, address, next, + phys_addr + address, prot)) + return -ENOMEM; + + address = next; + pud++; + } while (address && address < end); + + return 0; +} + +static int remap_area_pages(unsigned long address, unsigned long phys_addr, + size_t size, pgprot_t prot) +{ + unsigned long end = address + size; + unsigned long next; + pgd_t *pgd; + int err = 0; + + phys_addr -= address; + + pgd = pgd_offset_k(address); + flush_cache_all(); + BUG_ON(address >= end); + + spin_lock(&init_mm.page_table_lock); + do { + pud_t *pud = pud_alloc(&init_mm, pgd, address); + + err = -ENOMEM; + if (!pud) + break; + + next = (address + PGDIR_SIZE) & PGDIR_MASK; + if (next < address || next > end) + next = end; + err = remap_area_pud(pud, address, next, + phys_addr + address, prot); + if (err) + break; + + address = next; + pgd++; + } while (address && (address < end)); + + spin_unlock(&init_mm.page_table_lock); + flush_tlb_all(); + return err; +} + +/* + * Re-map an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access physical + * memory directly. + */ +void __iomem *__ioremap(unsigned long phys_addr, size_t size, + unsigned long flags) +{ + void *addr; + struct vm_struct *area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* + * Check if we can simply use the P4 segment. This area is + * uncacheable, so if caching/buffering is requested, we can't + * use it. + */ + if ((phys_addr >= P4SEG) && (flags == 0)) + return (void __iomem *)phys_addr; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * XXX: When mapping regular RAM, we'd better make damn sure + * it's never used for anything else. But this is really the + * caller's responsibility... + */ + if (PHYSADDR(P2SEGADDR(phys_addr)) == phys_addr) + return (void __iomem *)P2SEGADDR(phys_addr); + + /* Mappings have to be page-aligned */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY + | _PAGE_ACCESSED | _PAGE_TYPE_SMALL | flags); + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = area->addr; + if (remap_area_pages((unsigned long)addr, phys_addr, size, prot)) { + vunmap(addr); + return NULL; + } + + return (void __iomem *)(offset + (char *)addr); +} +EXPORT_SYMBOL(__ioremap); + +void __iounmap(void __iomem *addr) +{ + struct vm_struct *p; + + if ((unsigned long)addr >= P4SEG) + return; + + p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); + if (unlikely(!p)) { + printk (KERN_ERR "iounmap: bad address %p\n", addr); + return; + } + + kfree (p); +} +EXPORT_SYMBOL(__iounmap); diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c new file mode 100644 index 000000000000..5d0523bbe298 --- /dev/null +++ b/arch/avr32/mm/tlb.c @@ -0,0 +1,378 @@ +/* + * AVR32 TLB operations + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include + +#define _TLBEHI_I 0x100 + +void show_dtlb_entry(unsigned int index) +{ + unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags; + + local_irq_save(flags); + mmucr_save = sysreg_read(MMUCR); + tlbehi_save = sysreg_read(TLBEHI); + mmucr = mmucr_save & 0x13; + mmucr |= index << 14; + sysreg_write(MMUCR, mmucr); + + asm volatile("tlbr" : : : "memory"); + cpu_sync_pipeline(); + + tlbehi = sysreg_read(TLBEHI); + tlbelo = sysreg_read(TLBELO); + + printk("%2u: %c %c %02x %05x %05x %o %o %c %c %c %c\n", + index, + (tlbehi & 0x200)?'1':'0', + (tlbelo & 0x100)?'1':'0', + (tlbehi & 0xff), + (tlbehi >> 12), (tlbelo >> 12), + (tlbelo >> 4) & 7, (tlbelo >> 2) & 3, + (tlbelo & 0x200)?'1':'0', + (tlbelo & 0x080)?'1':'0', + (tlbelo & 0x001)?'1':'0', + (tlbelo & 0x002)?'1':'0'); + + sysreg_write(MMUCR, mmucr_save); + sysreg_write(TLBEHI, tlbehi_save); + cpu_sync_pipeline(); + local_irq_restore(flags); +} + +void dump_dtlb(void) +{ + unsigned int i; + + printk("ID V G ASID VPN PFN AP SZ C B W D\n"); + for (i = 0; i < 32; i++) + show_dtlb_entry(i); +} + +static unsigned long last_mmucr; + +static inline void set_replacement_pointer(unsigned shift) +{ + unsigned long mmucr, mmucr_save; + + mmucr = mmucr_save = sysreg_read(MMUCR); + + /* Does this mapping already exist? */ + __asm__ __volatile__( + " tlbs\n" + " mfsr %0, %1" + : "=r"(mmucr) + : "i"(SYSREG_MMUCR)); + + if (mmucr & SYSREG_BIT(MMUCR_N)) { + /* Not found -- pick a not-recently-accessed entry */ + unsigned long rp; + unsigned long tlbar = sysreg_read(TLBARLO); + + rp = 32 - fls(tlbar); + if (rp == 32) { + rp = 0; + sysreg_write(TLBARLO, -1L); + } + + mmucr &= 0x13; + mmucr |= (rp << shift); + + sysreg_write(MMUCR, mmucr); + } + + last_mmucr = mmucr; +} + +static void update_dtlb(unsigned long address, pte_t pte, unsigned long asid) +{ + unsigned long vpn; + + vpn = (address & MMU_VPN_MASK) | _TLBEHI_VALID | asid; + sysreg_write(TLBEHI, vpn); + cpu_sync_pipeline(); + + set_replacement_pointer(14); + + sysreg_write(TLBELO, pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK); + + /* Let's go */ + asm volatile("nop\n\ttlbw" : : : "memory"); + cpu_sync_pipeline(); +} + +void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t pte) +{ + unsigned long flags; + + /* ptrace may call this routine */ + if (vma && current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + update_dtlb(address, pte, get_asid()); + local_irq_restore(flags); +} + +void __flush_tlb_page(unsigned long asid, unsigned long page) +{ + unsigned long mmucr, tlbehi; + + page |= asid; + sysreg_write(TLBEHI, page); + cpu_sync_pipeline(); + asm volatile("tlbs"); + mmucr = sysreg_read(MMUCR); + + if (!(mmucr & SYSREG_BIT(MMUCR_N))) { + unsigned long tlbarlo; + unsigned long entry; + + /* Clear the "valid" bit */ + tlbehi = sysreg_read(TLBEHI); + tlbehi &= ~_TLBEHI_VALID; + sysreg_write(TLBEHI, tlbehi); + cpu_sync_pipeline(); + + /* mark the entry as "not accessed" */ + entry = (mmucr >> 14) & 0x3f; + tlbarlo = sysreg_read(TLBARLO); + tlbarlo |= (0x80000000 >> entry); + sysreg_write(TLBARLO, tlbarlo); + + /* update the entry with valid bit clear */ + asm volatile("tlbw"); + cpu_sync_pipeline(); + } +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + if (vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) { + unsigned long flags, asid; + unsigned long saved_asid = MMU_NO_ASID; + + asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK; + page &= PAGE_MASK; + + local_irq_save(flags); + if (vma->vm_mm != current->mm) { + saved_asid = get_asid(); + set_asid(asid); + } + + __flush_tlb_page(asid, page); + + if (saved_asid != MMU_NO_ASID) + set_asid(saved_asid); + local_irq_restore(flags); + } +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm->context != NO_CONTEXT) { + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */ + mm->context = NO_CONTEXT; + if (mm == current->mm) + activate_context(mm); + } else { + unsigned long asid = mm->context & MMU_CONTEXT_ASID_MASK; + unsigned long saved_asid = MMU_NO_ASID; + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + if (mm != current->mm) { + saved_asid = get_asid(); + set_asid(asid); + } + + while (start < end) { + __flush_tlb_page(asid, start); + start += PAGE_SIZE; + } + if (saved_asid != MMU_NO_ASID) + set_asid(saved_asid); + } + local_irq_restore(flags); + } +} + +/* + * TODO: If this is only called for addresses > TASK_SIZE, we can probably + * skip the ASID stuff and just use the Global bit... + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */ + flush_tlb_all(); + } else { + unsigned long asid = init_mm.context & MMU_CONTEXT_ASID_MASK; + unsigned long saved_asid = get_asid(); + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + set_asid(asid); + while (start < end) { + __flush_tlb_page(asid, start); + start += PAGE_SIZE; + } + set_asid(saved_asid); + } + local_irq_restore(flags); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + /* Invalidate all TLB entries of this process by getting a new ASID */ + if (mm->context != NO_CONTEXT) { + unsigned long flags; + + local_irq_save(flags); + mm->context = NO_CONTEXT; + if (mm == current->mm) + activate_context(mm); + local_irq_restore(flags); + } +} + +void flush_tlb_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + sysreg_write(MMUCR, sysreg_read(MMUCR) | SYSREG_BIT(MMUCR_I)); + local_irq_restore(flags); +} + +#ifdef CONFIG_PROC_FS + +#include +#include +#include + +static void *tlb_start(struct seq_file *tlb, loff_t *pos) +{ + static unsigned long tlb_index; + + if (*pos >= 32) + return NULL; + + tlb_index = 0; + return &tlb_index; +} + +static void *tlb_next(struct seq_file *tlb, void *v, loff_t *pos) +{ + unsigned long *index = v; + + if (*index >= 31) + return NULL; + + ++*pos; + ++*index; + return index; +} + +static void tlb_stop(struct seq_file *tlb, void *v) +{ + +} + +static int tlb_show(struct seq_file *tlb, void *v) +{ + unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags; + unsigned long *index = v; + + if (*index == 0) + seq_puts(tlb, "ID V G ASID VPN PFN AP SZ C B W D\n"); + + BUG_ON(*index >= 32); + + local_irq_save(flags); + mmucr_save = sysreg_read(MMUCR); + tlbehi_save = sysreg_read(TLBEHI); + mmucr = mmucr_save & 0x13; + mmucr |= *index << 14; + sysreg_write(MMUCR, mmucr); + + asm volatile("tlbr" : : : "memory"); + cpu_sync_pipeline(); + + tlbehi = sysreg_read(TLBEHI); + tlbelo = sysreg_read(TLBELO); + + sysreg_write(MMUCR, mmucr_save); + sysreg_write(TLBEHI, tlbehi_save); + cpu_sync_pipeline(); + local_irq_restore(flags); + + seq_printf(tlb, "%2lu: %c %c %02x %05x %05x %o %o %c %c %c %c\n", + *index, + (tlbehi & 0x200)?'1':'0', + (tlbelo & 0x100)?'1':'0', + (tlbehi & 0xff), + (tlbehi >> 12), (tlbelo >> 12), + (tlbelo >> 4) & 7, (tlbelo >> 2) & 3, + (tlbelo & 0x200)?'1':'0', + (tlbelo & 0x080)?'1':'0', + (tlbelo & 0x001)?'1':'0', + (tlbelo & 0x002)?'1':'0'); + + return 0; +} + +static struct seq_operations tlb_ops = { + .start = tlb_start, + .next = tlb_next, + .stop = tlb_stop, + .show = tlb_show, +}; + +static int tlb_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tlb_ops); +} + +static struct file_operations proc_tlb_operations = { + .open = tlb_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proctlb_init(void) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry("tlb", 0, NULL); + if (entry) + entry->proc_fops = &proc_tlb_operations; + return 0; +} +late_initcall(proctlb_init); +#endif /* CONFIG_PROC_FS */ diff --git a/include/asm-avr32/Kbuild b/include/asm-avr32/Kbuild new file mode 100644 index 000000000000..8770e73ce938 --- /dev/null +++ b/include/asm-avr32/Kbuild @@ -0,0 +1,3 @@ +include include/asm-generic/Kbuild.asm + +headers-y += cachectl.h diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h new file mode 100644 index 000000000000..50bf6e31a143 --- /dev/null +++ b/include/asm-avr32/a.out.h @@ -0,0 +1,26 @@ +#ifndef __ASM_AVR32_A_OUT_H +#define __ASM_AVR32_A_OUT_H + +struct exec +{ + unsigned long a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#ifdef __KERNEL__ + +#define STACK_TOP TASK_SIZE + +#endif + +#endif /* __ASM_AVR32_A_OUT_H */ diff --git a/include/asm-avr32/addrspace.h b/include/asm-avr32/addrspace.h new file mode 100644 index 000000000000..366794858ec7 --- /dev/null +++ b/include/asm-avr32/addrspace.h @@ -0,0 +1,43 @@ +/* + * Defitions for the address spaces of the AVR32 CPUs. Heavily based on + * include/asm-sh/addrspace.h + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ADDRSPACE_H +#define __ASM_AVR32_ADDRSPACE_H + +#ifdef CONFIG_MMU + +/* Memory segments when segmentation is enabled */ +#define P0SEG 0x00000000 +#define P1SEG 0x80000000 +#define P2SEG 0xa0000000 +#define P3SEG 0xc0000000 +#define P4SEG 0xe0000000 + +/* Returns the privileged segment base of a given address */ +#define PXSEG(a) (((unsigned long)(a)) & 0xe0000000) + +/* Returns the physical address of a PnSEG (n=1,2) address */ +#define PHYSADDR(a) (((unsigned long)(a)) & 0x1fffffff) + +/* + * Map an address to a certain privileged segment + */ +#define P1SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P1SEG)) +#define P2SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P2SEG)) +#define P3SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P3SEG)) +#define P4SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P4SEG)) + +#endif /* CONFIG_MMU */ + +#endif /* __ASM_AVR32_ADDRSPACE_H */ diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h new file mode 100644 index 000000000000..ce1150d4438d --- /dev/null +++ b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h @@ -0,0 +1,36 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * Peripheral Data Controller (PDC) registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef AT91RM9200_PDC_H +#define AT91RM9200_PDC_H + +#define AT91_PDC_RPR 0x100 /* Receive Pointer Register */ +#define AT91_PDC_RCR 0x104 /* Receive Counter Register */ +#define AT91_PDC_TPR 0x108 /* Transmit Pointer Register */ +#define AT91_PDC_TCR 0x10c /* Transmit Counter Register */ +#define AT91_PDC_RNPR 0x110 /* Receive Next Pointer Register */ +#define AT91_PDC_RNCR 0x114 /* Receive Next Counter Register */ +#define AT91_PDC_TNPR 0x118 /* Transmit Next Pointer Register */ +#define AT91_PDC_TNCR 0x11c /* Transmit Next Counter Register */ + +#define AT91_PDC_PTCR 0x120 /* Transfer Control Register */ +#define AT91_PDC_RXTEN (1 << 0) /* Receiver Transfer Enable */ +#define AT91_PDC_RXTDIS (1 << 1) /* Receiver Transfer Disable */ +#define AT91_PDC_TXTEN (1 << 8) /* Transmitter Transfer Enable */ +#define AT91_PDC_TXTDIS (1 << 9) /* Transmitter Transfer Disable */ + +#define AT91_PDC_PTSR 0x124 /* Transfer Status Register */ + +#endif diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_usart.h b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h new file mode 100644 index 000000000000..79f851e31b9c --- /dev/null +++ b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h @@ -0,0 +1,123 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * USART registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef AT91RM9200_USART_H +#define AT91RM9200_USART_H + +#define AT91_US_CR 0x00 /* Control Register */ +#define AT91_US_RSTRX (1 << 2) /* Reset Receiver */ +#define AT91_US_RSTTX (1 << 3) /* Reset Transmitter */ +#define AT91_US_RXEN (1 << 4) /* Receiver Enable */ +#define AT91_US_RXDIS (1 << 5) /* Receiver Disable */ +#define AT91_US_TXEN (1 << 6) /* Transmitter Enable */ +#define AT91_US_TXDIS (1 << 7) /* Transmitter Disable */ +#define AT91_US_RSTSTA (1 << 8) /* Reset Status Bits */ +#define AT91_US_STTBRK (1 << 9) /* Start Break */ +#define AT91_US_STPBRK (1 << 10) /* Stop Break */ +#define AT91_US_STTTO (1 << 11) /* Start Time-out */ +#define AT91_US_SENDA (1 << 12) /* Send Address */ +#define AT91_US_RSTIT (1 << 13) /* Reset Iterations */ +#define AT91_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */ +#define AT91_US_RETTO (1 << 15) /* Rearm Time-out */ +#define AT91_US_DTREN (1 << 16) /* Data Terminal Ready Enable */ +#define AT91_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable */ +#define AT91_US_RTSEN (1 << 18) /* Request To Send Enable */ +#define AT91_US_RTSDIS (1 << 19) /* Request To Send Disable */ + +#define AT91_US_MR 0x04 /* Mode Register */ +#define AT91_US_USMODE (0xf << 0) /* Mode of the USART */ +#define AT91_US_USMODE_NORMAL 0 +#define AT91_US_USMODE_RS485 1 +#define AT91_US_USMODE_HWHS 2 +#define AT91_US_USMODE_MODEM 3 +#define AT91_US_USMODE_ISO7816_T0 4 +#define AT91_US_USMODE_ISO7816_T1 6 +#define AT91_US_USMODE_IRDA 8 +#define AT91_US_USCLKS (3 << 4) /* Clock Selection */ +#define AT91_US_CHRL (3 << 6) /* Character Length */ +#define AT91_US_CHRL_5 (0 << 6) +#define AT91_US_CHRL_6 (1 << 6) +#define AT91_US_CHRL_7 (2 << 6) +#define AT91_US_CHRL_8 (3 << 6) +#define AT91_US_SYNC (1 << 8) /* Synchronous Mode Select */ +#define AT91_US_PAR (7 << 9) /* Parity Type */ +#define AT91_US_PAR_EVEN (0 << 9) +#define AT91_US_PAR_ODD (1 << 9) +#define AT91_US_PAR_SPACE (2 << 9) +#define AT91_US_PAR_MARK (3 << 9) +#define AT91_US_PAR_NONE (4 << 9) +#define AT91_US_PAR_MULTI_DROP (6 << 9) +#define AT91_US_NBSTOP (3 << 12) /* Number of Stop Bits */ +#define AT91_US_NBSTOP_1 (0 << 12) +#define AT91_US_NBSTOP_1_5 (1 << 12) +#define AT91_US_NBSTOP_2 (2 << 12) +#define AT91_US_CHMODE (3 << 14) /* Channel Mode */ +#define AT91_US_CHMODE_NORMAL (0 << 14) +#define AT91_US_CHMODE_ECHO (1 << 14) +#define AT91_US_CHMODE_LOC_LOOP (2 << 14) +#define AT91_US_CHMODE_REM_LOOP (3 << 14) +#define AT91_US_MSBF (1 << 16) /* Bit Order */ +#define AT91_US_MODE9 (1 << 17) /* 9-bit Character Length */ +#define AT91_US_CLKO (1 << 18) /* Clock Output Select */ +#define AT91_US_OVER (1 << 19) /* Oversampling Mode */ +#define AT91_US_INACK (1 << 20) /* Inhibit Non Acknowledge */ +#define AT91_US_DSNACK (1 << 21) /* Disable Successive NACK */ +#define AT91_US_MAX_ITER (7 << 24) /* Max Iterations */ +#define AT91_US_FILTER (1 << 28) /* Infrared Receive Line Filter */ + +#define AT91_US_IER 0x08 /* Interrupt Enable Register */ +#define AT91_US_RXRDY (1 << 0) /* Receiver Ready */ +#define AT91_US_TXRDY (1 << 1) /* Transmitter Ready */ +#define AT91_US_RXBRK (1 << 2) /* Break Received / End of Break */ +#define AT91_US_ENDRX (1 << 3) /* End of Receiver Transfer */ +#define AT91_US_ENDTX (1 << 4) /* End of Transmitter Transfer */ +#define AT91_US_OVRE (1 << 5) /* Overrun Error */ +#define AT91_US_FRAME (1 << 6) /* Framing Error */ +#define AT91_US_PARE (1 << 7) /* Parity Error */ +#define AT91_US_TIMEOUT (1 << 8) /* Receiver Time-out */ +#define AT91_US_TXEMPTY (1 << 9) /* Transmitter Empty */ +#define AT91_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */ +#define AT91_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */ +#define AT91_US_RXBUFF (1 << 12) /* Reception Buffer Full */ +#define AT91_US_NACK (1 << 13) /* Non Acknowledge */ +#define AT91_US_RIIC (1 << 16) /* Ring Indicator Input Change */ +#define AT91_US_DSRIC (1 << 17) /* Data Set Ready Input Change */ +#define AT91_US_DCDIC (1 << 18) /* Data Carrier Detect Input Change */ +#define AT91_US_CTSIC (1 << 19) /* Clear to Send Input Change */ +#define AT91_US_RI (1 << 20) /* RI */ +#define AT91_US_DSR (1 << 21) /* DSR */ +#define AT91_US_DCD (1 << 22) /* DCD */ +#define AT91_US_CTS (1 << 23) /* CTS */ + +#define AT91_US_IDR 0x0c /* Interrupt Disable Register */ +#define AT91_US_IMR 0x10 /* Interrupt Mask Register */ +#define AT91_US_CSR 0x14 /* Channel Status Register */ +#define AT91_US_RHR 0x18 /* Receiver Holding Register */ +#define AT91_US_THR 0x1c /* Transmitter Holding Register */ + +#define AT91_US_BRGR 0x20 /* Baud Rate Generator Register */ +#define AT91_US_CD (0xffff << 0) /* Clock Divider */ + +#define AT91_US_RTOR 0x24 /* Receiver Time-out Register */ +#define AT91_US_TO (0xffff << 0) /* Time-out Value */ + +#define AT91_US_TTGR 0x28 /* Transmitter Timeguard Register */ +#define AT91_US_TG (0xff << 0) /* Timeguard Value */ + +#define AT91_US_FIDI 0x40 /* FI DI Ratio Register */ +#define AT91_US_NER 0x44 /* Number of Errors Register */ +#define AT91_US_IF 0x4c /* IrDA Filter Register */ + +#endif diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h new file mode 100644 index 000000000000..39368e18ab20 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/board.h @@ -0,0 +1,35 @@ +/* + * Platform data definitions. + */ +#ifndef __ASM_ARCH_BOARD_H +#define __ASM_ARCH_BOARD_H + +#include + +/* Add basic devices: system manager, interrupt controller, portmuxes, etc. */ +void at32_add_system_devices(void); + +#define AT91_NR_UART 4 +extern struct platform_device *at91_default_console_device; + +struct platform_device *at32_add_device_usart(unsigned int id); + +struct eth_platform_data { + u8 valid; + u8 mii_phy_addr; + u8 is_rmii; + u8 hw_addr[6]; +}; +struct platform_device * +at32_add_device_eth(unsigned int id, struct eth_platform_data *data); + +struct platform_device *at32_add_device_spi(unsigned int id); + +struct lcdc_platform_data { + unsigned long fbmem_start; + unsigned long fbmem_size; +}; +struct platform_device * +at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data); + +#endif /* __ASM_ARCH_BOARD_H */ diff --git a/include/asm-avr32/arch-at32ap/init.h b/include/asm-avr32/arch-at32ap/init.h new file mode 100644 index 000000000000..43722634e069 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/init.h @@ -0,0 +1,21 @@ +/* + * AT32AP platform initialization calls. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32AP_INIT_H__ +#define __ASM_AVR32_AT32AP_INIT_H__ + +void setup_platform(void); + +/* Called by setup_platform */ +void at32_clock_init(void); +void at32_portmux_init(void); + +void at32_setup_serial_console(unsigned int usart_id); + +#endif /* __ASM_AVR32_AT32AP_INIT_H__ */ diff --git a/include/asm-avr32/arch-at32ap/portmux.h b/include/asm-avr32/arch-at32ap/portmux.h new file mode 100644 index 000000000000..4d50421262a1 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/portmux.h @@ -0,0 +1,16 @@ +/* + * AT32 portmux interface. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32_PORTMUX_H__ +#define __ASM_AVR32_AT32_PORTMUX_H__ + +void portmux_set_func(unsigned int portmux_id, unsigned int pin_id, + unsigned int function_id); + +#endif /* __ASM_AVR32_AT32_PORTMUX_H__ */ diff --git a/include/asm-avr32/arch-at32ap/sm.h b/include/asm-avr32/arch-at32ap/sm.h new file mode 100644 index 000000000000..265a9ead20bf --- /dev/null +++ b/include/asm-avr32/arch-at32ap/sm.h @@ -0,0 +1,27 @@ +/* + * AT32 System Manager interface. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32_SM_H__ +#define __ASM_AVR32_AT32_SM_H__ + +struct irq_chip; +struct platform_device; + +struct at32_sm { + spinlock_t lock; + void __iomem *regs; + struct irq_chip *eim_chip; + unsigned int eim_first_irq; + struct platform_device *pdev; +}; + +extern struct platform_device at32_sm_device; +extern struct at32_sm system_manager; + +#endif /* __ASM_AVR32_AT32_SM_H__ */ diff --git a/include/asm-avr32/asm.h b/include/asm-avr32/asm.h new file mode 100644 index 000000000000..515c7618952b --- /dev/null +++ b/include/asm-avr32/asm.h @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ASM_H__ +#define __ASM_AVR32_ASM_H__ + +#include +#include +#include + +#define mask_interrupts ssrf SR_GM_BIT +#define mask_exceptions ssrf SR_EM_BIT +#define unmask_interrupts csrf SR_GM_BIT +#define unmask_exceptions csrf SR_EM_BIT + +#ifdef CONFIG_FRAME_POINTER + .macro save_fp + st.w --sp, r7 + .endm + .macro restore_fp + ld.w r7, sp++ + .endm + .macro zero_fp + mov r7, 0 + .endm +#else + .macro save_fp + .endm + .macro restore_fp + .endm + .macro zero_fp + .endm +#endif + .macro get_thread_info reg + mov \reg, sp + andl \reg, ~(THREAD_SIZE - 1) & 0xffff + .endm + + /* Save and restore registers */ + .macro save_min sr, tmp=lr + pushm lr + mfsr \tmp, \sr + zero_fp + st.w --sp, \tmp + .endm + + .macro restore_min sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + popm lr + .endm + + .macro save_half sr, tmp=lr + save_fp + pushm r8-r9,r10,r11,r12,lr + zero_fp + mfsr \tmp, \sr + st.w --sp, \tmp + .endm + + .macro restore_half sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + popm r8-r9,r10,r11,r12,lr + restore_fp + .endm + + .macro save_full_user sr, tmp=lr + stmts --sp, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr + st.w --sp, lr + zero_fp + mfsr \tmp, \sr + st.w --sp, \tmp + .endm + + .macro restore_full_user sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + ld.w lr, sp++ + ldmts sp++, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr + .endm + + /* uaccess macros */ + .macro branch_if_kernel scratch, label + get_thread_info \scratch + ld.w \scratch, \scratch[TI_flags] + bld \scratch, TIF_USERSPACE + brcc \label + .endm + + .macro ret_if_privileged scratch, addr, size, ret + sub \scratch, \size, 1 + add \scratch, \addr + retcs \ret + retmi \ret + .endm + +#endif /* __ASM_AVR32_ASM_H__ */ diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h new file mode 100644 index 000000000000..e0b9c44c126c --- /dev/null +++ b/include/asm-avr32/atomic.h @@ -0,0 +1,201 @@ +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc. + * + * But use these as seldom as possible since they are slower than + * regular operations. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ATOMIC_H +#define __ASM_AVR32_ATOMIC_H + +#include + +typedef struct { volatile int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } + +#define atomic_read(v) ((v)->counter) +#define atomic_set(v, i) (((v)->counter) = i) + +/* + * atomic_sub_return - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Returns the resulting value. + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + int result; + + asm volatile( + "/* atomic_sub_return */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sub %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(i) + : "cc"); + + return result; +} + +/* + * atomic_add_return - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Returns the resulting value. + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + int result; + + if (__builtin_constant_p(i)) + result = atomic_sub_return(-i, v); + else + asm volatile( + "/* atomic_add_return */\n" + "1: ssrf 5\n" + " ld.w %0, %1\n" + " add %0, %3\n" + " stcond %2, %0\n" + " brne 1b" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "r"(i) + : "cc", "memory"); + + return result; +} + +/* + * atomic_sub_unless - sub unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * If the atomic value v is not equal to u, this function subtracts a + * from v, and returns non zero. If v is equal to u then it returns + * zero. This is done as an atomic operation. +*/ +static inline int atomic_sub_unless(atomic_t *v, int a, int u) +{ + int tmp, result = 0; + + asm volatile( + "/* atomic_sub_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %3\n" + " cp.w %0, %5\n" + " breq 1f\n" + " sub %0, %4\n" + " stcond %2, %0\n" + " brne 1b\n" + " mov %1, 1\n" + "1:" + : "=&r"(tmp), "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(a), "ir"(u) + : "cc", "memory"); + + return result; +} + +/* + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * If the atomic value v is not equal to u, this function adds a to v, + * and returns non zero. If v is equal to u then it returns zero. This + * is done as an atomic operation. +*/ +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int tmp, result; + + if (__builtin_constant_p(a)) + result = atomic_sub_unless(v, -a, u); + else { + result = 0; + asm volatile( + "/* atomic_add_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %3\n" + " cp.w %0, %5\n" + " breq 1f\n" + " add %0, %4\n" + " stcond %2, %0\n" + " brne 1b\n" + " mov %1, 1\n" + "1:" + : "=&r"(tmp), "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "r"(a), "ir"(u) + : "cc", "memory"); + } + + return result; +} + +/* + * atomic_sub_if_positive - conditionally subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically test @v and subtract @i if @v is greater or equal than @i. + * The function returns the old value of @v minus @i. + */ +static inline int atomic_sub_if_positive(int i, atomic_t *v) +{ + int result; + + asm volatile( + "/* atomic_sub_if_positive */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sub %0, %3\n" + " brlt 1f\n" + " stcond %1, %0\n" + " brne 1b\n" + "1:" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(i) + : "cc", "memory"); + + return result; +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) + +#define atomic_sub(i, v) (void)atomic_sub_return(i, v) +#define atomic_add(i, v) (void)atomic_add_return(i, v) +#define atomic_dec(v) atomic_sub(1, (v)) +#define atomic_inc(v) atomic_add(1, (v)) + +#define atomic_dec_return(v) atomic_sub_return(1, v) +#define atomic_inc_return(v) atomic_add_return(1, v) + +#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) +#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) +#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) + +#define atomic_inc_not_zero(v) atomic_add_unless(v, 1, 0) +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) + +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include + +#endif /* __ASM_AVR32_ATOMIC_H */ diff --git a/include/asm-avr32/auxvec.h b/include/asm-avr32/auxvec.h new file mode 100644 index 000000000000..d5dd435bf8f4 --- /dev/null +++ b/include/asm-avr32/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_AVR32_AUXVEC_H +#define __ASM_AVR32_AUXVEC_H + +#endif /* __ASM_AVR32_AUXVEC_H */ diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h new file mode 100644 index 000000000000..5299f8c8e11d --- /dev/null +++ b/include/asm-avr32/bitops.h @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_BITOPS_H +#define __ASM_AVR32_BITOPS_H + +#include +#include + +/* + * clear_bit() doesn't provide any barrier for the compiler + */ +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/* + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long tmp; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sbr %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "i"(nr) + : "cc"); + } else { + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " or %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); + } +} + +/* + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long tmp; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " cbr %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "i"(nr) + : "cc"); + } else { + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " andn %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); + } +} + +/* + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp; + + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " eor %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); +} + +/* + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " sbr %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "i"(nr) + : "memory", "cc"); + } else { + asm volatile( + "1: ssrf 5\n" + " ld.w %2, %3\n" + " or %0, %2, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + } + + return (old & mask) != 0; +} + +/* + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " cbr %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "i"(nr) + : "memory", "cc"); + } else { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " andn %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + } + + return (old & mask) != 0; +} + +/* + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + asm volatile( + "1: ssrf 5\n" + " ld.w %2, %3\n" + " eor %0, %2, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + + return (old & mask) != 0; +} + +#include + +/* Find First bit Set */ +static inline unsigned long __ffs(unsigned long word) +{ + unsigned long result; + + asm("brev %1\n\t" + "clz %0,%1" + : "=r"(result), "=&r"(word) + : "1"(word)); + return result; +} + +/* Find First Zero */ +static inline unsigned long ffz(unsigned long word) +{ + return __ffs(~word); +} + +/* Find Last bit Set */ +static inline int fls(unsigned long word) +{ + unsigned long result; + + asm("clz %0,%1" : "=r"(result) : "r"(word)); + return 32 - result; +} + +unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); +unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +/* + * ffs: find first bit set. This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + * + * The difference is that bit numbering starts at 1, and if no bit is set, + * the function returns 0. + */ +static inline int ffs(unsigned long word) +{ + if(word == 0) + return 0; + return __ffs(word) + 1; +} + +#include +#include +#include + +#include +#include +#include + +#endif /* __ASM_AVR32_BITOPS_H */ diff --git a/include/asm-avr32/bug.h b/include/asm-avr32/bug.h new file mode 100644 index 000000000000..521766bc9366 --- /dev/null +++ b/include/asm-avr32/bug.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_BUG_H +#define __ASM_AVR32_BUG_H + +#ifdef CONFIG_BUG + +/* + * According to our Chief Architect, this compact opcode is very + * unlikely to ever be implemented. + */ +#define AVR32_BUG_OPCODE 0x5df0 + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#define BUG() \ + do { \ + asm volatile(".hword %0\n\t" \ + ".hword %1\n\t" \ + ".long %2" \ + : \ + : "n"(AVR32_BUG_OPCODE), \ + "i"(__LINE__), "X"(__FILE__)); \ + } while (0) + +#else + +#define BUG() \ + do { \ + asm volatile(".hword %0\n\t" \ + : : "n"(AVR32_BUG_OPCODE)); \ + } while (0) + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#define HAVE_ARCH_BUG + +#endif /* CONFIG_BUG */ + +#include + +#endif /* __ASM_AVR32_BUG_H */ diff --git a/include/asm-avr32/bugs.h b/include/asm-avr32/bugs.h new file mode 100644 index 000000000000..7635e770622e --- /dev/null +++ b/include/asm-avr32/bugs.h @@ -0,0 +1,15 @@ +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ +#ifndef __ASM_AVR32_BUGS_H +#define __ASM_AVR32_BUGS_H + +static void __init check_bugs(void) +{ + cpu_data->loops_per_jiffy = loops_per_jiffy; +} + +#endif /* __ASM_AVR32_BUGS_H */ diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h new file mode 100644 index 000000000000..402ff4125cdc --- /dev/null +++ b/include/asm-avr32/byteorder.h @@ -0,0 +1,25 @@ +/* + * AVR32 endian-conversion functions. + */ +#ifndef __ASM_AVR32_BYTEORDER_H +#define __ASM_AVR32_BYTEORDER_H + +#include +#include + +#ifdef __CHECKER__ +extern unsigned long __builtin_bswap_32(unsigned long x); +extern unsigned short __builtin_bswap_16(unsigned short x); +#endif + +#define __arch__swab32(x) __builtin_bswap_32(x) +#define __arch__swab16(x) __builtin_bswap_16(x) + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +# define __BYTEORDER_HAS_U64__ +# define __SWAB_64_THRU_32__ +#endif + +#include + +#endif /* __ASM_AVR32_BYTEORDER_H */ diff --git a/include/asm-avr32/cache.h b/include/asm-avr32/cache.h new file mode 100644 index 000000000000..dabb955f3c00 --- /dev/null +++ b/include/asm-avr32/cache.h @@ -0,0 +1,29 @@ +#ifndef __ASM_AVR32_CACHE_H +#define __ASM_AVR32_CACHE_H + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#ifndef __ASSEMBLER__ +struct cache_info { + unsigned int ways; + unsigned int sets; + unsigned int linesz; +}; +#endif /* __ASSEMBLER */ + +/* Cache operation constants */ +#define ICACHE_FLUSH 0x00 +#define ICACHE_INVALIDATE 0x01 +#define ICACHE_LOCK 0x02 +#define ICACHE_UNLOCK 0x03 +#define ICACHE_PREFETCH 0x04 + +#define DCACHE_FLUSH 0x08 +#define DCACHE_LOCK 0x09 +#define DCACHE_UNLOCK 0x0a +#define DCACHE_INVALIDATE 0x0b +#define DCACHE_CLEAN 0x0c +#define DCACHE_CLEAN_INVAL 0x0d + +#endif /* __ASM_AVR32_CACHE_H */ diff --git a/include/asm-avr32/cachectl.h b/include/asm-avr32/cachectl.h new file mode 100644 index 000000000000..4faf1ce60061 --- /dev/null +++ b/include/asm-avr32/cachectl.h @@ -0,0 +1,11 @@ +#ifndef __ASM_AVR32_CACHECTL_H +#define __ASM_AVR32_CACHECTL_H + +/* + * Operations that can be performed through the cacheflush system call + */ + +/* Clean the data cache, then invalidate the icache */ +#define CACHE_IFLUSH 0 + +#endif /* __ASM_AVR32_CACHECTL_H */ diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h new file mode 100644 index 000000000000..f1bf1708980e --- /dev/null +++ b/include/asm-avr32/cacheflush.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_CACHEFLUSH_H +#define __ASM_AVR32_CACHEFLUSH_H + +/* Keep includes the same across arches. */ +#include + +#define CACHE_OP_ICACHE_INVALIDATE 0x01 +#define CACHE_OP_DCACHE_INVALIDATE 0x0b +#define CACHE_OP_DCACHE_CLEAN 0x0c +#define CACHE_OP_DCACHE_CLEAN_INVAL 0x0d + +/* + * Invalidate any cacheline containing virtual address vaddr without + * writing anything back to memory. + * + * Note that this function may corrupt unrelated data structures when + * applied on buffers that are not cacheline aligned in both ends. + */ +static inline void invalidate_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_INVALIDATE) + : "memory"); +} + +/* + * Make sure any cacheline containing virtual address vaddr is written + * to memory. + */ +static inline void clean_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN) + : "memory"); +} + +/* + * Make sure any cacheline containing virtual address vaddr is written + * to memory and then invalidate it. + */ +static inline void flush_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN_INVAL) + : "memory"); +} + +/* + * Invalidate any instruction cacheline containing virtual address + * vaddr. + */ +static inline void invalidate_icache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_ICACHE_INVALIDATE) + : "memory"); +} + +/* + * Applies the above functions on all lines that are touched by the + * specified virtual address range. + */ +void invalidate_dcache_region(void *start, size_t len); +void clean_dcache_region(void *start, size_t len); +void flush_dcache_region(void *start, size_t len); +void invalidate_icache_region(void *start, size_t len); + +/* + * Make sure any pending writes are completed before continuing. + */ +#define flush_write_buffer() asm volatile("sync 0" : : : "memory") + +/* + * The following functions are called when a virtual mapping changes. + * We do not need to flush anything in this case. + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +/* + * I think we need to implement this one to be able to reliably + * execute pages from RAMDISK. However, if we implement the + * flush_dcache_*() functions, it might not be needed anymore. + * + * #define flush_icache_page(vma, page) do { } while (0) + */ +extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); + +/* + * These are (I think) related to D-cache aliasing. We might need to + * do something here, but only for certain configurations. No such + * configurations exist at this time. + */ +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(page) do { } while (0) +#define flush_dcache_mmap_unlock(page) do { } while (0) + +/* + * These are for I/D cache coherency. In this case, we do need to + * flush with all configurations. + */ +extern void flush_icache_range(unsigned long start, unsigned long end); +extern void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, + unsigned long addr, int len); + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) do { \ + memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ +} while(0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* __ASM_AVR32_CACHEFLUSH_H */ diff --git a/include/asm-avr32/checksum.h b/include/asm-avr32/checksum.h new file mode 100644 index 000000000000..41b7af09edc4 --- /dev/null +++ b/include/asm-avr32/checksum.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_CHECKSUM_H +#define __ASM_AVR32_CHECKSUM_H + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +unsigned int csum_partial(const unsigned char * buff, int len, + unsigned int sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +unsigned int csum_partial_copy_generic(const char *src, char *dst, int len, + int sum, int *src_err_ptr, + int *dst_err_ptr); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * verify_area(). + */ +static inline +unsigned int csum_partial_copy_nocheck(const char *src, char *dst, + int len, int sum) +{ + return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); +} + +static inline +unsigned int csum_partial_copy_from_user (const char __user *src, char *dst, + int len, int sum, int *err_ptr) +{ + return csum_partial_copy_generic((const char __force *)src, dst, len, + sum, err_ptr, NULL); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +static inline unsigned short ip_fast_csum(unsigned char *iph, + unsigned int ihl) +{ + unsigned int sum, tmp; + + __asm__ __volatile__( + " ld.w %0, %1++\n" + " ld.w %3, %1++\n" + " sub %2, 4\n" + " add %0, %3\n" + " ld.w %3, %1++\n" + " adc %0, %0, %3\n" + " ld.w %3, %1++\n" + " adc %0, %0, %3\n" + " acr %0\n" + "1: ld.w %3, %1++\n" + " add %0, %3\n" + " acr %0\n" + " sub %2, 1\n" + " brne 1b\n" + " lsl %3, %0, 16\n" + " andl %0, 0\n" + " mov %2, 0xffff\n" + " add %0, %3\n" + " adc %0, %0, %2\n" + " com %0\n" + " lsr %0, 16\n" + : "=r"(sum), "=r"(iph), "=r"(ihl), "=r"(tmp) + : "1"(iph), "2"(ihl) + : "memory", "cc"); + return sum; +} + +/* + * Fold a partial checksum + */ + +static inline unsigned int csum_fold(unsigned int sum) +{ + unsigned int tmp; + + asm(" bfextu %1, %0, 0, 16\n" + " lsr %0, 16\n" + " add %0, %1\n" + " bfextu %1, %0, 16, 16\n" + " add %0, %1" + : "=&r"(sum), "=&r"(tmp) + : "0"(sum)); + + return ~sum; +} + +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + asm(" add %0, %1\n" + " adc %0, %0, %2\n" + " adc %0, %0, %3\n" + " acr %0" + : "=r"(sum) + : "r"(daddr), "r"(saddr), "r"(ntohs(len) | (proto << 16)), + "0"(sum) + : "cc"); + + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#endif /* __ASM_AVR32_CHECKSUM_H */ diff --git a/include/asm-avr32/cputime.h b/include/asm-avr32/cputime.h new file mode 100644 index 000000000000..e87e0f81cbeb --- /dev/null +++ b/include/asm-avr32/cputime.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_CPUTIME_H +#define __ASM_AVR32_CPUTIME_H + +#include + +#endif /* __ASM_AVR32_CPUTIME_H */ diff --git a/include/asm-avr32/current.h b/include/asm-avr32/current.h new file mode 100644 index 000000000000..c7b0549eab8a --- /dev/null +++ b/include/asm-avr32/current.h @@ -0,0 +1,15 @@ +#ifndef __ASM_AVR32_CURRENT_H +#define __ASM_AVR32_CURRENT_H + +#include + +struct task_struct; + +inline static struct task_struct * get_current(void) +{ + return current_thread_info()->task; +} + +#define current get_current() + +#endif /* __ASM_AVR32_CURRENT_H */ diff --git a/include/asm-avr32/delay.h b/include/asm-avr32/delay.h new file mode 100644 index 000000000000..cc3b2e3343b3 --- /dev/null +++ b/include/asm-avr32/delay.h @@ -0,0 +1,26 @@ +#ifndef __ASM_AVR32_DELAY_H +#define __ASM_AVR32_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/avr32/lib/delay.c + */ + +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); +extern void __const_udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ + __udelay(n)) + +#define ndelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ + __ndelay(n)) + +#endif /* __ASM_AVR32_DELAY_H */ diff --git a/include/asm-avr32/div64.h b/include/asm-avr32/div64.h new file mode 100644 index 000000000000..d7ddd4fdeca6 --- /dev/null +++ b/include/asm-avr32/div64.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_DIV64_H +#define __ASM_AVR32_DIV64_H + +#include + +#endif /* __ASM_AVR32_DIV64_H */ diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h new file mode 100644 index 000000000000..4c40cb41cdf8 --- /dev/null +++ b/include/asm-avr32/dma-mapping.h @@ -0,0 +1,320 @@ +#ifndef __ASM_AVR32_DMA_MAPPING_H +#define __ASM_AVR32_DMA_MAPPING_H + +#include +#include +#include +#include +#include +#include + +extern void dma_cache_sync(void *vaddr, size_t size, int direction); + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask + * to this function. + */ +static inline int dma_supported(struct device *dev, u64 mask) +{ + /* Fix when needed. I really don't know of any limitations */ + return 1; +} + +static inline int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + return 0; +} + +/** + * dma_alloc_coherent - allocate consistent memory for DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: required memory size + * @handle: bus-specific DMA address + * + * Allocate some uncached, unbuffered memory for a device for + * performing DMA. This function allocates pages, and will + * return the CPU-viewed address, and sets @handle to be the + * device-viewed address. + */ +extern void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); + +/** + * dma_free_coherent - free memory allocated by dma_alloc_coherent + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: size of memory originally requested in dma_alloc_coherent + * @cpu_addr: CPU-view address returned from dma_alloc_coherent + * @handle: device-view address returned from dma_alloc_coherent + * + * Free (and unmap) a DMA buffer previously allocated by + * dma_alloc_coherent(). + * + * References to memory and mappings associated with cpu_addr/handle + * during and after this call executing are illegal. + */ +extern void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle); + +/** + * dma_alloc_writecombine - allocate write-combining memory for DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: required memory size + * @handle: bus-specific DMA address + * + * Allocate some uncached, buffered memory for a device for + * performing DMA. This function allocates pages, and will + * return the CPU-viewed address, and sets @handle to be the + * device-viewed address. + */ +extern void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); + +/** + * dma_free_coherent - free memory allocated by dma_alloc_writecombine + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: size of memory originally requested in dma_alloc_writecombine + * @cpu_addr: CPU-view address returned from dma_alloc_writecombine + * @handle: device-view address returned from dma_alloc_writecombine + * + * Free (and unmap) a DMA buffer previously allocated by + * dma_alloc_writecombine(). + * + * References to memory and mappings associated with cpu_addr/handle + * during and after this call executing are illegal. + */ +extern void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle); + +/** + * dma_map_single - map a single buffer for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @cpu_addr: CPU direct mapped address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_single() or dma_sync_single(). + */ +static inline dma_addr_t +dma_map_single(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + dma_cache_sync(cpu_addr, size, direction); + return virt_to_bus(cpu_addr); +} + +/** + * dma_unmap_single - unmap a single buffer previously mapped + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Unmap a single streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_single() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static inline void +dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + +} + +/** + * dma_map_page - map a portion of a page for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page() or dma_sync_single(). + */ +static inline dma_addr_t +dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + return dma_map_single(dev, page_address(page) + offset, + size, direction); +} + +/** + * dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Unmap a single streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_single() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static inline void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + dma_unmap_single(dev, dma_address, size, direction); +} + +/** + * dma_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + char *virt; + + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + virt = page_address(sg[i].page) + sg[i].offset; + dma_cache_sync(virt, sg[i].length, direction); + } + + return nents; +} + +/** + * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Unmap a set of streaming mode DMA translations. + * Again, CPU read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +static inline void +dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + +} + +/** + * dma_sync_single_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Make physical memory consistent for a single streaming mode DMA + * translation after a transfer. + * + * If you perform a dma_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the DMA mapping, + * you must call this function before doing so. At the next point you + * give the DMA address back to the card, you must first perform a + * dma_sync_single_for_device, and then the device again owns the + * buffer. + */ +static inline void +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_cache_sync(bus_to_virt(dma_handle), size, direction); +} + +static inline void +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_cache_sync(bus_to_virt(dma_handle), size, direction); +} + +/** + * dma_sync_sg_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Make physical memory consistent for a set of streaming + * mode DMA translations after a transfer. + * + * The same as dma_sync_single_for_* but for a scatter-gather list, + * same rules and usage. + */ +static inline void +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + dma_cache_sync(page_address(sg[i].page) + sg[i].offset, + sg[i].length, direction); + } +} + +static inline void +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + dma_cache_sync(page_address(sg[i].page) + sg[i].offset, + sg[i].length, direction); + } +} + +/* Now for the API extensions over the pci_ one */ + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +static inline int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +static inline int dma_get_cache_alignment(void) +{ + return boot_cpu_data.dcache.linesz; +} + +#endif /* __ASM_AVR32_DMA_MAPPING_H */ diff --git a/include/asm-avr32/dma.h b/include/asm-avr32/dma.h new file mode 100644 index 000000000000..9e91205590ac --- /dev/null +++ b/include/asm-avr32/dma.h @@ -0,0 +1,8 @@ +#ifndef __ASM_AVR32_DMA_H +#define __ASM_AVR32_DMA_H + +/* The maximum address that we can perform a DMA transfer to on this platform. + * Not really applicable to AVR32, but some functions need it. */ +#define MAX_DMA_ADDRESS 0xffffffff + +#endif /* __ASM_AVR32_DMA_H */ diff --git a/include/asm-avr32/elf.h b/include/asm-avr32/elf.h new file mode 100644 index 000000000000..d334b4994d2d --- /dev/null +++ b/include/asm-avr32/elf.h @@ -0,0 +1,110 @@ +#ifndef __ASM_AVR32_ELF_H +#define __ASM_AVR32_ELF_H + +/* AVR32 relocation numbers */ +#define R_AVR32_NONE 0 +#define R_AVR32_32 1 +#define R_AVR32_16 2 +#define R_AVR32_8 3 +#define R_AVR32_32_PCREL 4 +#define R_AVR32_16_PCREL 5 +#define R_AVR32_8_PCREL 6 +#define R_AVR32_DIFF32 7 +#define R_AVR32_DIFF16 8 +#define R_AVR32_DIFF8 9 +#define R_AVR32_GOT32 10 +#define R_AVR32_GOT16 11 +#define R_AVR32_GOT8 12 +#define R_AVR32_21S 13 +#define R_AVR32_16U 14 +#define R_AVR32_16S 15 +#define R_AVR32_8S 16 +#define R_AVR32_8S_EXT 17 +#define R_AVR32_22H_PCREL 18 +#define R_AVR32_18W_PCREL 19 +#define R_AVR32_16B_PCREL 20 +#define R_AVR32_16N_PCREL 21 +#define R_AVR32_14UW_PCREL 22 +#define R_AVR32_11H_PCREL 23 +#define R_AVR32_10UW_PCREL 24 +#define R_AVR32_9H_PCREL 25 +#define R_AVR32_9UW_PCREL 26 +#define R_AVR32_HI16 27 +#define R_AVR32_LO16 28 +#define R_AVR32_GOTPC 29 +#define R_AVR32_GOTCALL 30 +#define R_AVR32_LDA_GOT 31 +#define R_AVR32_GOT21S 32 +#define R_AVR32_GOT18SW 33 +#define R_AVR32_GOT16S 34 +#define R_AVR32_GOT7UW 35 +#define R_AVR32_32_CPENT 36 +#define R_AVR32_CPCALL 37 +#define R_AVR32_16_CP 38 +#define R_AVR32_9W_CP 39 +#define R_AVR32_RELATIVE 40 +#define R_AVR32_GLOB_DAT 41 +#define R_AVR32_JMP_SLOT 42 +#define R_AVR32_ALIGN 43 + +/* + * ELF register definitions.. + */ + +#include +#include + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct pt_regs) / sizeof (elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_fpu_struct elf_fpregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ( (x)->e_machine == EM_AVR32 ) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#ifdef __LITTLE_ENDIAN__ +#define ELF_DATA ELFDATA2LSB +#else +#define ELF_DATA ELFDATA2MSB +#endif +#define ELF_ARCH EM_AVR32 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (0) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define ELF_PLATFORM (NULL) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +#endif + +#endif /* __ASM_AVR32_ELF_H */ diff --git a/include/asm-avr32/emergency-restart.h b/include/asm-avr32/emergency-restart.h new file mode 100644 index 000000000000..3e7e014776ba --- /dev/null +++ b/include/asm-avr32/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_EMERGENCY_RESTART_H +#define __ASM_AVR32_EMERGENCY_RESTART_H + +#include + +#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */ diff --git a/include/asm-avr32/errno.h b/include/asm-avr32/errno.h new file mode 100644 index 000000000000..558a7249f06d --- /dev/null +++ b/include/asm-avr32/errno.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_ERRNO_H +#define __ASM_AVR32_ERRNO_H + +#include + +#endif /* __ASM_AVR32_ERRNO_H */ diff --git a/include/asm-avr32/fcntl.h b/include/asm-avr32/fcntl.h new file mode 100644 index 000000000000..14c0c4402b11 --- /dev/null +++ b/include/asm-avr32/fcntl.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_FCNTL_H +#define __ASM_AVR32_FCNTL_H + +#include + +#endif /* __ASM_AVR32_FCNTL_H */ diff --git a/include/asm-avr32/futex.h b/include/asm-avr32/futex.h new file mode 100644 index 000000000000..10419f14a68a --- /dev/null +++ b/include/asm-avr32/futex.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_FUTEX_H +#define __ASM_AVR32_FUTEX_H + +#include + +#endif /* __ASM_AVR32_FUTEX_H */ diff --git a/include/asm-avr32/hardirq.h b/include/asm-avr32/hardirq.h new file mode 100644 index 000000000000..267354356f60 --- /dev/null +++ b/include/asm-avr32/hardirq.h @@ -0,0 +1,34 @@ +#ifndef __ASM_AVR32_HARDIRQ_H +#define __ASM_AVR32_HARDIRQ_H + +#include +#include + +#ifndef __ASSEMBLY__ + +#include + +/* entry.S is sensitive to the offsets of these fields */ +typedef struct { + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +void ack_bad_irq(unsigned int irq); + +/* Standard mappings for irq_cpustat_t above */ +#include + +#endif /* __ASSEMBLY__ */ + +#define HARDIRQ_BITS 12 + +/* + * The hardirq mask has to be large enough to have + * space for potentially all IRQ sources in the system + * nesting on a single CPU: + */ +#if (1 << HARDIRQ_BITS) < NR_IRQS +# error HARDIRQ_BITS is too low! +#endif + +#endif /* __ASM_AVR32_HARDIRQ_H */ diff --git a/include/asm-avr32/hw_irq.h b/include/asm-avr32/hw_irq.h new file mode 100644 index 000000000000..218b0a6bfd1b --- /dev/null +++ b/include/asm-avr32/hw_irq.h @@ -0,0 +1,9 @@ +#ifndef __ASM_AVR32_HW_IRQ_H +#define __ASM_AVR32_HW_IRQ_H + +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) +{ + /* Nothing to do */ +} + +#endif /* __ASM_AVR32_HW_IRQ_H */ diff --git a/include/asm-avr32/intc.h b/include/asm-avr32/intc.h new file mode 100644 index 000000000000..1ac9ca75e8fd --- /dev/null +++ b/include/asm-avr32/intc.h @@ -0,0 +1,128 @@ +#ifndef __ASM_AVR32_INTC_H +#define __ASM_AVR32_INTC_H + +#include +#include + +struct irq_controller; +struct irqaction; +struct pt_regs; + +struct platform_device; + +/* Information about the internal interrupt controller */ +struct intc_device { + /* ioremapped address of configuration block */ + void __iomem *regs; + + /* the physical device */ + struct platform_device *pdev; + + /* Number of interrupt lines per group. */ + unsigned int irqs_per_group; + + /* The highest group ID + 1 */ + unsigned int nr_groups; + + /* + * Bitfield indicating which groups are actually in use. The + * size of the array is + * ceil(group_max / (8 * sizeof(unsigned int))). + */ + unsigned int group_mask[]; +}; + +struct irq_controller_class { + /* + * A short name identifying this kind of controller. + */ + const char *typename; + /* + * Handle the IRQ. Must do any necessary acking and masking. + */ + irqreturn_t (*handle)(int irq, void *dev_id, struct pt_regs *regs); + /* + * Register a new IRQ handler. + */ + int (*setup)(struct irq_controller *ctrl, unsigned int irq, + struct irqaction *action); + /* + * Unregister a IRQ handler. + */ + void (*free)(struct irq_controller *ctrl, unsigned int irq, + void *dev_id); + /* + * Mask the IRQ in the interrupt controller. + */ + void (*mask)(struct irq_controller *ctrl, unsigned int irq); + /* + * Unmask the IRQ in the interrupt controller. + */ + void (*unmask)(struct irq_controller *ctrl, unsigned int irq); + /* + * Set the type of the IRQ. See below for possible types. + * Return -EINVAL if a given type is not supported + */ + int (*set_type)(struct irq_controller *ctrl, unsigned int irq, + unsigned int type); + /* + * Return the IRQ type currently set + */ + unsigned int (*get_type)(struct irq_controller *ctrl, unsigned int irq); +}; + +struct irq_controller { + struct irq_controller_class *class; + unsigned int irq_group; + unsigned int first_irq; + unsigned int nr_irqs; + struct list_head list; +}; + +struct intc_group_desc { + struct irq_controller *ctrl; + irqreturn_t (*handle)(int, void *, struct pt_regs *); + unsigned long flags; + void *dev_id; + const char *devname; +}; + +/* + * The internal interrupt controller. Defined in board/part-specific + * devices.c. + * TODO: Should probably be defined per-cpu. + */ +extern struct intc_device intc; + +extern int request_internal_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, void *dev_id); +extern void free_internal_irq(unsigned int irq); + +/* Only used by time_init() */ +extern int setup_internal_irq(unsigned int irq, struct intc_group_desc *desc); + +/* + * Set interrupt priority for a given group. `group' can be found by + * using irq_to_group(irq). Priority can be from 0 (lowest) to 3 + * (highest). Higher-priority interrupts will preempt lower-priority + * interrupts (unless interrupts are masked globally). + * + * This function does not check for conflicts within a group. + */ +extern int intc_set_priority(unsigned int group, + unsigned int priority); + +/* + * Returns a bitmask of pending interrupts in a group. + */ +extern unsigned long intc_get_pending(unsigned int group); + +/* + * Register a new external interrupt controller. Returns the first + * external IRQ number that is assigned to the new controller. + */ +extern int intc_register_controller(struct irq_controller *ctrl); + +#endif /* __ASM_AVR32_INTC_H */ diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h new file mode 100644 index 000000000000..2fc8f111dce9 --- /dev/null +++ b/include/asm-avr32/io.h @@ -0,0 +1,253 @@ +#ifndef __ASM_AVR32_IO_H +#define __ASM_AVR32_IO_H + +#include + +#ifdef __KERNEL__ + +#include +#include + +/* virt_to_phys will only work when address is in P1 or P2 */ +static __inline__ unsigned long virt_to_phys(volatile void *address) +{ + return PHYSADDR(address); +} + +static __inline__ void * phys_to_virt(unsigned long address) +{ + return (void *)P1SEGADDR(address); +} + +#define cached_to_phys(addr) ((unsigned long)PHYSADDR(addr)) +#define uncached_to_phys(addr) ((unsigned long)PHYSADDR(addr)) +#define phys_to_cached(addr) ((void *)P1SEGADDR(addr)) +#define phys_to_uncached(addr) ((void *)P2SEGADDR(addr)) + +/* + * Generic IO read/write. These perform native-endian accesses. Note + * that some architectures will want to re-define __raw_{read,write}w. + */ +extern void __raw_writesb(unsigned int addr, const void *data, int bytelen); +extern void __raw_writesw(unsigned int addr, const void *data, int wordlen); +extern void __raw_writesl(unsigned int addr, const void *data, int longlen); + +extern void __raw_readsb(unsigned int addr, void *data, int bytelen); +extern void __raw_readsw(unsigned int addr, void *data, int wordlen); +extern void __raw_readsl(unsigned int addr, void *data, int longlen); + +static inline void writeb(unsigned char b, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *)addr = b; +} +static inline void writew(unsigned short b, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *)addr = b; +} +static inline void writel(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *)addr = b; +} +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +static inline unsigned char readb(const volatile void __iomem *addr) +{ + return *(const volatile unsigned char __force *)addr; +} +static inline unsigned short readw(const volatile void __iomem *addr) +{ + return *(const volatile unsigned short __force *)addr; +} +static inline unsigned int readl(const volatile void __iomem *addr) +{ + return *(const volatile unsigned int __force *)addr; +} +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writesb(p, d, l) __raw_writesb((unsigned int)p, d, l) +#define writesw(p, d, l) __raw_writesw((unsigned int)p, d, l) +#define writesl(p, d, l) __raw_writesl((unsigned int)p, d, l) + +#define readsb(p, d, l) __raw_readsb((unsigned int)p, d, l) +#define readsw(p, d, l) __raw_readsw((unsigned int)p, d, l) +#define readsl(p, d, l) __raw_readsl((unsigned int)p, d, l) + +/* + * These two are only here because ALSA _thinks_ it needs them... + */ +static inline void memcpy_fromio(void * to, const volatile void __iomem *from, + unsigned long count) +{ + char *p = to; + while (count) { + count--; + *p = readb(from); + p++; + from++; + } +} + +static inline void memcpy_toio(volatile void __iomem *to, const void * from, + unsigned long count) +{ + const char *p = from; + while (count) { + count--; + writeb(*p, to); + p++; + to++; + } +} + +static inline void memset_io(volatile void __iomem *addr, unsigned char val, + unsigned long count) +{ + memset((void __force *)addr, val, count); +} + +/* + * Bad read/write accesses... + */ +extern void __readwrite_bug(const char *fn); + +#define IO_SPACE_LIMIT 0xffffffff + +/* Convert I/O port address to virtual address */ +#define __io(p) ((void __iomem *)phys_to_uncached(p)) + +/* + * IO port access primitives + * ------------------------- + * + * The AVR32 doesn't have special IO access instructions; all IO is memory + * mapped. Note that these are defined to perform little endian accesses + * only. Their primary purpose is to access PCI and ISA peripherals. + * + * Note that for a big endian machine, this implies that the following + * big endian mode connectivity is in place. + * + * The machine specific io.h include defines __io to translate an "IO" + * address to a memory address. + * + * Note that we prevent GCC re-ordering or caching values in expressions + * by introducing sequence points into the in*() definitions. Note that + * __raw_* do not guarantee this behaviour. + * + * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space. + */ +#define outb(v, p) __raw_writeb(v, __io(p)) +#define outw(v, p) __raw_writew(cpu_to_le16(v), __io(p)) +#define outl(v, p) __raw_writel(cpu_to_le32(v), __io(p)) + +#define inb(p) __raw_readb(__io(p)) +#define inw(p) le16_to_cpu(__raw_readw(__io(p))) +#define inl(p) le32_to_cpu(__raw_readl(__io(p))) + +static inline void __outsb(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outb(*(u8 *)addr, port); + addr++; + } +} + +static inline void __insb(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u8 *)addr = inb(port); + addr++; + } +} + +static inline void __outsw(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outw(*(u16 *)addr, port); + addr += 2; + } +} + +static inline void __insw(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u16 *)addr = inw(port); + addr += 2; + } +} + +static inline void __outsl(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outl(*(u32 *)addr, port); + addr += 4; + } +} + +static inline void __insl(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u32 *)addr = inl(port); + addr += 4; + } +} + +#define outsb(port, addr, count) __outsb(port, addr, count) +#define insb(port, addr, count) __insb(port, addr, count) +#define outsw(port, addr, count) __outsw(port, addr, count) +#define insw(port, addr, count) __insw(port, addr, count) +#define outsl(port, addr, count) __outsl(port, addr, count) +#define insl(port, addr, count) __insl(port, addr, count) + +extern void __iomem *__ioremap(unsigned long offset, size_t size, + unsigned long flags); +extern void __iounmap(void __iomem *addr); + +/* + * ioremap - map bus memory into CPU space + * @offset bus address of the memory + * @size size of the resource to map + * + * ioremap performs a platform specific sequence of operations to make + * bus memory CPU accessible via the readb/.../writel functions and + * the other mmio helpers. The returned address is not guaranteed to + * be usable directly as a virtual address. + */ +#define ioremap(offset, size) \ + __ioremap((offset), (size), 0) + +#define iounmap(addr) \ + __iounmap(addr) + +#define cached(addr) P1SEGADDR(addr) +#define uncached(addr) P2SEGADDR(addr) + +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt +#define page_to_bus page_to_phys +#define bus_to_page phys_to_page + +#define dma_cache_wback_inv(_start, _size) \ + flush_dcache_region(_start, _size) +#define dma_cache_inv(_start, _size) \ + invalidate_dcache_region(_start, _size) +#define dma_cache_wback(_start, _size) \ + clean_dcache_region(_start, _size) + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_IO_H */ diff --git a/include/asm-avr32/ioctl.h b/include/asm-avr32/ioctl.h new file mode 100644 index 000000000000..c8472c1398ef --- /dev/null +++ b/include/asm-avr32/ioctl.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_IOCTL_H +#define __ASM_AVR32_IOCTL_H + +#include + +#endif /* __ASM_AVR32_IOCTL_H */ diff --git a/include/asm-avr32/ioctls.h b/include/asm-avr32/ioctls.h new file mode 100644 index 000000000000..0500426b7186 --- /dev/null +++ b/include/asm-avr32/ioctls.h @@ -0,0 +1,83 @@ +#ifndef __ASM_AVR32_IOCTLS_H +#define __ASM_AVR32_IOCTLS_H + +#include + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ +#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* __ASM_AVR32_IOCTLS_H */ diff --git a/include/asm-avr32/ipcbuf.h b/include/asm-avr32/ipcbuf.h new file mode 100644 index 000000000000..1552c9698f5e --- /dev/null +++ b/include/asm-avr32/ipcbuf.h @@ -0,0 +1,29 @@ +#ifndef __ASM_AVR32_IPCBUF_H +#define __ASM_AVR32_IPCBUF_H + +/* +* The user_ipc_perm structure for AVR32 architecture. +* Note extra padding because this structure is passed back and forth +* between kernel and user space. +* +* Pad space is left for: +* - 32-bit mode_t and seq +* - 2 miscellaneous 32-bit values +*/ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_AVR32_IPCBUF_H */ diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h new file mode 100644 index 000000000000..f7e725707dd7 --- /dev/null +++ b/include/asm-avr32/irq.h @@ -0,0 +1,10 @@ +#ifndef __ASM_AVR32_IRQ_H +#define __ASM_AVR32_IRQ_H + +#define NR_INTERNAL_IRQS 64 +#define NR_EXTERNAL_IRQS 64 +#define NR_IRQS (NR_INTERNAL_IRQS + NR_EXTERNAL_IRQS) + +#define irq_canonicalize(i) (i) + +#endif /* __ASM_AVR32_IOCTLS_H */ diff --git a/include/asm-avr32/irqflags.h b/include/asm-avr32/irqflags.h new file mode 100644 index 000000000000..93570daac38a --- /dev/null +++ b/include/asm-avr32/irqflags.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_IRQFLAGS_H +#define __ASM_AVR32_IRQFLAGS_H + +#include + +static inline unsigned long __raw_local_save_flags(void) +{ + return sysreg_read(SR); +} + +#define raw_local_save_flags(x) \ + do { (x) = __raw_local_save_flags(); } while (0) + +/* + * This will restore ALL status register flags, not only the interrupt + * mask flag. + * + * The empty asm statement informs the compiler of this fact while + * also serving as a barrier. + */ +static inline void raw_local_irq_restore(unsigned long flags) +{ + sysreg_write(SR, flags); + asm volatile("" : : : "memory", "cc"); +} + +static inline void raw_local_irq_disable(void) +{ + asm volatile("ssrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + asm volatile("csrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return (flags & SYSREG_BIT(GM)) != 0; +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASM_AVR32_IRQFLAGS_H */ diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h new file mode 100644 index 000000000000..f583b643ffb2 --- /dev/null +++ b/include/asm-avr32/kdebug.h @@ -0,0 +1,38 @@ +#ifndef __ASM_AVR32_KDEBUG_H +#define __ASM_AVR32_KDEBUG_H + +#include + +struct pt_regs; + +struct die_args { + struct pt_regs *regs; + int trapnr; +}; + +int register_die_notifier(struct notifier_block *nb); +int unregister_die_notifier(struct notifier_block *nb); +int register_page_fault_notifier(struct notifier_block *nb); +int unregister_page_fault_notifier(struct notifier_block *nb); +extern struct atomic_notifier_head avr32_die_chain; + +/* Grossly misnamed. */ +enum die_val { + DIE_FAULT, + DIE_BREAKPOINT, + DIE_SSTEP, + DIE_PAGE_FAULT, +}; + +static inline int notify_die(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .trapnr = trap, + }; + + return atomic_notifier_call_chain(&avr32_die_chain, val, &args); +} + +#endif /* __ASM_AVR32_KDEBUG_H */ diff --git a/include/asm-avr32/kmap_types.h b/include/asm-avr32/kmap_types.h new file mode 100644 index 000000000000..b7f5c6870107 --- /dev/null +++ b/include/asm-avr32/kmap_types.h @@ -0,0 +1,30 @@ +#ifndef __ASM_AVR32_KMAP_TYPES_H +#define __ASM_AVR32_KMAP_TYPES_H + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_PTE2, +D(10) KM_IRQ0, +D(11) KM_IRQ1, +D(12) KM_SOFTIRQ0, +D(13) KM_SOFTIRQ1, +D(14) KM_TYPE_NR +}; + +#undef D + +#endif /* __ASM_AVR32_KMAP_TYPES_H */ diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h new file mode 100644 index 000000000000..09a5cbe2f896 --- /dev/null +++ b/include/asm-avr32/kprobes.h @@ -0,0 +1,34 @@ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2005-2006 Atmel Corporation + * Copyright (C) IBM Corporation, 2002, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_KPROBES_H +#define __ASM_AVR32_KPROBES_H + +#include + +typedef u16 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */ +#define MAX_INSN_SIZE 2 + +#define ARCH_INACTIVE_KPROBE_COUNT 1 + +#define arch_remove_kprobe(p) do { } while (0) + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t insn[MAX_INSN_SIZE]; +}; + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#define flush_insn_slot(p) do { } while (0) + +#endif /* __ASM_AVR32_KPROBES_H */ diff --git a/include/asm-avr32/linkage.h b/include/asm-avr32/linkage.h new file mode 100644 index 000000000000..f7b285e910d4 --- /dev/null +++ b/include/asm-avr32/linkage.h @@ -0,0 +1,7 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .balign 2 +#define __ALIGN_STR ".balign 2" + +#endif /* __ASM_LINKAGE_H */ diff --git a/include/asm-avr32/local.h b/include/asm-avr32/local.h new file mode 100644 index 000000000000..1c1619694da3 --- /dev/null +++ b/include/asm-avr32/local.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_LOCAL_H +#define __ASM_AVR32_LOCAL_H + +#include + +#endif /* __ASM_AVR32_LOCAL_H */ diff --git a/include/asm-avr32/mach/serial_at91.h b/include/asm-avr32/mach/serial_at91.h new file mode 100644 index 000000000000..1290bb32802d --- /dev/null +++ b/include/asm-avr32/mach/serial_at91.h @@ -0,0 +1,33 @@ +/* + * linux/include/asm-arm/mach/serial_at91.h + * + * Based on serial_sa1100.h by Nicolas Pitre + * + * Copyright (C) 2002 ATMEL Rousset + * + * Low level machine dependent UART functions. + */ + +struct uart_port; + +/* + * This is a temporary structure for registering these + * functions; it is intended to be discarded after boot. + */ +struct at91_port_fns { + void (*set_mctrl)(struct uart_port *, u_int); + u_int (*get_mctrl)(struct uart_port *); + void (*enable_ms)(struct uart_port *); + void (*pm)(struct uart_port *, u_int, u_int); + int (*set_wake)(struct uart_port *, u_int); + int (*open)(struct uart_port *); + void (*close)(struct uart_port *); +}; + +#if defined(CONFIG_SERIAL_AT91) +void at91_register_uart_fns(struct at91_port_fns *fns); +#else +#define at91_register_uart_fns(fns) do { } while (0) +#endif + + diff --git a/include/asm-avr32/mman.h b/include/asm-avr32/mman.h new file mode 100644 index 000000000000..648f91e7187a --- /dev/null +++ b/include/asm-avr32/mman.h @@ -0,0 +1,17 @@ +#ifndef __ASM_AVR32_MMAN_H__ +#define __ASM_AVR32_MMAN_H__ + +#include + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* __ASM_AVR32_MMAN_H__ */ diff --git a/include/asm-avr32/mmu.h b/include/asm-avr32/mmu.h new file mode 100644 index 000000000000..60c2d2650d32 --- /dev/null +++ b/include/asm-avr32/mmu.h @@ -0,0 +1,10 @@ +#ifndef __ASM_AVR32_MMU_H +#define __ASM_AVR32_MMU_H + +/* Default "unsigned long" context */ +typedef unsigned long mm_context_t; + +#define MMU_ITLB_ENTRIES 64 +#define MMU_DTLB_ENTRIES 64 + +#endif /* __ASM_AVR32_MMU_H */ diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h new file mode 100644 index 000000000000..31add1ae8089 --- /dev/null +++ b/include/asm-avr32/mmu_context.h @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * ASID handling taken from SH implementation. + * Copyright (C) 1999 Niibe Yutaka + * Copyright (C) 2003 Paul Mundt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_MMU_CONTEXT_H +#define __ASM_AVR32_MMU_CONTEXT_H + +#include +#include +#include + +/* + * The MMU "context" consists of two things: + * (a) TLB cache version + * (b) ASID (Address Space IDentifier) + */ +#define MMU_CONTEXT_ASID_MASK 0x000000ff +#define MMU_CONTEXT_VERSION_MASK 0xffffff00 +#define MMU_CONTEXT_FIRST_VERSION 0x00000100 +#define NO_CONTEXT 0 + +#define MMU_NO_ASID 0x100 + +/* Virtual Page Number mask */ +#define MMU_VPN_MASK 0xfffff000 + +/* Cache of MMU context last used */ +extern unsigned long mmu_context_cache; + +/* + * Get MMU context if needed + */ +static inline void +get_mmu_context(struct mm_struct *mm) +{ + unsigned long mc = mmu_context_cache; + + if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0) + /* It's up to date, do nothing */ + return; + + /* It's old, we need to get new context with new version */ + mc = ++mmu_context_cache; + if (!(mc & MMU_CONTEXT_ASID_MASK)) { + /* + * We have exhausted all ASIDs of this version. + * Flush the TLB and start new cycle. + */ + flush_tlb_all(); + /* + * Fix version. Note that we avoid version #0 + * to distinguish NO_CONTEXT. + */ + if (!mc) + mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION; + } + mm->context = mc; +} + +/* + * Initialize the context related info for a new mm_struct + * instance. + */ +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + mm->context = NO_CONTEXT; + return 0; +} + +/* + * Destroy context related info for an mm_struct that is about + * to be put to rest. + */ +static inline void destroy_context(struct mm_struct *mm) +{ + /* Do nothing */ +} + +static inline void set_asid(unsigned long asid) +{ + /* XXX: We're destroying TLBEHI[8:31] */ + sysreg_write(TLBEHI, asid & MMU_CONTEXT_ASID_MASK); + cpu_sync_pipeline(); +} + +static inline unsigned long get_asid(void) +{ + unsigned long asid; + + asid = sysreg_read(TLBEHI); + return asid & MMU_CONTEXT_ASID_MASK; +} + +static inline void activate_context(struct mm_struct *mm) +{ + get_mmu_context(mm); + set_asid(mm->context & MMU_CONTEXT_ASID_MASK); +} + +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + if (likely(prev != next)) { + unsigned long __pgdir = (unsigned long)next->pgd; + + sysreg_write(PTBR, __pgdir); + activate_context(next); + } +} + +#define deactivate_mm(tsk,mm) do { } while(0) + +#define activate_mm(prev, next) switch_mm((prev), (next), NULL) + +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +} + + +static inline void enable_mmu(void) +{ + sysreg_write(MMUCR, (SYSREG_BIT(MMUCR_S) + | SYSREG_BIT(E) + | SYSREG_BIT(MMUCR_I))); + nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); + + if (mmu_context_cache == NO_CONTEXT) + mmu_context_cache = MMU_CONTEXT_FIRST_VERSION; + + set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK); +} + +static inline void disable_mmu(void) +{ + sysreg_write(MMUCR, SYSREG_BIT(MMUCR_S)); +} + +#endif /* __ASM_AVR32_MMU_CONTEXT_H */ diff --git a/include/asm-avr32/module.h b/include/asm-avr32/module.h new file mode 100644 index 000000000000..451444538a1b --- /dev/null +++ b/include/asm-avr32/module.h @@ -0,0 +1,28 @@ +#ifndef __ASM_AVR32_MODULE_H +#define __ASM_AVR32_MODULE_H + +struct mod_arch_syminfo { + unsigned long got_offset; + int got_initialized; +}; + +struct mod_arch_specific { + /* Starting offset of got in the module core memory. */ + unsigned long got_offset; + /* Size of the got. */ + unsigned long got_size; + /* Number of symbols in syminfo. */ + int nsyms; + /* Additional symbol information (got offsets). */ + struct mod_arch_syminfo *syminfo; +}; + +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr + +#define MODULE_PROC_FAMILY "AVR32v1" + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY + +#endif /* __ASM_AVR32_MODULE_H */ diff --git a/include/asm-avr32/msgbuf.h b/include/asm-avr32/msgbuf.h new file mode 100644 index 000000000000..ac18bc4da7f7 --- /dev/null +++ b/include/asm-avr32/msgbuf.h @@ -0,0 +1,31 @@ +#ifndef __ASM_AVR32_MSGBUF_H +#define __ASM_AVR32_MSGBUF_H + +/* + * The msqid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + unsigned long __unused1; + __kernel_time_t msg_rtime; /* last msgrcv time */ + unsigned long __unused2; + __kernel_time_t msg_ctime; /* last change time */ + unsigned long __unused3; + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* __ASM_AVR32_MSGBUF_H */ diff --git a/include/asm-avr32/mutex.h b/include/asm-avr32/mutex.h new file mode 100644 index 000000000000..458c1f7fbc18 --- /dev/null +++ b/include/asm-avr32/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. + * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h new file mode 100644 index 000000000000..f0a26de06cab --- /dev/null +++ b/include/asm-avr32/namei.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AVR32_NAMEI_H +#define __ASM_AVR32_NAMEI_H + +/* This dummy routine may be changed to something useful */ +#define __emul_prefix() NULL + +#endif /* __ASM_AVR32_NAMEI_H */ diff --git a/include/asm-avr32/numnodes.h b/include/asm-avr32/numnodes.h new file mode 100644 index 000000000000..0b864d7ce330 --- /dev/null +++ b/include/asm-avr32/numnodes.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AVR32_NUMNODES_H +#define __ASM_AVR32_NUMNODES_H + +/* Max 4 nodes */ +#define NODES_SHIFT 2 + +#endif /* __ASM_AVR32_NUMNODES_H */ diff --git a/include/asm-avr32/ocd.h b/include/asm-avr32/ocd.h new file mode 100644 index 000000000000..46f73180a127 --- /dev/null +++ b/include/asm-avr32/ocd.h @@ -0,0 +1,78 @@ +/* + * AVR32 OCD Registers + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_OCD_H +#define __ASM_AVR32_OCD_H + +/* Debug Registers */ +#define DBGREG_DID 0 +#define DBGREG_DC 8 +#define DBGREG_DS 16 +#define DBGREG_RWCS 28 +#define DBGREG_RWA 36 +#define DBGREG_RWD 40 +#define DBGREG_WT 44 +#define DBGREG_DTC 52 +#define DBGREG_DTSA0 56 +#define DBGREG_DTSA1 60 +#define DBGREG_DTEA0 72 +#define DBGREG_DTEA1 76 +#define DBGREG_BWC0A 88 +#define DBGREG_BWC0B 92 +#define DBGREG_BWC1A 96 +#define DBGREG_BWC1B 100 +#define DBGREG_BWC2A 104 +#define DBGREG_BWC2B 108 +#define DBGREG_BWC3A 112 +#define DBGREG_BWC3B 116 +#define DBGREG_BWA0A 120 +#define DBGREG_BWA0B 124 +#define DBGREG_BWA1A 128 +#define DBGREG_BWA1B 132 +#define DBGREG_BWA2A 136 +#define DBGREG_BWA2B 140 +#define DBGREG_BWA3A 144 +#define DBGREG_BWA3B 148 +#define DBGREG_BWD3A 153 +#define DBGREG_BWD3B 156 + +#define DBGREG_PID 284 + +#define SABAH_OCD 0x01 +#define SABAH_ICACHE 0x02 +#define SABAH_MEM_CACHED 0x04 +#define SABAH_MEM_UNCACHED 0x05 + +/* Fields in the Development Control register */ +#define DC_SS_BIT 8 + +#define DC_SS (1 << DC_SS_BIT) +#define DC_DBE (1 << 13) +#define DC_RID (1 << 27) +#define DC_ORP (1 << 28) +#define DC_MM (1 << 29) +#define DC_RES (1 << 30) + +/* Fields in the Development Status register */ +#define DS_SSS (1 << 0) +#define DS_SWB (1 << 1) +#define DS_HWB (1 << 2) +#define DS_BP_SHIFT 8 +#define DS_BP_MASK (0xff << DS_BP_SHIFT) + +#define __mfdr(addr) \ +({ \ + register unsigned long value; \ + asm volatile("mfdr %0, %1" : "=r"(value) : "i"(addr)); \ + value; \ +}) +#define __mtdr(addr, value) \ + asm volatile("mtdr %0, %1" : : "i"(addr), "r"(value)) + +#endif /* __ASM_AVR32_OCD_H */ diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h new file mode 100644 index 000000000000..0f630b3e9932 --- /dev/null +++ b/include/asm-avr32/page.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PAGE_H +#define __ASM_AVR32_PAGE_H + +#ifdef __KERNEL__ + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#ifdef __ASSEMBLY__ +#define PAGE_SIZE (1 << PAGE_SHIFT) +#else +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PTE_MASK PAGE_MASK + +#ifndef __ASSEMBLY__ + +#include + +extern void clear_page(void *to); +extern void copy_page(void *to, void *from); + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) + +/* FIXME: These should be removed soon */ +extern unsigned long memory_start, memory_end; + +/* Pure 2^n version of get_order */ +static inline int get_order(unsigned long size) +{ + unsigned lz; + + size = (size - 1) >> PAGE_SHIFT; + asm("clz %0, %1" : "=r"(lz) : "r"(size)); + return 32 - lz; +} + +#endif /* !__ASSEMBLY__ */ + +/* Align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) + +/* + * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff + * permanently to the physical addresses 0x00000000 -> 0x1fffffff when + * segmentation is enabled. We want to make use of this in order to + * minimize TLB pressure. + */ +#define PAGE_OFFSET (0x80000000UL) + +/* + * ALSA uses virt_to_page() on DMA pages, which I'm not entirely sure + * is a good idea. Anyway, we can't simply subtract PAGE_OFFSET here + * in that case, so we'll have to mask out the three most significant + * bits of the address instead... + * + * What's the difference between __pa() and virt_to_phys() anyway? + */ +#define __pa(x) PHYSADDR(x) +#define __va(x) ((void *)(P1SEGADDR(x))) + +#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT) + +#define phys_to_page(phys) (pfn_to_page(phys >> PAGE_SHIFT)) +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) + +#ifndef CONFIG_NEED_MULTIPLE_NODES + +#define PHYS_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT) + +#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET) +#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +/* + * Memory above this physical address will be considered highmem. + */ +#define HIGHMEM_START 0x20000000UL + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_PAGE_H */ diff --git a/include/asm-avr32/param.h b/include/asm-avr32/param.h new file mode 100644 index 000000000000..34bc8d4c3b29 --- /dev/null +++ b/include/asm-avr32/param.h @@ -0,0 +1,23 @@ +#ifndef __ASM_AVR32_PARAM_H +#define __ASM_AVR32_PARAM_H + +#ifdef __KERNEL__ +# define HZ CONFIG_HZ +# define USER_HZ 100 /* User interfaces are in "ticks" */ +# define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */ +#endif + +#ifndef HZ +# define HZ 100 +#endif + +/* TODO: Should be configurable */ +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +# define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 + +#endif /* __ASM_AVR32_PARAM_H */ diff --git a/include/asm-avr32/pci.h b/include/asm-avr32/pci.h new file mode 100644 index 000000000000..0f5f134b896a --- /dev/null +++ b/include/asm-avr32/pci.h @@ -0,0 +1,8 @@ +#ifndef __ASM_AVR32_PCI_H__ +#define __ASM_AVR32_PCI_H__ + +/* We don't support PCI yet, but some drivers require this file anyway */ + +#define PCI_DMA_BUS_IS_PHYS (1) + +#endif /* __ASM_AVR32_PCI_H__ */ diff --git a/include/asm-avr32/percpu.h b/include/asm-avr32/percpu.h new file mode 100644 index 000000000000..69227b4cd0d4 --- /dev/null +++ b/include/asm-avr32/percpu.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_PERCPU_H +#define __ASM_AVR32_PERCPU_H + +#include + +#endif /* __ASM_AVR32_PERCPU_H */ diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h new file mode 100644 index 000000000000..7492cfb92ced --- /dev/null +++ b/include/asm-avr32/pgalloc.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PGALLOC_H +#define __ASM_AVR32_PGALLOC_H + +#include +#include +#include +#include + +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) + +static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + struct page *pte) +{ + set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte))); +} + +/* + * Allocate and free page tables + */ +static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm) +{ + unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t)); + pgd_t *pgd = (pgd_t *)kmalloc(pgd_size, GFP_KERNEL); + + if (pgd) + memset(pgd, 0, pgd_size); + + return pgd; +} + +static inline void pgd_free(pgd_t *pgd) +{ + kfree(pgd); +} + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + int count = 0; + pte_t *pte; + + do { + pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + if (pte) + clear_page(pte); + else { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } + } while (!pte && (count++ < 10)); + + return pte; +} + +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + int count = 0; + struct page *pte; + + do { + pte = alloc_pages(GFP_KERNEL, 0); + if (pte) + clear_page(page_address(pte)); + else { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } + } while (!pte && (count++ < 10)); + + return pte; +} + +static inline void pte_free_kernel(pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline void pte_free(struct page *pte) +{ + __free_page(pte); +} + +#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) + +#define check_pgt_cache() do { } while(0) + +#endif /* __ASM_AVR32_PGALLOC_H */ diff --git a/include/asm-avr32/pgtable-2level.h b/include/asm-avr32/pgtable-2level.h new file mode 100644 index 000000000000..425dd567b5b9 --- /dev/null +++ b/include/asm-avr32/pgtable-2level.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ASM_AVR32_PGTABLE_2LEVEL_H + +#include + +/* + * Traditional 2-level paging structure + */ +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +#define PTRS_PER_PTE 1024 + +#ifndef __ASSEMBLY__ +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep, pteval) + +/* + * (pmds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) + +#define pte_pfn(x) ((unsigned long)(((x).pte >> PAGE_SHIFT))) +#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PGTABLE_2LEVEL_H */ diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h new file mode 100644 index 000000000000..6b8ca9db2bd5 --- /dev/null +++ b/include/asm-avr32/pgtable.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PGTABLE_H +#define __ASM_AVR32_PGTABLE_H + +#include + +#ifndef __ASSEMBLY__ +#include + +#endif /* !__ASSEMBLY__ */ + +/* + * Use two-level page tables just as the i386 (without PAE) + */ +#include + +/* + * The following code might need some cleanup when the values are + * final... + */ +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define FIRST_USER_ADDRESS 0 + +#define PTE_PHYS_MASK 0x1ffff000 + +#ifndef __ASSEMBLY__ +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void paging_init(void); + +/* + * ZERO_PAGE is a global shared page that is always zero: used for + * zero-mapped memory areas etc. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8 MiB value just means that there will be a 8 MiB "hole" + * after the uncached physical memory (P2 segment) until the vmalloc + * area starts. That means that any out-of-bounds memory accesses will + * hopefully be caught; we don't know if the end of the P1/P2 segments + * are actually used for anything, but it is anyway safer to let the + * MMU catch these kinds of errors than to rely on the memory bus. + * + * A "hole" of the same size is added to the end of the P3 segment as + * well. It might seem wasteful to use 16 MiB of virtual address space + * on this, but we do have 512 MiB of it... + * + * The vmalloc() routines leave a hole of 4 KiB between each vmalloced + * area for the same reason. + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) +#define VMALLOC_START (P3SEG + VMALLOC_OFFSET) +#define VMALLOC_END (P4SEG - VMALLOC_OFFSET) +#endif /* !__ASSEMBLY__ */ + +/* + * Page flags. Some of these flags are not directly supported by + * hardware, so we have to emulate them. + */ +#define _TLBEHI_BIT_VALID 9 +#define _TLBEHI_VALID (1 << _TLBEHI_BIT_VALID) + +#define _PAGE_BIT_WT 0 /* W-bit : write-through */ +#define _PAGE_BIT_DIRTY 1 /* D-bit : page changed */ +#define _PAGE_BIT_SZ0 2 /* SZ0-bit : Size of page */ +#define _PAGE_BIT_SZ1 3 /* SZ1-bit : Size of page */ +#define _PAGE_BIT_EXECUTE 4 /* X-bit : execute access allowed */ +#define _PAGE_BIT_RW 5 /* AP0-bit : write access allowed */ +#define _PAGE_BIT_USER 6 /* AP1-bit : user space access allowed */ +#define _PAGE_BIT_BUFFER 7 /* B-bit : bufferable */ +#define _PAGE_BIT_GLOBAL 8 /* G-bit : global (ignore ASID) */ +#define _PAGE_BIT_CACHABLE 9 /* C-bit : cachable */ + +/* If we drop support for 1K pages, we get two extra bits */ +#define _PAGE_BIT_PRESENT 10 +#define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */ + +/* The following flags are only valid when !PRESENT */ +#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */ + +#define _PAGE_WT (1 << _PAGE_BIT_WT) +#define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY) +#define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE) +#define _PAGE_RW (1 << _PAGE_BIT_RW) +#define _PAGE_USER (1 << _PAGE_BIT_USER) +#define _PAGE_BUFFER (1 << _PAGE_BIT_BUFFER) +#define _PAGE_GLOBAL (1 << _PAGE_BIT_GLOBAL) +#define _PAGE_CACHABLE (1 << _PAGE_BIT_CACHABLE) + +/* Software flags */ +#define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED) +#define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT) +#define _PAGE_FILE (1 << _PAGE_BIT_FILE) + +/* + * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is + * usually called _PAGE_PROTNONE on other architectures. + * + * XXX: Find out if _PAGE_PROTNONE is equivalent with !_PAGE_USER. If + * so, we can encode all possible page sizes (although we can't really + * support 1K pages anyway due to the _PAGE_PRESENT and _PAGE_ACCESSED + * bits) + * + */ +#define _PAGE_TYPE_MASK ((1 << _PAGE_BIT_SZ0) | (1 << _PAGE_BIT_SZ1)) +#define _PAGE_TYPE_NONE (0 << _PAGE_BIT_SZ0) +#define _PAGE_TYPE_SMALL (1 << _PAGE_BIT_SZ0) +#define _PAGE_TYPE_MEDIUM (2 << _PAGE_BIT_SZ0) +#define _PAGE_TYPE_LARGE (3 << _PAGE_BIT_SZ0) + +/* + * Mask which drop software flags. We currently can't handle more than + * 512 MiB of physical memory, so we can use bits 29-31 for other + * stuff. With a fixed 4K page size, we can use bits 10-11 as well as + * bits 2-3 (SZ) + */ +#define _PAGE_FLAGS_HARDWARE_MASK 0xfffff3ff + +#define _PAGE_FLAGS_CACHE_MASK (_PAGE_CACHABLE | _PAGE_BUFFER | _PAGE_WT) + +/* TODO: Check for saneness */ +/* User-mode page table flags (to be set in a pgd or pmd entry) */ +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \ + | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +/* Kernel-mode page table flags */ +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \ + | _PAGE_ACCESSED | _PAGE_DIRTY) +/* Flags that may be modified by software */ +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY \ + | _PAGE_FLAGS_CACHE_MASK) + +#define _PAGE_FLAGS_READ (_PAGE_CACHABLE | _PAGE_BUFFER) +#define _PAGE_FLAGS_WRITE (_PAGE_FLAGS_READ | _PAGE_RW | _PAGE_DIRTY) + +#define _PAGE_NORMAL(x) __pgprot((x) | _PAGE_PRESENT | _PAGE_TYPE_SMALL \ + | _PAGE_ACCESSED) + +#define PAGE_NONE (_PAGE_ACCESSED | _PAGE_TYPE_NONE) +#define PAGE_READ (_PAGE_FLAGS_READ | _PAGE_USER) +#define PAGE_EXEC (_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_USER) +#define PAGE_WRITE (_PAGE_FLAGS_WRITE | _PAGE_USER) +#define PAGE_KERNEL _PAGE_NORMAL(_PAGE_FLAGS_WRITE | _PAGE_EXECUTE | _PAGE_GLOBAL) +#define PAGE_KERNEL_RO _PAGE_NORMAL(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_GLOBAL) + +#define _PAGE_P(x) _PAGE_NORMAL((x) & ~(_PAGE_RW | _PAGE_DIRTY)) +#define _PAGE_S(x) _PAGE_NORMAL(x) + +#define PAGE_COPY _PAGE_P(PAGE_WRITE | PAGE_READ) + +#ifndef __ASSEMBLY__ +/* + * The hardware supports flags for write- and execute access. Read is + * always allowed if the page is loaded into the TLB, so the "-w-", + * "--x" and "-wx" mappings are implemented as "rw-", "r-x" and "rwx", + * respectively. + * + * The "---" case is handled by software; the page will simply not be + * loaded into the TLB if the page type is _PAGE_TYPE_NONE. + */ + +#define __P000 __pgprot(PAGE_NONE) +#define __P001 _PAGE_P(PAGE_READ) +#define __P010 _PAGE_P(PAGE_WRITE) +#define __P011 _PAGE_P(PAGE_WRITE | PAGE_READ) +#define __P100 _PAGE_P(PAGE_EXEC) +#define __P101 _PAGE_P(PAGE_EXEC | PAGE_READ) +#define __P110 _PAGE_P(PAGE_EXEC | PAGE_WRITE) +#define __P111 _PAGE_P(PAGE_EXEC | PAGE_WRITE | PAGE_READ) + +#define __S000 __pgprot(PAGE_NONE) +#define __S001 _PAGE_S(PAGE_READ) +#define __S010 _PAGE_S(PAGE_WRITE) +#define __S011 _PAGE_S(PAGE_WRITE | PAGE_READ) +#define __S100 _PAGE_S(PAGE_EXEC) +#define __S101 _PAGE_S(PAGE_EXEC | PAGE_READ) +#define __S110 _PAGE_S(PAGE_EXEC | PAGE_WRITE) +#define __S111 _PAGE_S(PAGE_EXEC | PAGE_WRITE | PAGE_READ) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) + +#define pte_clear(mm,addr,xp) \ + do { \ + set_pte_at(mm, addr, xp, __pte(0)); \ + } while (0) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} +static inline int pte_write(pte_t pte) +{ + return pte_val(pte) & _PAGE_RW; +} +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXECUTE; +} +static inline int pte_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_DIRTY; +} +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + +/* + * The following only work if pte_present() is not true. + */ +static inline int pte_file(pte_t pte) +{ + return pte_val(pte) & _PAGE_FILE; +} + +/* Mutator functions for PTE bits */ +static inline pte_t pte_rdprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); + return pte; +} +static inline pte_t pte_wrprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); + return pte; +} +static inline pte_t pte_exprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_EXECUTE)); + return pte; +} +static inline pte_t pte_mkclean(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); + return pte; +} +static inline pte_t pte_mkold(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); + return pte; +} +static inline pte_t pte_mkread(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); + return pte; +} +static inline pte_t pte_mkwrite(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); + return pte; +} +static inline pte_t pte_mkexec(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); + return pte; +} +static inline pte_t pte_mkdirty(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); + return pte; +} +static inline pte_t pte_mkyoung(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); + return pte; +} + +#define pmd_none(x) (!pmd_val(x)) +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) \ + != _KERNPG_TABLE) + +/* + * Permanent address of a page. We don't support highmem, so this is + * trivial. + */ +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +#define pte_page(x) phys_to_page(pte_val(x) & PTE_PHYS_MASK) + +/* + * Mark the prot value as uncacheable and unbufferable + */ +#define pgprot_noncached(prot) \ + __pgprot(pgprot_val(prot) & ~(_PAGE_BUFFER | _PAGE_CACHABLE)) + +/* + * Mark the prot value as uncacheable but bufferable + */ +#define pgprot_writecombine(prot) \ + __pgprot((pgprot_val(prot) & ~_PAGE_CACHABLE) | _PAGE_BUFFER) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * extern pte_t mk_pte(struct page *page, pgprot_t pgprot) + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) + | pgprot_val(newprot))); + return pte; +} + +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +#define pmd_page(pmd) (phys_to_page(pmd_val(pmd))) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) +#define pgd_offset_current(address) \ + ((pgd_t *)__mfsr(SYSREG_PTBR) + pgd_index(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* Find an entry in the third-level page table.. */ +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) +#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +struct vm_area_struct; +extern void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte); + +/* + * Encode and decode a swap entry + * + * Constraints: + * _PAGE_FILE at bit 0 + * _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE) + * _PAGE_PRESENT at bit 10 + * + * We encode the type into bits 4-9 and offset into bits 11-31. This + * gives us a 21 bits offset, or 2**21 * 4K = 8G usable swap space per + * device, and 64 possible types. + * + * NOTE: We should set ZEROs at the position of _PAGE_PRESENT + * and _PAGE_PROTNONE bits + */ +#define __swp_type(x) (((x).val >> 4) & 0x3f) +#define __swp_offset(x) ((x).val >> 11) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +/* + * Encode and decode a nonlinear file mapping entry. We have to + * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't + * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?) + */ +#define PTE_FILE_MAX_BITS 30 +#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \ + | ((pte_val(pte) >> 11) << 9)) +#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \ + | (((off) >> 9) << 11) \ + | _PAGE_FILE) }) + +typedef pte_t *pte_addr_t; + +#define kern_addr_valid(addr) (1) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#define MK_IOSPACE_PFN(space, pfn) (pfn) +#define GET_IOSPACE(pfn) 0 +#define GET_PFN(pfn) (pfn) + +/* No page table caches to initialize (?) */ +#define pgtable_cache_init() do { } while(0) + +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PGTABLE_H */ diff --git a/include/asm-avr32/poll.h b/include/asm-avr32/poll.h new file mode 100644 index 000000000000..736e29755dfc --- /dev/null +++ b/include/asm-avr32/poll.h @@ -0,0 +1,27 @@ +#ifndef __ASM_AVR32_POLL_H +#define __ASM_AVR32_POLL_H + +/* These are specified by iBCS2 */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +/* The rest seem to be more-or-less nonstandard. Check them! */ +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 +#define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif /* __ASM_AVR32_POLL_H */ diff --git a/include/asm-avr32/posix_types.h b/include/asm-avr32/posix_types.h new file mode 100644 index 000000000000..2831b039b349 --- /dev/null +++ b/include/asm-avr32/posix_types.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_POSIX_TYPES_H +#define __ASM_AVR32_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef unsigned short __kernel_old_dev_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { +#if defined(__KERNEL__) || defined(__USE_ALL) + int val[2]; +#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */ + int __val[2]; +#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */ +} __kernel_fsid_t; + +#if defined(__KERNEL__) + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); +} + + +#undef __FD_ISSET +static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *__p) +{ + unsigned long *__tmp = __p->fds_bits; + int __i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + __tmp[ 8] = 0; __tmp[ 9] = 0; + __tmp[10] = 0; __tmp[11] = 0; + __tmp[12] = 0; __tmp[13] = 0; + __tmp[14] = 0; __tmp[15] = 0; + return; + + case 8: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + return; + + case 4: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + return; + } + } + __i = __FDSET_LONGS; + while (__i) { + __i--; + *__tmp = 0; + __tmp++; + } +} + +#endif /* defined(__KERNEL__) */ + +#endif /* __ASM_AVR32_POSIX_TYPES_H */ diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h new file mode 100644 index 000000000000..f6913778a45f --- /dev/null +++ b/include/asm-avr32/processor.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PROCESSOR_H +#define __ASM_AVR32_PROCESSOR_H + +#include +#include + +#define TASK_SIZE 0x80000000 + +#ifndef __ASSEMBLY__ + +static inline void *current_text_addr(void) +{ + register void *pc asm("pc"); + return pc; +} + +enum arch_type { + ARCH_AVR32A, + ARCH_AVR32B, + ARCH_MAX +}; + +enum cpu_type { + CPU_MORGAN, + CPU_AT32AP, + CPU_MAX +}; + +enum tlb_config { + TLB_NONE, + TLB_SPLIT, + TLB_UNIFIED, + TLB_INVALID +}; + +struct avr32_cpuinfo { + struct clk *clk; + unsigned long loops_per_jiffy; + enum arch_type arch_type; + enum cpu_type cpu_type; + unsigned short arch_revision; + unsigned short cpu_revision; + enum tlb_config tlb_config; + + struct cache_info icache; + struct cache_info dcache; +}; + +extern struct avr32_cpuinfo boot_cpu_data; + +#ifdef CONFIG_SMP +extern struct avr32_cpuinfo cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's + */ +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + +#define cpu_relax() barrier() +#define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") + +struct cpu_context { + unsigned long sr; + unsigned long pc; + unsigned long ksp; /* Kernel stack pointer */ + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; +}; + +/* This struct contains the CPU context as stored by switch_to() */ +struct thread_struct { + struct cpu_context cpu_context; + unsigned long single_step_addr; + u16 single_step_insn; +}; + +#define INIT_THREAD { \ + .cpu_context = { \ + .ksp = sizeof(init_stack) + (long)&init_stack, \ + }, \ +} + +/* + * Do necessary setup to start up a newly executed thread. + */ +#define start_thread(regs, new_pc, new_sp) \ + do { \ + set_fs(USER_DS); \ + memset(regs, 0, sizeof(*regs)); \ + regs->sr = MODE_USER; \ + regs->pc = new_pc & ~1; \ + regs->sp = new_sp; \ + } while(0) + +struct task_struct; + +/* Free all resources held by a thread */ +extern void release_thread(struct task_struct *); + +/* Create a kernel thread without removing it from tasklists */ +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while(0) + +/* Return saved PC of a blocked thread */ +#define thread_saved_pc(tsk) ((tsk)->thread.cpu_context.pc) + +struct pt_regs; +void show_trace(struct task_struct *task, unsigned long *stack, + struct pt_regs *regs); + +extern unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) ((tsk)->thread.cpu_context.pc) +#define KSTK_ESP(tsk) ((tsk)->thread.cpu_context.ksp) + +#define ARCH_HAS_PREFETCH + +static inline void prefetch(const void *x) +{ + const char *c = x; + asm volatile("pref %0" : : "r"(c)); +} +#define PREFETCH_STRIDE L1_CACHE_BYTES + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PROCESSOR_H */ diff --git a/include/asm-avr32/ptrace.h b/include/asm-avr32/ptrace.h new file mode 100644 index 000000000000..60f0f19a81f1 --- /dev/null +++ b/include/asm-avr32/ptrace.h @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PTRACE_H +#define __ASM_AVR32_PTRACE_H + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 + +/* + * Status Register bits + */ +#define SR_H 0x40000000 +#define SR_R 0x20000000 +#define SR_J 0x10000000 +#define SR_DM 0x08000000 +#define SR_D 0x04000000 +#define MODE_NMI 0x01c00000 +#define MODE_EXCEPTION 0x01800000 +#define MODE_INT3 0x01400000 +#define MODE_INT2 0x01000000 +#define MODE_INT1 0x00c00000 +#define MODE_INT0 0x00800000 +#define MODE_SUPERVISOR 0x00400000 +#define MODE_USER 0x00000000 +#define MODE_MASK 0x01c00000 +#define SR_EM 0x00200000 +#define SR_I3M 0x00100000 +#define SR_I2M 0x00080000 +#define SR_I1M 0x00040000 +#define SR_I0M 0x00020000 +#define SR_GM 0x00010000 + +#define SR_H_BIT 30 +#define SR_R_BIT 29 +#define SR_J_BIT 28 +#define SR_DM_BIT 27 +#define SR_D_BIT 26 +#define MODE_SHIFT 22 +#define SR_EM_BIT 21 +#define SR_I3M_BIT 20 +#define SR_I2M_BIT 19 +#define SR_I1M_BIT 18 +#define SR_I0M_BIT 17 +#define SR_GM_BIT 16 + +/* The user-visible part */ +#define SR_L 0x00000020 +#define SR_Q 0x00000010 +#define SR_V 0x00000008 +#define SR_N 0x00000004 +#define SR_Z 0x00000002 +#define SR_C 0x00000001 + +#define SR_L_BIT 5 +#define SR_Q_BIT 4 +#define SR_V_BIT 3 +#define SR_N_BIT 2 +#define SR_Z_BIT 1 +#define SR_C_BIT 0 + +/* + * The order is defined by the stmts instruction. r0 is stored first, + * so it gets the highest address. + * + * Registers 0-12 are general-purpose registers (r12 is normally used for + * the function return value). + * Register 13 is the stack pointer + * Register 14 is the link register + * Register 15 is the program counter (retrieved from the RAR sysreg) + */ +#define FRAME_SIZE_FULL 72 +#define REG_R12_ORIG 68 +#define REG_R0 64 +#define REG_R1 60 +#define REG_R2 56 +#define REG_R3 52 +#define REG_R4 48 +#define REG_R5 44 +#define REG_R6 40 +#define REG_R7 36 +#define REG_R8 32 +#define REG_R9 28 +#define REG_R10 24 +#define REG_R11 20 +#define REG_R12 16 +#define REG_SP 12 +#define REG_LR 8 + +#define FRAME_SIZE_MIN 8 +#define REG_PC 4 +#define REG_SR 0 + +#ifndef __ASSEMBLY__ +struct pt_regs { + /* These are always saved */ + unsigned long sr; + unsigned long pc; + + /* These are sometimes saved */ + unsigned long lr; + unsigned long sp; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; + + /* Only saved on system call */ + unsigned long r12_orig; +}; + +#ifdef __KERNEL__ +# define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER) +extern void show_regs (struct pt_regs *); + +static __inline__ int valid_user_regs(struct pt_regs *regs) +{ + /* + * Some of the Java bits might be acceptable if/when we + * implement some support for that stuff... + */ + if ((regs->sr & 0xffff0000) == 0) + return 1; + + /* + * Force status register flags to be sane and report this + * illegal behaviour... + */ + regs->sr &= 0x0000ffff; + return 0; +} + +#define instruction_pointer(regs) ((regs)->pc) + +#define profile_pc(regs) instruction_pointer(regs) + +#endif /* __KERNEL__ */ + +#endif /* ! __ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PTRACE_H */ diff --git a/include/asm-avr32/resource.h b/include/asm-avr32/resource.h new file mode 100644 index 000000000000..c6dd101472b1 --- /dev/null +++ b/include/asm-avr32/resource.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_RESOURCE_H +#define __ASM_AVR32_RESOURCE_H + +#include + +#endif /* __ASM_AVR32_RESOURCE_H */ diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h new file mode 100644 index 000000000000..bfe7d753423c --- /dev/null +++ b/include/asm-avr32/scatterlist.h @@ -0,0 +1,21 @@ +#ifndef __ASM_AVR32_SCATTERLIST_H +#define __ASM_AVR32_SCATTERLIST_H + +struct scatterlist { + struct page *page; + unsigned int offset; + dma_addr_t dma_address; + unsigned int length; +}; + +/* These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->length) + +#define ISA_DMA_THRESHOLD (0xffffffff) + +#endif /* __ASM_AVR32_SCATTERLIST_H */ diff --git a/include/asm-avr32/sections.h b/include/asm-avr32/sections.h new file mode 100644 index 000000000000..aa14252e4181 --- /dev/null +++ b/include/asm-avr32/sections.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_SECTIONS_H +#define __ASM_AVR32_SECTIONS_H + +#include + +#endif /* __ASM_AVR32_SECTIONS_H */ diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h new file mode 100644 index 000000000000..ef99ddccc10c --- /dev/null +++ b/include/asm-avr32/semaphore.h @@ -0,0 +1,109 @@ +/* + * SMP- and interrupt-safe semaphores. + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on include/asm-i386/semaphore.h + * Copyright (C) 1996 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SEMAPHORE_H +#define __ASM_AVR32_SEMAPHORE_H + +#include + +#include +#include +#include +#include + +struct semaphore { + atomic_t count; + int sleepers; + wait_queue_head_t wait; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .count = ATOMIC_INIT(n), \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} + +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) + +static inline void sema_init (struct semaphore *sem, int val) +{ + atomic_set(&sem->count, val); + sem->sleepers = 0; + init_waitqueue_head(&sem->wait); +} + +static inline void init_MUTEX (struct semaphore *sem) +{ + sema_init(sem, 1); +} + +static inline void init_MUTEX_LOCKED (struct semaphore *sem) +{ + sema_init(sem, 0); +} + +void __down(struct semaphore * sem); +int __down_interruptible(struct semaphore * sem); +void __up(struct semaphore * sem); + +/* + * This is ugly, but we want the default case to fall through. + * "__down_failed" is a special asm handler that calls the C + * routine that actually waits. See arch/i386/kernel/semaphore.c + */ +static inline void down(struct semaphore * sem) +{ + might_sleep(); + if (unlikely(atomic_dec_return (&sem->count) < 0)) + __down (sem); +} + +/* + * Interruptible try to acquire a semaphore. If we obtained + * it, return zero. If we were interrupted, returns -EINTR + */ +static inline int down_interruptible(struct semaphore * sem) +{ + int ret = 0; + + might_sleep(); + if (unlikely(atomic_dec_return (&sem->count) < 0)) + ret = __down_interruptible (sem); + return ret; +} + +/* + * Non-blockingly attempt to down() a semaphore. + * Returns zero if we acquired it + */ +static inline int down_trylock(struct semaphore * sem) +{ + return atomic_dec_if_positive(&sem->count) < 0; +} + +/* + * Note! This is subtle. We jump to wake people up only if + * the semaphore was negative (== somebody was waiting on it). + * The default case (no contention) will result in NO + * jumps for both down() and up(). + */ +static inline void up(struct semaphore * sem) +{ + if (unlikely(atomic_inc_return (&sem->count) <= 0)) + __up (sem); +} + +#endif /*__ASM_AVR32_SEMAPHORE_H */ diff --git a/include/asm-avr32/sembuf.h b/include/asm-avr32/sembuf.h new file mode 100644 index 000000000000..e472216e0c97 --- /dev/null +++ b/include/asm-avr32/sembuf.h @@ -0,0 +1,25 @@ +#ifndef __ASM_AVR32_SEMBUF_H +#define __ASM_AVR32_SEMBUF_H + +/* +* The semid64_ds structure for AVR32 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + unsigned long __unused1; + __kernel_time_t sem_ctime; /* last change time */ + unsigned long __unused2; + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_AVR32_SEMBUF_H */ diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h new file mode 100644 index 000000000000..10193da4113b --- /dev/null +++ b/include/asm-avr32/setup.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/include/asm-arm/setup.h + * Copyright (C) 1997-1999 Russel King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SETUP_H__ +#define __ASM_AVR32_SETUP_H__ + +#define COMMAND_LINE_SIZE 256 + +/* Magic number indicating that a tag table is present */ +#define ATAG_MAGIC 0xa2a25441 + +#ifndef __ASSEMBLY__ + +/* + * Generic memory range, used by several tags. + * + * addr is always physical. + * size is measured in bytes. + * next is for use by the OS, e.g. for grouping regions into + * linked lists. + */ +struct tag_mem_range { + u32 addr; + u32 size; + struct tag_mem_range * next; +}; + +/* The list ends with an ATAG_NONE node. */ +#define ATAG_NONE 0x00000000 + +struct tag_header { + u32 size; + u32 tag; +}; + +/* The list must start with an ATAG_CORE node */ +#define ATAG_CORE 0x54410001 + +struct tag_core { + u32 flags; + u32 pagesize; + u32 rootdev; +}; + +/* it is allowed to have multiple ATAG_MEM nodes */ +#define ATAG_MEM 0x54410002 +/* ATAG_MEM uses tag_mem_range */ + +/* command line: \0 terminated string */ +#define ATAG_CMDLINE 0x54410003 + +struct tag_cmdline { + char cmdline[1]; /* this is the minimum size */ +}; + +/* Ramdisk image (may be compressed) */ +#define ATAG_RDIMG 0x54410004 +/* ATAG_RDIMG uses tag_mem_range */ + +/* Information about various clocks present in the system */ +#define ATAG_CLOCK 0x54410005 + +struct tag_clock { + u32 clock_id; /* Which clock are we talking about? */ + u32 clock_flags; /* Special features */ + u64 clock_hz; /* Clock speed in Hz */ +}; + +/* The clock types we know about */ +#define CLOCK_BOOTCPU 0 + +/* Memory reserved for the system (e.g. the bootloader) */ +#define ATAG_RSVD_MEM 0x54410006 +/* ATAG_RSVD_MEM uses tag_mem_range */ + +/* Ethernet information */ + +#define ATAG_ETHERNET 0x54410007 + +struct tag_ethernet { + u8 mac_index; + u8 mii_phy_addr; + u8 hw_address[6]; +}; + +#define ETH_INVALID_PHY 0xff + +struct tag { + struct tag_header hdr; + union { + struct tag_core core; + struct tag_mem_range mem_range; + struct tag_cmdline cmdline; + struct tag_clock clock; + struct tag_ethernet ethernet; + } u; +}; + +struct tagtable { + u32 tag; + int (*parse)(struct tag *); +}; + +#define __tag __attribute_used__ __attribute__((__section__(".taglist"))) +#define __tagtable(tag, fn) \ + static struct tagtable __tagtable_##fn __tag = { tag, fn } + +#define tag_member_present(tag,member) \ + ((unsigned long)(&((struct tag *)0L)->member + 1) \ + <= (tag)->hdr.size * 4) + +#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size)) +#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2) + +#define for_each_tag(t,base) \ + for (t = base; t->hdr.size; t = tag_next(t)) + +extern struct tag_mem_range *mem_phys; +extern struct tag_mem_range *mem_reserved; +extern struct tag_mem_range *mem_ramdisk; + +extern struct tag *bootloader_tags; + +extern void setup_bootmem(void); +extern void setup_processor(void); +extern void board_setup_fbmem(unsigned long fbmem_start, + unsigned long fbmem_size); + +/* Chip-specific hook to enable the use of SDRAM */ +void chip_enable_sdram(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_SETUP_H__ */ diff --git a/include/asm-avr32/shmbuf.h b/include/asm-avr32/shmbuf.h new file mode 100644 index 000000000000..c62fba41739a --- /dev/null +++ b/include/asm-avr32/shmbuf.h @@ -0,0 +1,42 @@ +#ifndef __ASM_AVR32_SHMBUF_H +#define __ASM_AVR32_SHMBUF_H + +/* + * The shmid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + unsigned long __unused1; + __kernel_time_t shm_dtime; /* last detach time */ + unsigned long __unused2; + __kernel_time_t shm_ctime; /* last change time */ + unsigned long __unused3; + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_AVR32_SHMBUF_H */ diff --git a/include/asm-avr32/shmparam.h b/include/asm-avr32/shmparam.h new file mode 100644 index 000000000000..3681266c77f7 --- /dev/null +++ b/include/asm-avr32/shmparam.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_SHMPARAM_H +#define __ASM_AVR32_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* __ASM_AVR32_SHMPARAM_H */ diff --git a/include/asm-avr32/sigcontext.h b/include/asm-avr32/sigcontext.h new file mode 100644 index 000000000000..e04062b5f39f --- /dev/null +++ b/include/asm-avr32/sigcontext.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SIGCONTEXT_H +#define __ASM_AVR32_SIGCONTEXT_H + +struct sigcontext { + unsigned long oldmask; + + /* CPU registers */ + unsigned long sr; + unsigned long pc; + unsigned long lr; + unsigned long sp; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; +}; + +#endif /* __ASM_AVR32_SIGCONTEXT_H */ diff --git a/include/asm-avr32/siginfo.h b/include/asm-avr32/siginfo.h new file mode 100644 index 000000000000..5ee93f40a8a8 --- /dev/null +++ b/include/asm-avr32/siginfo.h @@ -0,0 +1,6 @@ +#ifndef _AVR32_SIGINFO_H +#define _AVR32_SIGINFO_H + +#include + +#endif diff --git a/include/asm-avr32/signal.h b/include/asm-avr32/signal.h new file mode 100644 index 000000000000..caffefeeba1f --- /dev/null +++ b/include/asm-avr32/signal.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SIGNAL_H +#define __ASM_AVR32_SIGNAL_H + +#include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifdef __KERNEL__ +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 32 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + +/* + * SA_FLAGS values: + * + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_SIGINFO deliver the signal with SIGINFO structs + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NODEFER prevents the current signal from being masked in the handler. + * SA_RESETHAND clears the handler when the signal is delivered. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_RESTORER 0x04000000 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include + +#ifdef __KERNEL__ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + __sigrestore_t sa_restorer; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +#include +#undef __HAVE_ARCH_SIG_BITOPS + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* __KERNEL__ */ + +#endif diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h new file mode 100644 index 000000000000..543229de8173 --- /dev/null +++ b/include/asm-avr32/socket.h @@ -0,0 +1,53 @@ +#ifndef __ASM_AVR32_SOCKET_H +#define __ASM_AVR32_SOCKET_H + +#include + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 + +#endif /* __ASM_AVR32_SOCKET_H */ diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h new file mode 100644 index 000000000000..84f3d65b3b3b --- /dev/null +++ b/include/asm-avr32/sockios.h @@ -0,0 +1,12 @@ +#ifndef __ASM_AVR32_SOCKIOS_H +#define __ASM_AVR32_SOCKIOS_H + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp */ + +#endif /* __ASM_AVR32_SOCKIOS_H */ diff --git a/include/asm-avr32/stat.h b/include/asm-avr32/stat.h new file mode 100644 index 000000000000..e72881e10230 --- /dev/null +++ b/include/asm-avr32/stat.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_STAT_H +#define __ASM_AVR32_STAT_H + +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; +}; + +#define STAT_HAVE_NSEC 1 + +struct stat64 { + unsigned long long st_dev; + + unsigned long long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + + unsigned long st_uid; + unsigned long st_gid; + + unsigned long long st_rdev; + + long long st_size; + unsigned long __pad1; /* align 64-bit st_blocks */ + unsigned long st_blksize; + + unsigned long long st_blocks; /* Number 512-byte blocks allocated. */ + + unsigned long st_atime; + unsigned long st_atime_nsec; + + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_AVR32_STAT_H */ diff --git a/include/asm-avr32/statfs.h b/include/asm-avr32/statfs.h new file mode 100644 index 000000000000..2961bd18c50e --- /dev/null +++ b/include/asm-avr32/statfs.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_STATFS_H +#define __ASM_AVR32_STATFS_H + +#include + +#endif /* __ASM_AVR32_STATFS_H */ diff --git a/include/asm-avr32/string.h b/include/asm-avr32/string.h new file mode 100644 index 000000000000..c91a623cd585 --- /dev/null +++ b/include/asm-avr32/string.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_STRING_H +#define __ASM_AVR32_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *b, int c, size_t len); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *to, const void *from, size_t len); + +#endif /* __ASM_AVR32_STRING_H */ diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h new file mode 100644 index 000000000000..f91975f330f6 --- /dev/null +++ b/include/asm-avr32/sysreg.h @@ -0,0 +1,332 @@ +/* + * AVR32 System Registers + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SYSREG_H__ +#define __ASM_AVR32_SYSREG_H__ + +/* sysreg register offsets */ +#define SYSREG_SR 0x0000 +#define SYSREG_EVBA 0x0004 +#define SYSREG_ACBA 0x0008 +#define SYSREG_CPUCR 0x000c +#define SYSREG_ECR 0x0010 +#define SYSREG_RSR_SUP 0x0014 +#define SYSREG_RSR_INT0 0x0018 +#define SYSREG_RSR_INT1 0x001c +#define SYSREG_RSR_INT2 0x0020 +#define SYSREG_RSR_INT3 0x0024 +#define SYSREG_RSR_EX 0x0028 +#define SYSREG_RSR_NMI 0x002c +#define SYSREG_RSR_DBG 0x0030 +#define SYSREG_RAR_SUP 0x0034 +#define SYSREG_RAR_INT0 0x0038 +#define SYSREG_RAR_INT1 0x003c +#define SYSREG_RAR_INT2 0x0040 +#define SYSREG_RAR_INT3 0x0044 +#define SYSREG_RAR_EX 0x0048 +#define SYSREG_RAR_NMI 0x004c +#define SYSREG_RAR_DBG 0x0050 +#define SYSREG_JECR 0x0054 +#define SYSREG_JOSP 0x0058 +#define SYSREG_JAVA_LV0 0x005c +#define SYSREG_JAVA_LV1 0x0060 +#define SYSREG_JAVA_LV2 0x0064 +#define SYSREG_JAVA_LV3 0x0068 +#define SYSREG_JAVA_LV4 0x006c +#define SYSREG_JAVA_LV5 0x0070 +#define SYSREG_JAVA_LV6 0x0074 +#define SYSREG_JAVA_LV7 0x0078 +#define SYSREG_JTBA 0x007c +#define SYSREG_JBCR 0x0080 +#define SYSREG_CONFIG0 0x0100 +#define SYSREG_CONFIG1 0x0104 +#define SYSREG_COUNT 0x0108 +#define SYSREG_COMPARE 0x010c +#define SYSREG_TLBEHI 0x0110 +#define SYSREG_TLBELO 0x0114 +#define SYSREG_PTBR 0x0118 +#define SYSREG_TLBEAR 0x011c +#define SYSREG_MMUCR 0x0120 +#define SYSREG_TLBARLO 0x0124 +#define SYSREG_TLBARHI 0x0128 +#define SYSREG_PCCNT 0x012c +#define SYSREG_PCNT0 0x0130 +#define SYSREG_PCNT1 0x0134 +#define SYSREG_PCCR 0x0138 +#define SYSREG_BEAR 0x013c + +/* Bitfields in SR */ +#define SYSREG_SR_C_OFFSET 0 +#define SYSREG_SR_C_SIZE 1 +#define SYSREG_Z_OFFSET 1 +#define SYSREG_Z_SIZE 1 +#define SYSREG_SR_N_OFFSET 2 +#define SYSREG_SR_N_SIZE 1 +#define SYSREG_SR_V_OFFSET 3 +#define SYSREG_SR_V_SIZE 1 +#define SYSREG_Q_OFFSET 4 +#define SYSREG_Q_SIZE 1 +#define SYSREG_GM_OFFSET 16 +#define SYSREG_GM_SIZE 1 +#define SYSREG_I0M_OFFSET 17 +#define SYSREG_I0M_SIZE 1 +#define SYSREG_I1M_OFFSET 18 +#define SYSREG_I1M_SIZE 1 +#define SYSREG_I2M_OFFSET 19 +#define SYSREG_I2M_SIZE 1 +#define SYSREG_I3M_OFFSET 20 +#define SYSREG_I3M_SIZE 1 +#define SYSREG_EM_OFFSET 21 +#define SYSREG_EM_SIZE 1 +#define SYSREG_M0_OFFSET 22 +#define SYSREG_M0_SIZE 1 +#define SYSREG_M1_OFFSET 23 +#define SYSREG_M1_SIZE 1 +#define SYSREG_M2_OFFSET 24 +#define SYSREG_M2_SIZE 1 +#define SYSREG_SR_D_OFFSET 26 +#define SYSREG_SR_D_SIZE 1 +#define SYSREG_DM_OFFSET 27 +#define SYSREG_DM_SIZE 1 +#define SYSREG_SR_J_OFFSET 28 +#define SYSREG_SR_J_SIZE 1 +#define SYSREG_R_OFFSET 29 +#define SYSREG_R_SIZE 1 +#define SYSREG_H_OFFSET 30 +#define SYSREG_H_SIZE 1 + +/* Bitfields in EVBA */ + +/* Bitfields in ACBA */ + +/* Bitfields in CPUCR */ +#define SYSREG_BI_OFFSET 0 +#define SYSREG_BI_SIZE 1 +#define SYSREG_BE_OFFSET 1 +#define SYSREG_BE_SIZE 1 +#define SYSREG_FE_OFFSET 2 +#define SYSREG_FE_SIZE 1 +#define SYSREG_RE_OFFSET 3 +#define SYSREG_RE_SIZE 1 +#define SYSREG_IBE_OFFSET 4 +#define SYSREG_IBE_SIZE 1 +#define SYSREG_IEE_OFFSET 5 +#define SYSREG_IEE_SIZE 1 + +/* Bitfields in ECR */ +#define SYSREG_ECR_OFFSET 0 +#define SYSREG_ECR_SIZE 32 + +/* Bitfields in RSR_SUP */ + +/* Bitfields in RSR_INT0 */ + +/* Bitfields in RSR_INT1 */ + +/* Bitfields in RSR_INT2 */ + +/* Bitfields in RSR_INT3 */ + +/* Bitfields in RSR_EX */ + +/* Bitfields in RSR_NMI */ + +/* Bitfields in RSR_DBG */ + +/* Bitfields in RAR_SUP */ + +/* Bitfields in RAR_INT0 */ + +/* Bitfields in RAR_INT1 */ + +/* Bitfields in RAR_INT2 */ + +/* Bitfields in RAR_INT3 */ + +/* Bitfields in RAR_EX */ + +/* Bitfields in RAR_NMI */ + +/* Bitfields in RAR_DBG */ + +/* Bitfields in JECR */ + +/* Bitfields in JOSP */ + +/* Bitfields in JAVA_LV0 */ + +/* Bitfields in JAVA_LV1 */ + +/* Bitfields in JAVA_LV2 */ + +/* Bitfields in JAVA_LV3 */ + +/* Bitfields in JAVA_LV4 */ + +/* Bitfields in JAVA_LV5 */ + +/* Bitfields in JAVA_LV6 */ + +/* Bitfields in JAVA_LV7 */ + +/* Bitfields in JTBA */ + +/* Bitfields in JBCR */ + +/* Bitfields in CONFIG0 */ +#define SYSREG_CONFIG0_D_OFFSET 1 +#define SYSREG_CONFIG0_D_SIZE 1 +#define SYSREG_CONFIG0_S_OFFSET 2 +#define SYSREG_CONFIG0_S_SIZE 1 +#define SYSREG_O_OFFSET 3 +#define SYSREG_O_SIZE 1 +#define SYSREG_P_OFFSET 4 +#define SYSREG_P_SIZE 1 +#define SYSREG_CONFIG0_J_OFFSET 5 +#define SYSREG_CONFIG0_J_SIZE 1 +#define SYSREG_F_OFFSET 6 +#define SYSREG_F_SIZE 1 +#define SYSREG_MMUT_OFFSET 7 +#define SYSREG_MMUT_SIZE 3 +#define SYSREG_AR_OFFSET 10 +#define SYSREG_AR_SIZE 3 +#define SYSREG_AT_OFFSET 13 +#define SYSREG_AT_SIZE 3 +#define SYSREG_PROCESSORREVISION_OFFSET 16 +#define SYSREG_PROCESSORREVISION_SIZE 8 +#define SYSREG_PROCESSORID_OFFSET 24 +#define SYSREG_PROCESSORID_SIZE 8 + +/* Bitfields in CONFIG1 */ +#define SYSREG_DASS_OFFSET 0 +#define SYSREG_DASS_SIZE 3 +#define SYSREG_DLSZ_OFFSET 3 +#define SYSREG_DLSZ_SIZE 3 +#define SYSREG_DSET_OFFSET 6 +#define SYSREG_DSET_SIZE 4 +#define SYSREG_IASS_OFFSET 10 +#define SYSREG_IASS_SIZE 2 +#define SYSREG_ILSZ_OFFSET 13 +#define SYSREG_ILSZ_SIZE 3 +#define SYSREG_ISET_OFFSET 16 +#define SYSREG_ISET_SIZE 4 +#define SYSREG_DMMUSZ_OFFSET 20 +#define SYSREG_DMMUSZ_SIZE 6 +#define SYSREG_IMMUSZ_OFFSET 26 +#define SYSREG_IMMUSZ_SIZE 6 + +/* Bitfields in COUNT */ + +/* Bitfields in COMPARE */ + +/* Bitfields in TLBEHI */ +#define SYSREG_ASID_OFFSET 0 +#define SYSREG_ASID_SIZE 8 +#define SYSREG_TLBEHI_I_OFFSET 8 +#define SYSREG_TLBEHI_I_SIZE 1 +#define SYSREG_TLBEHI_V_OFFSET 9 +#define SYSREG_TLBEHI_V_SIZE 1 +#define SYSREG_VPN_OFFSET 10 +#define SYSREG_VPN_SIZE 22 + +/* Bitfields in TLBELO */ +#define SYSREG_W_OFFSET 0 +#define SYSREG_W_SIZE 1 +#define SYSREG_TLBELO_D_OFFSET 1 +#define SYSREG_TLBELO_D_SIZE 1 +#define SYSREG_SZ_OFFSET 2 +#define SYSREG_SZ_SIZE 2 +#define SYSREG_AP_OFFSET 4 +#define SYSREG_AP_SIZE 3 +#define SYSREG_B_OFFSET 7 +#define SYSREG_B_SIZE 1 +#define SYSREG_G_OFFSET 8 +#define SYSREG_G_SIZE 1 +#define SYSREG_TLBELO_C_OFFSET 9 +#define SYSREG_TLBELO_C_SIZE 1 +#define SYSREG_PFN_OFFSET 10 +#define SYSREG_PFN_SIZE 22 + +/* Bitfields in PTBR */ + +/* Bitfields in TLBEAR */ + +/* Bitfields in MMUCR */ +#define SYSREG_E_OFFSET 0 +#define SYSREG_E_SIZE 1 +#define SYSREG_M_OFFSET 1 +#define SYSREG_M_SIZE 1 +#define SYSREG_MMUCR_I_OFFSET 2 +#define SYSREG_MMUCR_I_SIZE 1 +#define SYSREG_MMUCR_N_OFFSET 3 +#define SYSREG_MMUCR_N_SIZE 1 +#define SYSREG_MMUCR_S_OFFSET 4 +#define SYSREG_MMUCR_S_SIZE 1 +#define SYSREG_DLA_OFFSET 8 +#define SYSREG_DLA_SIZE 6 +#define SYSREG_DRP_OFFSET 14 +#define SYSREG_DRP_SIZE 6 +#define SYSREG_ILA_OFFSET 20 +#define SYSREG_ILA_SIZE 6 +#define SYSREG_IRP_OFFSET 26 +#define SYSREG_IRP_SIZE 6 + +/* Bitfields in TLBARLO */ + +/* Bitfields in TLBARHI */ + +/* Bitfields in PCCNT */ + +/* Bitfields in PCNT0 */ + +/* Bitfields in PCNT1 */ + +/* Bitfields in PCCR */ + +/* Bitfields in BEAR */ + +/* Constants for ECR */ +#define ECR_UNRECOVERABLE 0 +#define ECR_TLB_MULTIPLE 1 +#define ECR_BUS_ERROR_WRITE 2 +#define ECR_BUS_ERROR_READ 3 +#define ECR_NMI 4 +#define ECR_ADDR_ALIGN_X 5 +#define ECR_PROTECTION_X 6 +#define ECR_DEBUG 7 +#define ECR_ILLEGAL_OPCODE 8 +#define ECR_UNIMPL_INSTRUCTION 9 +#define ECR_PRIVILEGE_VIOLATION 10 +#define ECR_FPE 11 +#define ECR_COPROC_ABSENT 12 +#define ECR_ADDR_ALIGN_R 13 +#define ECR_ADDR_ALIGN_W 14 +#define ECR_PROTECTION_R 15 +#define ECR_PROTECTION_W 16 +#define ECR_DTLB_MODIFIED 17 +#define ECR_TLB_MISS_X 20 +#define ECR_TLB_MISS_R 24 +#define ECR_TLB_MISS_W 28 + +/* Bit manipulation macros */ +#define SYSREG_BIT(name) (1 << SYSREG_##name##_OFFSET) +#define SYSREG_BF(name,value) (((value) & ((1 << SYSREG_##name##_SIZE) - 1)) << SYSREG_##name##_OFFSET) +#define SYSREG_BFEXT(name,value) (((value) >> SYSREG_##name##_OFFSET) & ((1 << SYSREG_##name##_SIZE) - 1)) +#define SYSREG_BFINS(name,value,old) (((old) & ~(((1 << SYSREG_##name##_SIZE) - 1) << SYSREG_##name##_OFFSET)) | SYSREG_BF(name,value)) + +#ifdef __CHECKER__ +extern unsigned long __builtin_mfsr(unsigned long reg); +extern void __builtin_mtsr(unsigned long reg, unsigned long value); +#endif + +/* Register access macros */ +#define sysreg_read(reg) __builtin_mfsr(SYSREG_##reg) +#define sysreg_write(reg, value) __builtin_mtsr(SYSREG_##reg, value) + +#endif /* __ASM_AVR32_SYSREG_H__ */ diff --git a/include/asm-avr32/system.h b/include/asm-avr32/system.h new file mode 100644 index 000000000000..ac596058697d --- /dev/null +++ b/include/asm-avr32/system.h @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SYSTEM_H +#define __ASM_AVR32_SYSTEM_H + +#include +#include + +#include +#include + +#define xchg(ptr,x) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) + +#define nop() asm volatile("nop") + +#define mb() asm volatile("" : : : "memory") +#define rmb() mb() +#define wmb() asm volatile("sync 0" : : : "memory") +#define read_barrier_depends() do { } while(0) +#define set_mb(var, value) do { var = value; mb(); } while(0) + +/* + * Help PathFinder and other Nexus-compliant debuggers keep track of + * the current PID by emitting an Ownership Trace Message each time we + * switch task. + */ +#ifdef CONFIG_OWNERSHIP_TRACE +#include +#define finish_arch_switch(prev) \ + do { \ + __mtdr(DBGREG_PID, prev->pid); \ + __mtdr(DBGREG_PID, current->pid); \ + } while(0) +#endif + +/* + * switch_to(prev, next, last) should switch from task `prev' to task + * `next'. `prev' will never be the same as `next'. + * + * We just delegate everything to the __switch_to assembly function, + * which is implemented in arch/avr32/kernel/switch_to.S + * + * mb() tells GCC not to cache `current' across this call. + */ +struct cpu_context; +struct task_struct; +extern struct task_struct *__switch_to(struct task_struct *, + struct cpu_context *, + struct cpu_context *); +#define switch_to(prev, next, last) \ + do { \ + last = __switch_to(prev, &prev->thread.cpu_context + 1, \ + &next->thread.cpu_context); \ + } while (0) + +#ifdef CONFIG_SMP +# error "The AVR32 port does not support SMP" +#else +# define smp_mb() barrier() +# define smp_rmb() barrier() +# define smp_wmb() barrier() +# define smp_read_barrier_depends() do { } while(0) +#endif + +#include + +extern void __xchg_called_with_bad_pointer(void); + +#ifdef __CHECKER__ +extern unsigned long __builtin_xchg(void *ptr, unsigned long x); +#endif + +#define xchg_u32(val, m) __builtin_xchg((void *)m, val) + +static inline unsigned long __xchg(unsigned long x, + volatile void *ptr, + int size) +{ + switch(size) { + case 4: + return xchg_u32(x, ptr); + default: + __xchg_called_with_bad_pointer(); + return x; + } +} + +static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, + unsigned long new) +{ + __u32 ret; + + asm volatile( + "1: ssrf 5\n" + " ld.w %[ret], %[m]\n" + " cp.w %[ret], %[old]\n" + " brne 2f\n" + " stcond %[m], %[new]\n" + " brne 1b\n" + "2:\n" + : [ret] "=&r"(ret), [m] "=m"(*m) + : "m"(m), [old] "ir"(old), [new] "r"(new) + : "memory", "cc"); + return ret; +} + +extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + case 8: + return __cmpxchg_u64(ptr, old, new); + } + + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, old, new) \ + ((typeof(*(ptr)))__cmpxchg((ptr), (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +struct pt_regs; +extern void __die(const char *, struct pt_regs *, unsigned long, + const char *, const char *, unsigned long); +extern void __die_if_kernel(const char *, struct pt_regs *, unsigned long, + const char *, const char *, unsigned long); + +#define die(msg, regs, err) \ + __die(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__) +#define die_if_kernel(msg, regs, err) \ + __die_if_kernel(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__) + +#define arch_align_stack(x) (x) + +#endif /* __ASM_AVR32_SYSTEM_H */ diff --git a/include/asm-avr32/termbits.h b/include/asm-avr32/termbits.h new file mode 100644 index 000000000000..9dc6eacafa33 --- /dev/null +++ b/include/asm-avr32/termbits.h @@ -0,0 +1,173 @@ +#ifndef __ASM_AVR32_TERMBITS_H +#define __ASM_AVR32_TERMBITS_H + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate (not used) */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* __ASM_AVR32_TERMBITS_H */ diff --git a/include/asm-avr32/termios.h b/include/asm-avr32/termios.h new file mode 100644 index 000000000000..615bc0639e5c --- /dev/null +++ b/include/asm-avr32/termios.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TERMIOS_H +#define __ASM_AVR32_TERMIOS_H + +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +/* line disciplines */ +#define N_TTY 0 +#define N_SLIP 1 +#define N_MOUSE 2 +#define N_PPP 3 +#define N_STRIP 4 +#define N_AX25 5 +#define N_X25 6 /* X.25 async */ +#define N_6PACK 7 +#define N_MASC 8 /* Reserved for Mobitex module */ +#define N_R3964 9 /* Reserved for Simatic R3964 module */ +#define N_PROFIBUS_FDL 10 /* Reserved for Profibus */ +#define N_IRDA 11 /* Linux IR - http://irda.sourceforge.net/ */ +#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */ +#define N_HDLC 13 /* synchronous HDLC */ +#define N_SYNC_PPP 14 /* synchronous PPP */ +#define N_HCI 15 /* Bluetooth HCI UART */ + +#ifdef __KERNEL__ +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +#include + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_TERMIOS_H */ diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h new file mode 100644 index 000000000000..d1f5b35ebd54 --- /dev/null +++ b/include/asm-avr32/thread_info.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_THREAD_INFO_H +#define __ASM_AVR32_THREAD_INFO_H + +#include + +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) + +#ifndef __ASSEMBLY__ +#include + +struct task_struct; +struct exec_domain; + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + __u32 cpu; + __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + struct restart_block restart_block; + __u8 supervisor_stack[0]; +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .restart_block = { \ + .fn = do_no_restart_syscall \ + } \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* + * Get the thread information struct from C. + * We do the usual trick and use the lower end of the stack for this + */ +static inline struct thread_info *current_thread_info(void) +{ + unsigned long addr = ~(THREAD_SIZE - 1); + + asm("and %0, sp" : "=r"(addr) : "0"(addr)); + return (struct thread_info *)addr; +} + +/* thread information allocation */ +#define alloc_thread_info(ti) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER)) +#define free_thread_info(ti) free_pages((unsigned long)(ti), 1) +#define get_thread_info(ti) get_task_struct((ti)->task) +#define put_thread_info(ti) put_task_struct((ti)->task) + +#endif /* !__ASSEMBLY__ */ + +#define PREEMPT_ACTIVE 0x40000000 + +/* + * Thread information flags + * - these are process state flags that various assembly files may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ +#define TIF_BREAKPOINT 5 /* true if we should break after return */ +#define TIF_SINGLE_STEP 6 /* single step after next break */ +#define TIF_MEMDIE 7 +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal */ +#define TIF_USERSPACE 31 /* true if FS sets userspace */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT) +#define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP) +#define _TIF_MEMDIE (1 << TIF_MEMDIE) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) + +/* XXX: These two masks must never span more than 16 bits! */ +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK 0x0000013e +/* work to do on any return to userspace */ +#define _TIF_ALLWORK_MASK 0x0000013f +/* work to do on return from debug mode */ +#define _TIF_DBGWORK_MASK 0x0000017e + +#endif /* __ASM_AVR32_THREAD_INFO_H */ diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h new file mode 100644 index 000000000000..5e44ecb3ce0c --- /dev/null +++ b/include/asm-avr32/timex.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TIMEX_H +#define __ASM_AVR32_TIMEX_H + +/* + * This is the frequency of the timer used for Linux's timer interrupt. + * The value should be defined as accurate as possible or under certain + * circumstances Linux timekeeping might become inaccurate or fail. + * + * For many system the exact clockrate of the timer isn't known but due to + * the way this value is used we can get away with a wrong value as long + * as this value is: + * + * - a multiple of HZ + * - a divisor of the actual rate + * + * 500000 is a good such cheat value. + * + * The obscure number 1193182 is the same as used by the original i8254 + * time in legacy PC hardware; the chip is never found in AVR32 systems. + */ +#define CLOCK_TICK_RATE 500000 /* Underlying HZ */ + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles (void) +{ + return 0; +} + +extern int read_current_timer(unsigned long *timer_value); +#define ARCH_HAS_READ_CURRENT_TIMER 1 + +#endif /* __ASM_AVR32_TIMEX_H */ diff --git a/include/asm-avr32/tlb.h b/include/asm-avr32/tlb.h new file mode 100644 index 000000000000..5c55f9ce7c7d --- /dev/null +++ b/include/asm-avr32/tlb.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TLB_H +#define __ASM_AVR32_TLB_H + +#define tlb_start_vma(tlb, vma) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end) + +#define tlb_end_vma(tlb, vma) \ + flush_tlb_range(vma, vma->vm_start, vma->vm_end) + +#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0) + +/* + * Flush whole TLB for MM + */ +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include + +/* + * For debugging purposes + */ +extern void show_dtlb_entry(unsigned int index); +extern void dump_dtlb(void); + +#endif /* __ASM_AVR32_TLB_H */ diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h new file mode 100644 index 000000000000..730e268f81f3 --- /dev/null +++ b/include/asm-avr32/tlbflush.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TLBFLUSH_H +#define __ASM_AVR32_TLBFLUSH_H + +#include + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes' TLB entries + * - flush_tlb_mm(mm) flushes the specified mm context TLBs + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + */ +extern void flush_tlb(void); +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void __flush_tlb_page(unsigned long asid, unsigned long page); + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* Nothing to do */ +} + +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +#endif /* __ASM_AVR32_TLBFLUSH_H */ diff --git a/include/asm-avr32/topology.h b/include/asm-avr32/topology.h new file mode 100644 index 000000000000..5b766cbb4806 --- /dev/null +++ b/include/asm-avr32/topology.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_TOPOLOGY_H +#define __ASM_AVR32_TOPOLOGY_H + +#include + +#endif /* __ASM_AVR32_TOPOLOGY_H */ diff --git a/include/asm-avr32/traps.h b/include/asm-avr32/traps.h new file mode 100644 index 000000000000..6a8fb944f414 --- /dev/null +++ b/include/asm-avr32/traps.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TRAPS_H +#define __ASM_AVR32_TRAPS_H + +#include + +struct undef_hook { + struct list_head node; + u32 insn_mask; + u32 insn_val; + int (*fn)(struct pt_regs *regs, u32 insn); +}; + +void register_undef_hook(struct undef_hook *hook); +void unregister_undef_hook(struct undef_hook *hook); + +#endif /* __ASM_AVR32_TRAPS_H */ diff --git a/include/asm-avr32/types.h b/include/asm-avr32/types.h new file mode 100644 index 000000000000..3f47db9675af --- /dev/null +++ b/include/asm-avr32/types.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TYPES_H +#define __ASM_AVR32_TYPES_H + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#define BITS_PER_LONG 32 + +#ifndef __ASSEMBLY__ + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +/* Dma addresses are 32-bits wide. */ + +typedef u32 dma_addr_t; + +#ifdef CONFIG_LBD +typedef u64 sector_t; +#define HAVE_SECTOR_T +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + + +#endif /* __ASM_AVR32_TYPES_H */ diff --git a/include/asm-avr32/uaccess.h b/include/asm-avr32/uaccess.h new file mode 100644 index 000000000000..821deb5a9d28 --- /dev/null +++ b/include/asm-avr32/uaccess.h @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_UACCESS_H +#define __ASM_AVR32_UACCESS_H + +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +typedef struct { + unsigned int is_user_space; +} mm_segment_t; + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons (Data Segment Register?), these macros are misnamed. + */ +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) +#define segment_eq(a,b) ((a).is_user_space == (b).is_user_space) + +#define USER_ADDR_LIMIT 0x80000000 + +#define KERNEL_DS MAKE_MM_SEG(0) +#define USER_DS MAKE_MM_SEG(1) + +#define get_ds() (KERNEL_DS) + +static inline mm_segment_t get_fs(void) +{ + return MAKE_MM_SEG(test_thread_flag(TIF_USERSPACE)); +} + +static inline void set_fs(mm_segment_t s) +{ + if (s.is_user_space) + set_thread_flag(TIF_USERSPACE); + else + clear_thread_flag(TIF_USERSPACE); +} + +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + * + * We do the following checks: + * 1. Is the access from kernel space? + * 2. Does (addr + size) set the carry bit? + * 3. Is (addr + size) a negative number (i.e. >= 0x80000000)? + * + * If yes on the first check, access is granted. + * If no on any of the others, access is denied. + */ +#define __range_ok(addr, size) \ + (test_thread_flag(TIF_USERSPACE) \ + && (((unsigned long)(addr) >= 0x80000000) \ + || ((unsigned long)(size) > 0x80000000) \ + || (((unsigned long)(addr) + (unsigned long)(size)) > 0x80000000))) + +#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0)) + +static inline int +verify_area(int type, const void __user *addr, unsigned long size) +{ + return access_ok(type, addr, size) ? 0 : -EFAULT; +} + +/* Generic arbitrary sized copy. Return the number of bytes NOT copied */ +extern __kernel_size_t __copy_user(void *to, const void *from, + __kernel_size_t n); + +extern __kernel_size_t copy_to_user(void __user *to, const void *from, + __kernel_size_t n); +extern __kernel_size_t copy_from_user(void *to, const void __user *from, + __kernel_size_t n); + +static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, + __kernel_size_t n) +{ + return __copy_user((void __force *)to, from, n); +} +static inline __kernel_size_t __copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + return __copy_user(to, (const void __force *)from, n); +} + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +/* + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ +#define put_user(x,ptr) \ + __put_user_check((x),(ptr),sizeof(*(ptr))) + +/* + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) \ + __get_user_check((x),(ptr),sizeof(*(ptr))) + +/* + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ +#define __put_user(x,ptr) \ + __put_user_nocheck((x),(ptr),sizeof(*(ptr))) + +/* + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + +extern int __get_user_bad(void); +extern int __put_user_bad(void); + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \ + int __gu_err = 0; \ + \ + switch (size) { \ + case 1: __get_user_asm("ub", __gu_val, ptr, __gu_err); break; \ + case 2: __get_user_asm("uh", __gu_val, ptr, __gu_err); break; \ + case 4: __get_user_asm("w", __gu_val, ptr, __gu_err); break; \ + case 8: __get_user_asm("d", __gu_val, ptr, __gu_err); break; \ + default: __gu_err = __get_user_bad(); break; \ + } \ + \ + x = __gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \ + const typeof(*(ptr)) __user * __gu_addr = (ptr); \ + int __gu_err = 0; \ + \ + if (access_ok(VERIFY_READ, __gu_addr, size)) { \ + switch (size) { \ + case 1: \ + __get_user_asm("ub", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 2: \ + __get_user_asm("uh", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 4: \ + __get_user_asm("w", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 8: \ + __get_user_asm("d", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + default: \ + __gu_err = __get_user_bad(); \ + break; \ + } \ + } else { \ + __gu_err = -EFAULT; \ + } \ + x = __gu_val; \ + __gu_err; \ +}) + +#define __get_user_asm(suffix, __gu_val, ptr, __gu_err) \ + asm volatile( \ + "1: ld." suffix " %1, %3 \n" \ + "2: \n" \ + " .section .fixup, \"ax\" \n" \ + "3: mov %0, %4 \n" \ + " rjmp 2b \n" \ + " .previous \n" \ + " .section __ex_table, \"a\" \n" \ + " .long 1b, 3b \n" \ + " .previous \n" \ + : "=r"(__gu_err), "=r"(__gu_val) \ + : "0"(__gu_err), "m"(*(ptr)), "i"(-EFAULT)) + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + switch (size) { \ + case 1: __put_user_asm("b", ptr, __pu_val, __pu_err); break; \ + case 2: __put_user_asm("h", ptr, __pu_val, __pu_err); break; \ + case 4: __put_user_asm("w", ptr, __pu_val, __pu_err); break; \ + case 8: __put_user_asm("d", ptr, __pu_val, __pu_err); break; \ + default: __pu_err = __put_user_bad(); break; \ + } \ + __pu_err; \ +}) + +#define __put_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + typeof(*(ptr)) __user *__pu_addr = (ptr); \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \ + switch (size) { \ + case 1: \ + __put_user_asm("b", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 2: \ + __put_user_asm("h", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 4: \ + __put_user_asm("w", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 8: \ + __put_user_asm("d", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + default: \ + __pu_err = __put_user_bad(); \ + break; \ + } \ + } else { \ + __pu_err = -EFAULT; \ + } \ + __pu_err; \ +}) + +#define __put_user_asm(suffix, ptr, __pu_val, __gu_err) \ + asm volatile( \ + "1: st." suffix " %1, %3 \n" \ + "2: \n" \ + " .section .fixup, \"ax\" \n" \ + "3: mov %0, %4 \n" \ + " rjmp 2b \n" \ + " .previous \n" \ + " .section __ex_table, \"a\" \n" \ + " .long 1b, 3b \n" \ + " .previous \n" \ + : "=r"(__gu_err), "=m"(*(ptr)) \ + : "0"(__gu_err), "r"(__pu_val), "i"(-EFAULT)) + +extern __kernel_size_t clear_user(void __user *addr, __kernel_size_t size); +extern __kernel_size_t __clear_user(void __user *addr, __kernel_size_t size); + +extern long strncpy_from_user(char *dst, const char __user *src, long count); +extern long __strncpy_from_user(char *dst, const char __user *src, long count); + +extern long strnlen_user(const char __user *__s, long __n); +extern long __strnlen_user(const char __user *__s, long __n); + +#define strlen_user(s) strnlen_user(s, ~0UL >> 1) + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +#endif /* __ASM_AVR32_UACCESS_H */ diff --git a/include/asm-avr32/ucontext.h b/include/asm-avr32/ucontext.h new file mode 100644 index 000000000000..ac7259c2a799 --- /dev/null +++ b/include/asm-avr32/ucontext.h @@ -0,0 +1,12 @@ +#ifndef __ASM_AVR32_UCONTEXT_H +#define __ASM_AVR32_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext * uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; +}; + +#endif /* __ASM_AVR32_UCONTEXT_H */ diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h new file mode 100644 index 000000000000..3042723fcbfd --- /dev/null +++ b/include/asm-avr32/unaligned.h @@ -0,0 +1,25 @@ +#ifndef __ASM_AVR32_UNALIGNED_H +#define __ASM_AVR32_UNALIGNED_H + +/* + * AVR32 can handle some unaligned accesses, depending on the + * implementation. The AVR32 AP implementation can handle unaligned + * words, but halfwords must be halfword-aligned, and doublewords must + * be word-aligned. + * + * TODO: Make all this CPU-specific and optimize. + */ + +#include + +/* Use memmove here, so gcc does not insert a __builtin_memcpy. */ + +#define get_unaligned(ptr) \ + ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) + +#define put_unaligned(val, ptr) \ + ({ __typeof__(*(ptr)) __tmp = (val); \ + memmove((ptr), &__tmp, sizeof(*(ptr))); \ + (void)0; }) + +#endif /* __ASM_AVR32_UNALIGNED_H */ diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h new file mode 100644 index 000000000000..1f528f92690d --- /dev/null +++ b/include/asm-avr32/unistd.h @@ -0,0 +1,387 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_UNISTD_H +#define __ASM_AVR32_UNISTD_H + +/* + * This file contains the system call numbers. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_umask 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_chown 16 +#define __NR_lchown 17 +#define __NR_lseek 18 +#define __NR__llseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount2 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 28 +#define __NR_utime 29 +#define __NR_stat 30 +#define __NR_fstat 31 +#define __NR_lstat 32 +#define __NR_access 33 +#define __NR_chroot 34 +#define __NR_sync 35 +#define __NR_fsync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_clone 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_getcwd 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_setfsuid 52 +#define __NR_setfsgid 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 56 +#define __NR_mremap 57 +#define __NR_setresuid 58 +#define __NR_getresuid 59 +#define __NR_setreuid 60 +#define __NR_setregid 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_rt_sigaction 67 +#define __NR_rt_sigreturn 68 +#define __NR_rt_sigprocmask 69 +#define __NR_rt_sigpending 70 +#define __NR_rt_sigtimedwait 71 +#define __NR_rt_sigqueueinfo 72 +#define __NR_rt_sigsuspend 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 /* SuS compliant getrlimit */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_fchdir 84 +#define __NR_readlink 85 +#define __NR_pread 86 +#define __NR_pwrite 87 +#define __NR_swapon 88 +#define __NR_reboot 89 +#define __NR_mmap2 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_wait4 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_vhangup 101 +#define __NR_sigaltstack 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_swapoff 106 +#define __NR_sysinfo 107 +#define __NR_ipc 108 +#define __NR_sendfile 109 +#define __NR_setdomainname 110 +#define __NR_uname 111 +#define __NR_adjtimex 112 +#define __NR_mprotect 113 +#define __NR_vfork 114 +#define __NR_init_module 115 +#define __NR_delete_module 116 +#define __NR_quotactl 117 +#define __NR_getpgid 118 +#define __NR_bdflush 119 +#define __NR_sysfs 120 +#define __NR_personality 121 +#define __NR_afs_syscall 122 /* Syscall for Andrew File System */ +#define __NR_getdents 123 +#define __NR_flock 124 +#define __NR_msync 125 +#define __NR_readv 126 +#define __NR_writev 127 +#define __NR_getsid 128 +#define __NR_fdatasync 129 +#define __NR__sysctl 130 +#define __NR_mlock 131 +#define __NR_munlock 132 +#define __NR_mlockall 133 +#define __NR_munlockall 134 +#define __NR_sched_setparam 135 +#define __NR_sched_getparam 136 +#define __NR_sched_setscheduler 137 +#define __NR_sched_getscheduler 138 +#define __NR_sched_yield 139 +#define __NR_sched_get_priority_max 140 +#define __NR_sched_get_priority_min 141 +#define __NR_sched_rr_get_interval 142 +#define __NR_nanosleep 143 +#define __NR_poll 144 +#define __NR_nfsservctl 145 +#define __NR_setresgid 146 +#define __NR_getresgid 147 +#define __NR_prctl 148 +#define __NR_socket 149 +#define __NR_bind 150 +#define __NR_connect 151 +#define __NR_listen 152 +#define __NR_accept 153 +#define __NR_getsockname 154 +#define __NR_getpeername 155 +#define __NR_socketpair 156 +#define __NR_send 157 +#define __NR_recv 158 +#define __NR_sendto 159 +#define __NR_recvfrom 160 +#define __NR_shutdown 161 +#define __NR_setsockopt 162 +#define __NR_getsockopt 163 +#define __NR_sendmsg 164 +#define __NR_recvmsg 165 +#define __NR_truncate64 166 +#define __NR_ftruncate64 167 +#define __NR_stat64 168 +#define __NR_lstat64 169 +#define __NR_fstat64 170 +#define __NR_pivot_root 171 +#define __NR_mincore 172 +#define __NR_madvise 173 +#define __NR_getdents64 174 +#define __NR_fcntl64 175 +#define __NR_gettid 176 +#define __NR_readahead 177 +#define __NR_setxattr 178 +#define __NR_lsetxattr 179 +#define __NR_fsetxattr 180 +#define __NR_getxattr 181 +#define __NR_lgetxattr 182 +#define __NR_fgetxattr 183 +#define __NR_listxattr 184 +#define __NR_llistxattr 185 +#define __NR_flistxattr 186 +#define __NR_removexattr 187 +#define __NR_lremovexattr 188 +#define __NR_fremovexattr 189 +#define __NR_tkill 190 +#define __NR_sendfile64 191 +#define __NR_futex 192 +#define __NR_sched_setaffinity 193 +#define __NR_sched_getaffinity 194 +#define __NR_capget 195 +#define __NR_capset 196 +#define __NR_io_setup 197 +#define __NR_io_destroy 198 +#define __NR_io_getevents 199 +#define __NR_io_submit 200 +#define __NR_io_cancel 201 +#define __NR_fadvise64 202 +#define __NR_exit_group 203 +#define __NR_lookup_dcookie 204 +#define __NR_epoll_create 205 +#define __NR_epoll_ctl 206 +#define __NR_epoll_wait 207 +#define __NR_remap_file_pages 208 +#define __NR_set_tid_address 209 + +#define __NR_timer_create 210 +#define __NR_timer_settime 211 +#define __NR_timer_gettime 212 +#define __NR_timer_getoverrun 213 +#define __NR_timer_delete 214 +#define __NR_clock_settime 215 +#define __NR_clock_gettime 216 +#define __NR_clock_getres 217 +#define __NR_clock_nanosleep 218 +#define __NR_statfs64 219 +#define __NR_fstatfs64 220 +#define __NR_tgkill 221 + /* 222 reserved for tux */ +#define __NR_utimes 223 +#define __NR_fadvise64_64 224 + +#define __NR_cacheflush 225 + +#define __NR_vserver 226 +#define __NR_mq_open 227 +#define __NR_mq_unlink 228 +#define __NR_mq_timedsend 229 +#define __NR_mq_timedreceive 230 +#define __NR_mq_notify 231 +#define __NR_mq_getsetattr 232 +#define __NR_kexec_load 233 +#define __NR_waitid 234 +#define __NR_add_key 235 +#define __NR_request_key 236 +#define __NR_keyctl 237 +#define __NR_ioprio_set 238 +#define __NR_ioprio_get 239 +#define __NR_inotify_init 240 +#define __NR_inotify_add_watch 241 +#define __NR_inotify_rm_watch 242 +#define __NR_openat 243 +#define __NR_mkdirat 244 +#define __NR_mknodat 245 +#define __NR_fchownat 246 +#define __NR_futimesat 247 +#define __NR_fstatat64 248 +#define __NR_unlinkat 249 +#define __NR_renameat 250 +#define __NR_linkat 251 +#define __NR_symlinkat 252 +#define __NR_readlinkat 253 +#define __NR_fchmodat 254 +#define __NR_faccessat 255 +#define __NR_pselect6 256 +#define __NR_ppoll 257 +#define __NR_unshare 258 +#define __NR_set_robust_list 259 +#define __NR_get_robust_list 260 +#define __NR_splice 261 +#define __NR_sync_file_range 262 +#define __NR_tee 263 +#define __NR_vmsplice 264 + +#define NR_syscalls 265 + + +/* + * AVR32 calling convention for system calls: + * - System call number in r8 + * - Parameters in r12 and downwards to r9 as well as r6 and r5. + * - Return value in r12 + */ + +/* + * user-visible error numbers are in the range -1 - -124: see + * + */ + +#define __syscall_return(type, res) do { \ + if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + errno = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + +#ifdef __KERNEL__ +#define __ARCH_WANT_IPC_PARSE_VERSION +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_WAITPID +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND +#endif + +#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__) + +#include +#include +#include + +struct pt_regs; + +/* + * we need this inline - forking from kernel space will result + * in NO COPY ON WRITE (!!!), until an execve is executed. This + * is no problem, but for the stack. This is handled by not letting + * main() use the stack at all after fork(). Thus, no function + * calls - which means inline code for fork too, as otherwise we + * would use the stack upon exit from 'fork()'. + * + * Actually only pause and fork are needed inline, so that there + * won't be any messing with the stack from main(), but we define + * some others too. + */ +static inline int execve(const char *file, char **argv, char **envp) +{ + register long scno asm("r8") = __NR_execve; + register long sc1 asm("r12") = (long)file; + register long sc2 asm("r11") = (long)argv; + register long sc3 asm("r10") = (long)envp; + int res; + + asm volatile("scall" + : "=r"(sc1) + : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3) + : "lr", "memory"); + res = sc1; + __syscall_return(int, res); +} + +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + struct pt_regs *regs); +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs); +asmlinkage int sys_pipe(unsigned long __user *filedes); +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset); +asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len); +asmlinkage int sys_fork(struct pt_regs *regs); +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, + unsigned long parent_tidptr, + unsigned long child_tidptr, struct pt_regs *regs); +asmlinkage int sys_vfork(struct pt_regs *regs); +asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv, + char __user *__user *uenvp, struct pt_regs *regs); + +#endif + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); + +#endif /* __ASM_AVR32_UNISTD_H */ diff --git a/include/asm-avr32/user.h b/include/asm-avr32/user.h new file mode 100644 index 000000000000..060fb3acee49 --- /dev/null +++ b/include/asm-avr32/user.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Note: We may not need these definitions for AVR32, as we don't + * support a.out. + */ +#ifndef __ASM_AVR32_USER_H +#define __ASM_AVR32_USER_H + +#include +#include +#include + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd). The file contents are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. + */ + +struct user_fpu_struct { + /* We have no FPU (yet) */ +}; + +struct user { + struct pt_regs regs; /* entire machine state */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long int signal; /* signal causing core dump */ + struct regs * u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_DATA_START_ADDR (u.start_data) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* __ASM_AVR32_USER_H */ diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h index 6a5796c81c90..666e0a5f00fc 100644 --- a/include/linux/elf-em.h +++ b/include/linux/elf-em.h @@ -31,6 +31,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_H8_300 46 /* Renesas H8/300,300H,H8S */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ +#define EM_AVR32 0x18ad /* Atmel AVR32 */ /* * This is an interim value that we will use until the committee comes diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 554ee688a9f8..3f21cc79a134 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -277,7 +277,7 @@ config DEBUG_HIGHMEM config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED depends on BUG - depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV + depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV default !EMBEDDED help Say Y here to make BUG() panics output the file name and line number @@ -315,7 +315,7 @@ config DEBUG_VM config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390) + depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32) default y if DEBUG_INFO && UML help If you say Y here the resulting kernel image will be slightly larger -- cgit v1.2.3-71-gd317 From 9c9b8b388296ad5a306ab238dc677cfe6ff4cb12 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 25 Sep 2006 23:32:26 -0700 Subject: [PATCH] x86: put .note.* sections into a PT_NOTE segment in vmlinux This patch will pack any .note.* section into a PT_NOTE segment in the output file. To do this, we tell ld that we need a PT_NOTE segment. This requires us to start explicitly mapping sections to segments, so we also need to explicitly create PT_LOAD segments for text and data, and map the sections to them appropriately. Fortunately, each section will default to its previous section's segment, so it doesn't take many changes to vmlinux.lds.S. This only changes i386 for now, but I presume the corresponding changes for other architectures will be as simple. This change also adds , which defines C and Assembler macros for actually creating ELF notes. Signed-off-by: Jeremy Fitzhardinge Cc: Eric W. Biederman Cc: Hollis Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/vmlinux.lds.S | 12 +++++- include/asm-generic/vmlinux.lds.h | 3 ++ include/linux/elfnote.h | 88 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 include/linux/elfnote.h (limited to 'include/linux') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 2d4f1386e2b1..1e7ac1c44ddc 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) ENTRY(phys_startup_32) jiffies = jiffies_64; + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(4); /* R__ */ +} SECTIONS { . = __KERNEL_START; @@ -26,7 +32,7 @@ SECTIONS KPROBES_TEXT *(.fixup) *(.gnu.warning) - } = 0x9090 + } :text = 0x9090 _etext = .; /* End of text section */ @@ -48,7 +54,7 @@ SECTIONS .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ *(.data) CONSTRUCTORS - } + } :data . = ALIGN(4096); __nosave_begin = .; @@ -184,4 +190,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + NOTES } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db5a3732f106..253ae1328271 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -194,3 +194,6 @@ .stab.index 0 : { *(.stab.index) } \ .stab.indexstr 0 : { *(.stab.indexstr) } \ .comment 0 : { *(.comment) } + +#define NOTES \ + .notes : { *(.note.*) } :note diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h new file mode 100644 index 000000000000..16f9f8ebffd9 --- /dev/null +++ b/include/linux/elfnote.h @@ -0,0 +1,88 @@ +#ifndef _LINUX_ELFNOTE_H +#define _LINUX_ELFNOTE_H +/* + * Helper macros to generate ELF Note structures, which are put into a + * PT_NOTE segment of the final vmlinux image. These are useful for + * including name-value pairs of metadata into the kernel binary (or + * modules?) for use by external programs. + * + * Each note has three parts: a name, a type and a desc. The name is + * intended to distinguish the note's originator, so it would be a + * company, project, subsystem, etc; it must be in a suitable form for + * use in a section name. The type is an integer which is used to tag + * the data, and is considered to be within the "name" namespace (so + * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The + * "desc" field is the actual data. There are no constraints on the + * desc field's contents, though typically they're fairly small. + * + * All notes from a given NAME are put into a section named + * .note.NAME. When the kernel image is finally linked, all the notes + * are packed into a single .notes section, which is mapped into the + * PT_NOTE segment. Because notes for a given name are grouped into + * the same section, they'll all be adjacent the output file. + * + * This file defines macros for both C and assembler use. Their + * syntax is slightly different, but they're semantically similar. + * + * See the ELF specification for more detail about ELF notes. + */ + +#ifdef __ASSEMBLER__ +/* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and + * desc data with appropriate padding. The 'desc' argument includes + * the assembler pseudo op defining the type of the data: .asciz + * "hello, world" + */ +.macro ELFNOTE name type desc:vararg +.pushsection ".note.\name" + .align 4 + .long 2f - 1f /* namesz */ + .long 4f - 3f /* descsz */ + .long \type +1:.asciz "\name" +2:.align 4 +3:\desc +4:.align 4 +.popsection +.endm +#else /* !__ASSEMBLER__ */ +#include +/* + * Use an anonymous structure which matches the shape of + * Elf{32,64}_Nhdr, but includes the name and desc data. The size and + * type of name and desc depend on the macro arguments. "name" must + * be a literal string, and "desc" must be passed by value. You may + * only define one note per line, since __LINE__ is used to generate + * unique symbols. + */ +#define _ELFNOTE_PASTE(a,b) a##b +#define _ELFNOTE(size, name, unique, type, desc) \ + static const struct { \ + struct elf##size##_note _nhdr; \ + unsigned char _name[sizeof(name)] \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + typeof(desc) _desc \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + } _ELFNOTE_PASTE(_note_, unique) \ + __attribute_used__ \ + __attribute__((section(".note." name), \ + aligned(sizeof(Elf##size##_Word)), \ + unused)) = { \ + { \ + sizeof(name), \ + sizeof(desc), \ + type, \ + }, \ + name, \ + desc \ + } +#define ELFNOTE(size, name, type, desc) \ + _ELFNOTE(size, name, __LINE__, type, desc) + +#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) +#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) +#endif /* __ASSEMBLER__ */ + +#endif /* _LINUX_ELFNOTE_H */ -- cgit v1.2.3-71-gd317 From 5091e746848f74c9a2c0579b4ef8d8cd1a6b135d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 25 Sep 2006 23:32:28 -0700 Subject: [PATCH] Translate asm version of ELFNOTE macro into preprocessor macro I've come across some problems with the assembly version of the ELFNOTE macro currently in -mm. (in x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch) The first is that older gas does not support :varargs in .macro definitions (in my testing 2.17 does while 2.15 does not, I don't know when it became supported). The Changes file says binutils >= 2.12 so I think we need to avoid using it. There are no other uses in mainline or -mm. Old gas appears to just ignore it so you get "too many arguments" type errors. Secondly it seems that passing strings as arguments to assembler macros is broken without varargs. It looks like they get unquoted or each character is treated as a separate argument or something and this causes all manner of grief. I think this is because of the use of -traditional when compiling assembly files. Therefore I have translated the assembler macro into a pre-processor macro. I added the desctype as a separate argument instead of including it with the descdata as the previous version did since -traditional means the ELFNOTE definition after the #else needs to have the same number of arguments (I think so anyway, the -traditional CPP semantics are pretty fscking strange!). With this patch I am able to define elfnotes in assembly like this with both old and new assemblers. ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET) Which seems reasonable enough. Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfnote.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 16f9f8ebffd9..67396db141e8 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -31,22 +31,24 @@ /* * Generate a structure with the same shape as Elf{32,64}_Nhdr (which * turn out to be the same size and shape), followed by the name and - * desc data with appropriate padding. The 'desc' argument includes - * the assembler pseudo op defining the type of the data: .asciz - * "hello, world" + * desc data with appropriate padding. The 'desctype' argument is the + * assembler pseudo op defining the type of the data e.g. .asciz while + * 'descdata' is the data itself e.g. "hello, world". + * + * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") + * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) */ -.macro ELFNOTE name type desc:vararg -.pushsection ".note.\name" - .align 4 - .long 2f - 1f /* namesz */ - .long 4f - 3f /* descsz */ - .long \type -1:.asciz "\name" -2:.align 4 -3:\desc -4:.align 4 -.popsection -.endm +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection ; #else /* !__ASSEMBLER__ */ #include /* -- cgit v1.2.3-71-gd317 From a3bc0dbc81d36fd38991b4373f6de8e1a507605a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 25 Sep 2006 23:32:33 -0700 Subject: [PATCH] smp_call_function_single() cleanup If we're going to implement smp_call_function_single() on three architecture with the same prototype then it should have a declaration in a non-arch-specific header file. Move it into . Cc: Stephane Eranian Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smp.c | 4 ++-- arch/ia64/kernel/perfmon.c | 1 + arch/ia64/sn/kernel/sn2/sn_hwperf.c | 3 ++- arch/x86_64/kernel/smpboot.c | 3 ++- include/asm-ia64/smp.h | 2 -- include/asm-x86_64/smp.h | 2 -- include/linux/smp.h | 3 +++ 7 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 304243ed7a30..465188e2d701 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -683,8 +683,8 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info, * or is or has executed. */ -int smp_call_function_single (int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ int me = get_cpu(); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 84a7e52f56f6..7bb7696e4ce2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 9a8a29339d2d..b632b9c1e3b3 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -32,9 +32,10 @@ #include #include #include +#include + #include #include -#include #include #include #include diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 975380207b46..3ae9ffddddc0 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -46,9 +46,10 @@ #include #include #include - #include #include +#include + #include #include #include diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 719ff309ce09..74bde1c2bb1a 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -122,8 +122,6 @@ extern void __init smp_build_cpu_map(void); extern void __init init_smp_config (void); extern void smp_do_timer (struct pt_regs *regs); -extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, - int retry, int wait); extern void smp_send_reschedule (int cpu); extern void lock_ipi_calllock(void); extern void unlock_ipi_calllock(void); diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 6805e1feb300..ce97f65e1d10 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -48,8 +48,6 @@ extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); void smp_stop_cpu(void); -extern int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int retry, int wait); extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/include/linux/smp.h b/include/linux/smp.h index 837e8bce1349..51649987f691 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus); */ int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); +int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, + int retry, int wait); + /* * Call a function on all processors */ -- cgit v1.2.3-71-gd317 From 930631edd4b1fe2781d9fe90edbe35d89dfc94cc Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 25 Sep 2006 23:32:40 -0700 Subject: [PATCH] add DIV_ROUND_UP() Add the DIV_ROUND_UP() helper macro: divide `n' by `d', rounding up. Stolen from the gfs2 tree(!) because the swsusp patches need it. Signed-off-by: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b2ae4fdce8b..e44a37e2c71c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ extern const char linux_banner[]; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) #define KERN_EMERG "<0>" /* system is unusable */ -- cgit v1.2.3-71-gd317 From ab954160350c91c77ae03740ef90458c3ad5412c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 25 Sep 2006 23:32:42 -0700 Subject: [PATCH] swsusp: write speedup Switch the swsusp writeout code from 4k-at-a-time to 4MB-at-a-time. Crufty old PIII testbox: 12.9 MB/s -> 20.9 MB/s Sony Vaio: 14.7 MB/s -> 26.5 MB/s The implementation is crude. A better one would use larger BIOs, but wouldn't gain any performance. The memcpys will be mostly pipelined with the IO and basically come for free. The ENOMEM path has not been tested. It should be. Cc: Pavel Machek Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 ++- kernel/power/swap.c | 93 ++++++++++++++++++++++++++++++++++++++++++---------- mm/page_io.c | 25 ++++++++++---- 3 files changed, 98 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a2f5ad7c2d2e..3d434cbffe26 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -12,6 +12,8 @@ struct notifier_block; +struct bio; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -216,7 +218,8 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int rw_swap_page_sync(int, swp_entry_t, struct page *); +extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain); /* linux/mm/swap_state.c */ extern struct address_space swapper_space; diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 79b66e734bdb..2dc883d361d5 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -49,18 +49,16 @@ static int mark_swapfiles(swp_entry_t start) { int error; - rw_swap_page_sync(READ, - swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header)); + rw_swap_page_sync(READ, swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header), NULL); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); swsusp_header.image = start; - error = rw_swap_page_sync(WRITE, - swp_entry(root_swap, 0), - virt_to_page((unsigned long) - &swsusp_header)); + error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header), + NULL); } else { pr_debug("swsusp: Partition is not swap space.\n"); error = -ENODEV; @@ -88,16 +86,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */ * write_page - Write one page to given swap location. * @buf: Address we're writing. * @offset: Offset of the swap page we're writing to. + * @bio_chain: Link the next write BIO here */ -static int write_page(void *buf, unsigned long offset) +static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) { swp_entry_t entry; int error = -ENOSPC; if (offset) { + struct page *page = virt_to_page(buf); + + if (bio_chain) { + /* + * Whether or not we successfully allocated a copy page, + * we take a ref on the page here. It gets undone in + * wait_on_bio_chain(). + */ + struct page *page_copy; + page_copy = alloc_page(GFP_ATOMIC); + if (page_copy == NULL) { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + get_page(page); + } else { + memcpy(page_address(page_copy), + page_address(page), PAGE_SIZE); + page = page_copy; + } + } entry = swp_entry(root_swap, offset); - error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf)); + error = rw_swap_page_sync(WRITE, entry, page, bio_chain); } return error; } @@ -185,37 +204,68 @@ static int get_swap_writer(struct swap_map_handle *handle) return 0; } -static int swap_write_page(struct swap_map_handle *handle, void *buf) +static int wait_on_bio_chain(struct bio **bio_chain) { - int error; + struct bio *bio; + struct bio *next_bio; + int ret = 0; + + if (bio_chain == NULL) + return 0; + + bio = *bio_chain; + while (bio) { + struct page *page; + + next_bio = bio->bi_private; + page = bio->bi_io_vec[0].bv_page; + wait_on_page_locked(page); + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + put_page(page); + bio_put(bio); + bio = next_bio; + } + *bio_chain = NULL; + return ret; +} + +static int swap_write_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + int error = 0; unsigned long offset; if (!handle->cur) return -EINVAL; offset = alloc_swap_page(root_swap, handle->bitmap); - error = write_page(buf, offset); + error = write_page(buf, offset, bio_chain); if (error) return error; handle->cur->entries[handle->k++] = offset; if (handle->k >= MAP_PAGE_ENTRIES) { + error = wait_on_bio_chain(bio_chain); + if (error) + goto out; offset = alloc_swap_page(root_swap, handle->bitmap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap); + error = write_page(handle->cur, handle->cur_swap, NULL); if (error) - return error; + goto out; memset(handle->cur, 0, PAGE_SIZE); handle->cur_swap = offset; handle->k = 0; } - return 0; +out: + return error; } static int flush_swap_writer(struct swap_map_handle *handle) { if (handle->cur && handle->cur_swap) - return write_page(handle->cur, handle->cur_swap); + return write_page(handle->cur, handle->cur_swap, NULL); else return -EINVAL; } @@ -232,6 +282,8 @@ static int save_image(struct swap_map_handle *handle, int ret; int error = 0; int nr_pages; + int err2; + struct bio *bio; struct timeval start; struct timeval stop; @@ -240,11 +292,13 @@ static int save_image(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; + bio = NULL; do_gettimeofday(&start); do { ret = snapshot_read_next(snapshot, PAGE_SIZE); if (ret > 0) { - error = swap_write_page(handle, data_of(*snapshot)); + error = swap_write_page(handle, data_of(*snapshot), + &bio); if (error) break; if (!(nr_pages % m)) @@ -252,7 +306,10 @@ static int save_image(struct swap_map_handle *handle, nr_pages++; } } while (ret > 0); + err2 = wait_on_bio_chain(&bio); do_gettimeofday(&stop); + if (!error) + error = err2; if (!error) printk("\b\b\b\bdone\n"); show_speed(&start, &stop, nr_to_write, "Wrote"); @@ -307,7 +364,7 @@ int swsusp_write(void) error = get_swap_writer(&handle); if (!error) { unsigned long start = handle.cur_swap; - error = swap_write_page(&handle, header); + error = swap_write_page(&handle, header, NULL); if (!error) error = save_image(&handle, &snapshot, header->pages - 1); diff --git a/mm/page_io.c b/mm/page_io.c index d2f0a5783370..f46a9862b7ef 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -156,10 +156,12 @@ out: * We use end_swap_bio_read() even for writes, because it happens to do what * we want. */ -int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) +int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain) { struct bio *bio; int ret = 0; + int bio_rw; lock_page(page); @@ -170,11 +172,22 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) goto out; } - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - wait_on_page_locked(page); - - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; + bio_rw = rw; + if (!bio_chain) + bio_rw |= (1 << BIO_RW_SYNC); + if (bio_chain) + bio_get(bio); + submit_bio(bio_rw, bio); + if (bio_chain == NULL) { + wait_on_page_locked(page); + + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + } + if (bio_chain) { + bio->bi_private = *bio_chain; + *bio_chain = bio; + } out: return ret; } -- cgit v1.2.3-71-gd317 From 546e0d271941dd1ff6961e2a1f7eac75f1fc277e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 25 Sep 2006 23:32:44 -0700 Subject: [PATCH] swsusp: read speedup Implement async reads for swsusp resuming. Crufty old PIII testbox: 15.7 MB/s -> 20.3 MB/s Sony Vaio: 14.6 MB/s -> 33.3 MB/s I didn't implement the post-resume bio_set_pages_dirty(). I don't really understand why resume needs to run set_page_dirty() against these pages. It might be a worry that this code modifies PG_Uptodate, PG_Error and PG_Locked against the image pages. Can this possibly affect the resumed-into kernel? Hopefully not, if we're atomically restoring its mem_map? Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Jens Axboe Cc: Laurent Riffard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + kernel/power/power.h | 1 + kernel/power/snapshot.c | 11 ++-- kernel/power/swap.c | 138 ++++++++++++++++++++++++------------------------ mm/page_io.c | 2 +- 5 files changed, 81 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3d434cbffe26..e7c36ba2a2db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -220,6 +220,7 @@ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, struct bio **bio_chain); +extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err); /* linux/mm/swap_state.c */ extern struct address_space swapper_space; diff --git a/kernel/power/power.h b/kernel/power/power.h index 57a792982fb9..59ce712f082d 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -58,6 +58,7 @@ struct snapshot_handle { struct pbe *pbe, *last_pbe; void *buffer; unsigned int buf_offset; + int sync_read; }; #define data_of(handle) ((handle).buffer + (handle).buf_offset) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 75d4886e648e..591301ae8b7d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -314,7 +314,7 @@ static unsigned int unsafe_pages; * and we count them using unsafe_pages */ -static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) +static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) { void *res; @@ -828,13 +828,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) } if (!handle->offset) handle->buffer = buffer; + handle->sync_read = 1; if (handle->prev < handle->page) { if (!handle->prev) { - error = load_header(handle, (struct swsusp_info *)buffer); + error = load_header(handle, + (struct swsusp_info *)buffer); if (error) return error; } else if (handle->prev <= nr_meta_pages) { - handle->pbe = unpack_orig_addresses(buffer, handle->pbe); + handle->pbe = unpack_orig_addresses(buffer, + handle->pbe); if (!handle->pbe) { error = prepare_image(handle); if (error) @@ -842,10 +845,12 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) handle->pbe = pagedir_nosave; handle->last_pbe = NULL; handle->buffer = get_buffer(handle); + handle->sync_read = 0; } } else { handle->pbe = handle->pbe->next; handle->buffer = get_buffer(handle); + handle->sync_read = 0; } handle->prev = handle->page; } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 9ab989572164..8309d20b2563 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -214,6 +215,8 @@ static int wait_on_bio_chain(struct bio **bio_chain) return 0; bio = *bio_chain; + if (bio == NULL) + return 0; while (bio) { struct page *page; @@ -349,7 +352,8 @@ int swsusp_write(void) int error; if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n"); + printk(KERN_ERR "swsusp: Cannot find swap device, try " + "swapon -a.\n"); return error; } memset(&snapshot, 0, sizeof(struct snapshot_handle)); @@ -381,27 +385,6 @@ int swsusp_write(void) return error; } -/* - * Using bio to read from swap. - * This code requires a bit more work than just using buffer heads - * but, it is the recommended way for 2.5/2.6. - * The following are to signal the beginning and end of I/O. Bios - * finish asynchronously, while we want them to happen synchronously. - * A simple atomic_t, and a wait loop take care of this problem. - */ - -static atomic_t io_done = ATOMIC_INIT(0); - -static int end_io(struct bio *bio, unsigned int num, int err) -{ - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk(KERN_ERR "I/O error reading swsusp image.\n"); - return -EIO; - } - atomic_set(&io_done, 0); - return 0; -} - static struct block_device *resume_bdev; /** @@ -409,15 +392,15 @@ static struct block_device *resume_bdev; * @rw: READ or WRITE. * @off physical offset of page. * @page: page we're reading or writing. + * @bio_chain: list of pending biod (for async reading) * * Straight from the textbook - allocate and initialize the bio. - * If we're writing, make sure the page is marked as dirty. - * Then submit it and wait. + * If we're reading, make sure the page is marked as dirty. + * Then submit it and, if @bio_chain == NULL, wait. */ - -static int submit(int rw, pgoff_t page_off, void *page) +static int submit(int rw, pgoff_t page_off, struct page *page, + struct bio **bio_chain) { - int error = 0; struct bio *bio; bio = bio_alloc(GFP_ATOMIC, 1); @@ -425,33 +408,40 @@ static int submit(int rw, pgoff_t page_off, void *page) return -ENOMEM; bio->bi_sector = page_off * (PAGE_SIZE >> 9); bio->bi_bdev = resume_bdev; - bio->bi_end_io = end_io; + bio->bi_end_io = end_swap_bio_read; - if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { - printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); - error = -EFAULT; - goto Done; + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); + bio_put(bio); + return -EFAULT; } - atomic_set(&io_done, 1); - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - while (atomic_read(&io_done)) - yield(); - if (rw == READ) - bio_set_pages_dirty(bio); - Done: - bio_put(bio); - return error; + lock_page(page); + bio_get(bio); + + if (bio_chain == NULL) { + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + wait_on_page_locked(page); + if (rw == READ) + bio_set_pages_dirty(bio); + bio_put(bio); + } else { + get_page(page); + bio->bi_private = *bio_chain; + *bio_chain = bio; + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + } + return 0; } -static int bio_read_page(pgoff_t page_off, void *page) +static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) { - return submit(READ, page_off, page); + return submit(READ, page_off, virt_to_page(addr), bio_chain); } -static int bio_write_page(pgoff_t page_off, void *page) +static int bio_write_page(pgoff_t page_off, void *addr) { - return submit(WRITE, page_off, page); + return submit(WRITE, page_off, virt_to_page(addr), NULL); } /** @@ -476,7 +466,7 @@ static int get_swap_reader(struct swap_map_handle *handle, handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); if (!handle->cur) return -ENOMEM; - error = bio_read_page(swp_offset(start), handle->cur); + error = bio_read_page(swp_offset(start), handle->cur, NULL); if (error) { release_swap_reader(handle); return error; @@ -485,7 +475,8 @@ static int get_swap_reader(struct swap_map_handle *handle, return 0; } -static int swap_read_page(struct swap_map_handle *handle, void *buf) +static int swap_read_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) { unsigned long offset; int error; @@ -495,16 +486,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = bio_read_page(offset, buf); + error = bio_read_page(offset, buf, bio_chain); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { + error = wait_on_bio_chain(bio_chain); handle->k = 0; offset = handle->cur->next_swap; if (!offset) release_swap_reader(handle); - else - error = bio_read_page(offset, handle->cur); + else if (!error) + error = bio_read_page(offset, handle->cur, NULL); } return error; } @@ -517,38 +509,48 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) static int load_image(struct swap_map_handle *handle, struct snapshot_handle *snapshot, - unsigned int nr_pages) + unsigned int nr_to_read) { unsigned int m; - int ret; int error = 0; struct timeval start; struct timeval stop; + struct bio *bio; + int err2; + unsigned nr_pages; - printk("Loading image data pages (%u pages) ... ", nr_pages); - m = nr_pages / 100; + printk("Loading image data pages (%u pages) ... ", nr_to_read); + m = nr_to_read / 100; if (!m) m = 1; nr_pages = 0; + bio = NULL; do_gettimeofday(&start); - do { - ret = snapshot_write_next(snapshot, PAGE_SIZE); - if (ret > 0) { - error = swap_read_page(handle, data_of(*snapshot)); - if (error) - break; - if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; - } - } while (ret > 0); + for ( ; ; ) { + error = snapshot_write_next(snapshot, PAGE_SIZE); + if (error <= 0) + break; + error = swap_read_page(handle, data_of(*snapshot), &bio); + if (error) + break; + if (snapshot->sync_read) + error = wait_on_bio_chain(&bio); + if (error) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + err2 = wait_on_bio_chain(&bio); do_gettimeofday(&stop); + if (!error) + error = err2; if (!error) { printk("\b\b\b\bdone\n"); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; } - show_speed(&start, &stop, nr_pages, "Read"); + show_speed(&start, &stop, nr_to_read, "Read"); return error; } @@ -571,7 +573,7 @@ int swsusp_read(void) header = (struct swsusp_info *)data_of(snapshot); error = get_swap_reader(&handle, swsusp_header.image); if (!error) - error = swap_read_page(&handle, header); + error = swap_read_page(&handle, header, NULL); if (!error) error = load_image(&handle, &snapshot, header->pages - 1); release_swap_reader(&handle); @@ -597,7 +599,7 @@ int swsusp_check(void) if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); memset(&swsusp_header, 0, sizeof(swsusp_header)); - if ((error = bio_read_page(0, &swsusp_header))) + if ((error = bio_read_page(0, &swsusp_header, NULL))) return error; if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); diff --git a/mm/page_io.c b/mm/page_io.c index f46a9862b7ef..d4840ecbf8f9 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -74,7 +74,7 @@ static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err) return 0; } -static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) +int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; -- cgit v1.2.3-71-gd317 From e3920fb42c8ddfe63befb54d95c0e13eabacea9b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Sep 2006 23:32:48 -0700 Subject: [PATCH] Disable CPU hotplug during suspend The current suspend code has to be run on one CPU, so we use the CPU hotplug to take the non-boot CPUs offline on SMP machines. However, we should also make sure that these CPUs will not be enabled by someone else after we have disabled them. The functions disable_nonboot_cpus() and enable_nonboot_cpus() are moved to kernel/cpu.c, because they now refer to some stuff in there that should better be static. Also it's better if disable_nonboot_cpus() returns an error instead of panicking if something goes wrong, and enable_nonboot_cpus() has no reason to panic(), because the CPUs may have been enabled by the userland before it tries to take them online. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 8 +++ include/linux/suspend.h | 8 --- kernel/cpu.c | 138 +++++++++++++++++++++++++++++++++++++++++------- kernel/power/Makefile | 2 - kernel/power/disk.c | 7 ++- kernel/power/main.c | 10 ++-- kernel/power/smp.c | 62 ---------------------- kernel/power/user.c | 14 +++-- 8 files changed, 145 insertions(+), 104 deletions(-) delete mode 100644 kernel/power/smp.c (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8fb344a9abd8..3fef7d67aedc 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu); static inline int cpu_is_offline(int cpu) { return 0; } #endif +#ifdef CONFIG_SUSPEND_SMP +extern int disable_nonboot_cpus(void); +extern void enable_nonboot_cpus(void); +#else +static inline int disable_nonboot_cpus(void) { return 0; } +static inline void enable_nonboot_cpus(void) {} +#endif + #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 96e31aa64cc7..6e8a06c950f4 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -57,14 +57,6 @@ static inline int software_suspend(void) } #endif /* CONFIG_PM */ -#ifdef CONFIG_SUSPEND_SMP -extern void disable_nonboot_cpus(void); -extern void enable_nonboot_cpus(void); -#else -static inline void disable_nonboot_cpus(void) {} -static inline void enable_nonboot_cpus(void) {} -#endif - void save_processor_state(void); void restore_processor_state(void); struct saved_context; diff --git a/kernel/cpu.c b/kernel/cpu.c index f230f9ae01c2..32c96628463e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); +/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. + * Should always be manipulated under cpu_add_remove_lock + */ +static int cpu_hotplug_disabled; + #ifdef CONFIG_HOTPLUG_CPU /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ @@ -108,30 +113,25 @@ static int take_cpu_down(void *unused) return 0; } -int cpu_down(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int _cpu_down(unsigned int cpu) { int err; struct task_struct *p; cpumask_t old_allowed, tmp; - mutex_lock(&cpu_add_remove_lock); - if (num_online_cpus() == 1) { - err = -EBUSY; - goto out; - } + if (num_online_cpus() == 1) + return -EBUSY; - if (!cpu_online(cpu)) { - err = -EINVAL; - goto out; - } + if (!cpu_online(cpu)) + return -EINVAL; err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { printk("%s: attempt to take down CPU %u failed\n", __FUNCTION__, cpu); - err = -EINVAL; - goto out; + return -EINVAL; } /* Ensure that we are not runnable on dying cpu */ @@ -179,22 +179,32 @@ out_thread: err = kthread_stop(p); out_allowed: set_cpus_allowed(current, old_allowed); -out: + return err; +} + +int cpu_down(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_down(cpu); + mutex_unlock(&cpu_add_remove_lock); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ -int __devinit cpu_up(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int __devinit _cpu_up(unsigned int cpu) { int ret; void *hcpu = (void *)(long)cpu; - mutex_lock(&cpu_add_remove_lock); - if (cpu_online(cpu) || !cpu_present(cpu)) { - ret = -EINVAL; - goto out; - } + if (cpu_online(cpu) || !cpu_present(cpu)) + return -EINVAL; ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { @@ -219,7 +229,95 @@ out_notify: if (ret != 0) blocking_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); + + return ret; +} + +int __devinit cpu_up(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_up(cpu); + + mutex_unlock(&cpu_add_remove_lock); + return err; +} + +#ifdef CONFIG_SUSPEND_SMP +static cpumask_t frozen_cpus; + +int disable_nonboot_cpus(void) +{ + int cpu, first_cpu, error; + + mutex_lock(&cpu_add_remove_lock); + first_cpu = first_cpu(cpu_present_map); + if (!cpu_online(first_cpu)) { + error = _cpu_up(first_cpu); + if (error) { + printk(KERN_ERR "Could not bring CPU%d up.\n", + first_cpu); + goto out; + } + } + error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu)); + if (error) { + printk(KERN_ERR "Could not run on CPU%d\n", first_cpu); + goto out; + } + /* We take down all of the non-boot CPUs in one shot to avoid races + * with the userspace trying to use the CPU hotplug at the same time + */ + cpus_clear(frozen_cpus); + printk("Disabling non-boot CPUs ...\n"); + for_each_online_cpu(cpu) { + if (cpu == first_cpu) + continue; + error = _cpu_down(cpu); + if (!error) { + cpu_set(cpu, frozen_cpus); + printk("CPU%d is down\n", cpu); + } else { + printk(KERN_ERR "Error taking CPU%d down: %d\n", + cpu, error); + break; + } + } + if (!error) { + BUG_ON(num_online_cpus() > 1); + /* Make sure the CPUs won't be enabled by someone else */ + cpu_hotplug_disabled = 1; + } else { + printk(KERN_ERR "Non-boot CPUs are not disabled"); + } out: mutex_unlock(&cpu_add_remove_lock); - return ret; + return error; +} + +void enable_nonboot_cpus(void) +{ + int cpu, error; + + /* Allow everyone to use the CPU hotplug again */ + mutex_lock(&cpu_add_remove_lock); + cpu_hotplug_disabled = 0; + mutex_unlock(&cpu_add_remove_lock); + + printk("Enabling non-boot CPUs ...\n"); + for_each_cpu_mask(cpu, frozen_cpus) { + error = cpu_up(cpu); + if (!error) { + printk("CPU%d is up\n", cpu); + continue; + } + printk(KERN_WARNING "Error taking CPU%d up: %d\n", + cpu, error); + } + cpus_clear(frozen_cpus); } +#endif diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 8d0af3d37a4b..38725f526afc 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,6 +7,4 @@ obj-y := main.o process.o console.o obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o -obj-$(CONFIG_SUSPEND_SMP) += smp.o - obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e13e74067845..7c7b9b65e365 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "power.h" @@ -72,7 +73,10 @@ static int prepare_processes(void) int error; pm_prepare_console(); - disable_nonboot_cpus(); + + error = disable_nonboot_cpus(); + if (error) + goto enable_cpus; if (freeze_processes()) { error = -EBUSY; @@ -84,6 +88,7 @@ static int prepare_processes(void) return 0; thaw: thaw_processes(); +enable_cpus: enable_nonboot_cpus(); pm_restore_console(); return error; diff --git a/kernel/power/main.c b/kernel/power/main.c index 6d295c776794..4d403323a7bb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "power.h" @@ -51,7 +52,7 @@ void pm_set_ops(struct pm_ops * ops) static int suspend_prepare(suspend_state_t state) { - int error = 0; + int error; unsigned int free_pages; if (!pm_ops || !pm_ops->enter) @@ -59,12 +60,9 @@ static int suspend_prepare(suspend_state_t state) pm_prepare_console(); - disable_nonboot_cpus(); - - if (num_online_cpus() != 1) { - error = -EPERM; + error = disable_nonboot_cpus(); + if (error) goto Enable_cpu; - } if (freeze_processes()) { error = -EAGAIN; diff --git a/kernel/power/smp.c b/kernel/power/smp.c deleted file mode 100644 index 5957312b2d68..000000000000 --- a/kernel/power/smp.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * drivers/power/smp.c - Functions for stopping other CPUs. - * - * Copyright 2004 Pavel Machek - * Copyright (C) 2002-2003 Nigel Cunningham - * - * This file is released under the GPLv2. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -/* This is protected by pm_sem semaphore */ -static cpumask_t frozen_cpus; - -void disable_nonboot_cpus(void) -{ - int cpu, error; - - error = 0; - cpus_clear(frozen_cpus); - printk("Freezing cpus ...\n"); - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - error = cpu_down(cpu); - if (!error) { - cpu_set(cpu, frozen_cpus); - printk("CPU%d is down\n", cpu); - continue; - } - printk("Error taking cpu %d down: %d\n", cpu, error); - } - BUG_ON(raw_smp_processor_id() != 0); - if (error) - panic("cpus not sleeping"); -} - -void enable_nonboot_cpus(void) -{ - int cpu, error; - - printk("Thawing cpus ...\n"); - for_each_cpu_mask(cpu, frozen_cpus) { - error = cpu_up(cpu); - if (!error) { - printk("CPU%d is up\n", cpu); - continue; - } - printk("Error taking cpu %d up: %d\n", cpu, error); - panic("Not enough cpus"); - } - cpus_clear(frozen_cpus); -} - diff --git a/kernel/power/user.c b/kernel/power/user.c index 3f1539fbe48a..0ef5e4ba39e5 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (data->frozen) break; down(&pm_sem); - disable_nonboot_cpus(); - if (freeze_processes()) { - thaw_processes(); - enable_nonboot_cpus(); - error = -EBUSY; + error = disable_nonboot_cpus(); + if (!error) { + error = freeze_processes(); + if (error) { + thaw_processes(); + error = -EBUSY; + } } + enable_nonboot_cpus(); up(&pm_sem); if (!error) data->frozen = 1; -- cgit v1.2.3-71-gd317 From dcbb5a54f6e3984efa24772394f2225b11495c55 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Sep 2006 23:32:51 -0700 Subject: [PATCH] swsusp: clean up suspend header Remove some things that are no longer used or defined elsewhere from suspend.h and make the inline version of software_suspend() return the right error code. Signed-off-by: Rafael J. Wysocki Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 6e8a06c950f4..c11cacf1a13b 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -10,11 +10,11 @@ #include /* page backup entry */ -typedef struct pbe { +struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ struct pbe *next; -} suspend_pagedir_t; +}; #define for_each_pbe(pbe, pblist) \ for (pbe = pblist ; pbe ; pbe = pbe->next) @@ -25,15 +25,6 @@ typedef struct pbe { #define for_each_pb_page(pbe, pblist) \ for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next) - -#define SWAP_FILENAME_MAXLENGTH 32 - - -extern dev_t swsusp_resume_device; - -/* mm/vmscan.c */ -extern int shrink_mem(void); - /* mm/page_alloc.c */ extern void drain_local_pages(void); extern void mark_free_pages(struct zone *zone); @@ -53,7 +44,7 @@ static inline void pm_restore_console(void) {} static inline int software_suspend(void) { printk("Warning: fake suspend called\n"); - return -EPERM; + return -ENOSYS; } #endif /* CONFIG_PM */ -- cgit v1.2.3-71-gd317 From 940864ddabdb180e02041c4dcd46ba6f9eee732f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Sep 2006 23:32:55 -0700 Subject: [PATCH] swsusp: Use memory bitmaps during resume Make swsusp use memory bitmaps to store its internal information during the resume phase of the suspend-resume cycle. If the pfns of saveable pages are saved during the suspend phase instead of the kernel virtual addresses of these pages, we can use them during the resume phase directly to set the corresponding bits in a memory bitmap. Then, this bitmap is used to mark the page frames corresponding to the pages that were saveable before the suspend (aka "unsafe" page frames). Next, we allocate as many page frames as needed to store the entire suspend image and make sure that there will be some extra free "safe" page frames for the list of PBEs constructed later. Subsequently, the image is loaded and, if possible, the data loaded from it are written into their "original" page frames (ie. the ones they had occupied before the suspend). The image data that cannot be written into their "original" page frames are loaded into "safe" page frames and their "original" kernel virtual addresses, as well as the addresses of the "safe" pages containing their copies, are stored in a list of PBEs. Finally, the list of PBEs is used to copy the remaining image data into their "original" page frames (this is done atomically, by the architecture-dependent parts of swsusp). Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 9 -- kernel/power/power.h | 11 +- kernel/power/snapshot.c | 416 +++++++++++++++++++++--------------------------- kernel/power/swap.c | 4 +- kernel/power/user.c | 1 + 5 files changed, 186 insertions(+), 255 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index c11cacf1a13b..b1237f16ecde 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -16,15 +16,6 @@ struct pbe { struct pbe *next; }; -#define for_each_pbe(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = pbe->next) - -#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe)) -#define PB_PAGE_SKIP (PBES_PER_PAGE-1) - -#define for_each_pb_page(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next) - /* mm/page_alloc.c */ extern void drain_local_pages(void); extern void mark_free_pages(struct zone *zone); diff --git a/kernel/power/power.h b/kernel/power/power.h index 6e9e2acc34f8..bfe999f7b272 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -81,16 +81,6 @@ struct snapshot_handle { unsigned int prev; /* number of the block of PAGE_SIZE bytes that * was the current one previously */ - struct pbe *pbe; /* PBE that corresponds to 'buffer' */ - struct pbe *last_pbe; /* When the image is restored (eg. read - * from disk) we can store some image - * data directly in the page frames - * in which they were before suspend. - * In such a case the PBEs that - * correspond to them will be unused. - * This is the last PBE, so far, that - * does not correspond to such data. - */ void *buffer; /* address of the block to read from * or write to */ @@ -113,6 +103,7 @@ extern unsigned int snapshot_additional_pages(struct zone *zone); extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); extern int snapshot_image_loaded(struct snapshot_handle *handle); +extern void snapshot_free_unused_memory(struct snapshot_handle *handle); #define SNAPSHOT_IOC_MAGIC '3' #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 852e0df41719..1b84313cbab5 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -39,7 +39,7 @@ struct pbe *restore_pblist; static unsigned int nr_copy_pages; static unsigned int nr_meta_pages; -static unsigned long *buffer; +static void *buffer; #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void) @@ -172,7 +172,7 @@ static inline int restore_highmem(void) {return 0;} #define PG_UNSAFE_CLEAR 1 #define PG_UNSAFE_KEEP 0 -static unsigned int unsafe_pages; +static unsigned int allocated_unsafe_pages; static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) { @@ -183,7 +183,7 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) while (res && PageNosaveFree(virt_to_page(res))) { /* The page is unsafe, mark it for swsusp_free() */ SetPageNosave(virt_to_page(res)); - unsafe_pages++; + allocated_unsafe_pages++; res = (void *)get_zeroed_page(gfp_mask); } if (res) { @@ -772,101 +772,10 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) } /** - * free_pagedir - free pages allocated with alloc_pagedir() - */ - -static void free_pagedir(struct pbe *pblist, int clear_nosave_free) -{ - struct pbe *pbe; - - while (pblist) { - pbe = (pblist + PB_PAGE_SKIP)->next; - free_image_page(pblist, clear_nosave_free); - pblist = pbe; - } -} - -/** - * fill_pb_page - Create a list of PBEs on a given memory page - */ - -static inline void fill_pb_page(struct pbe *pbpage, unsigned int n) -{ - struct pbe *p; - - p = pbpage; - pbpage += n - 1; - do - p->next = p + 1; - while (++p < pbpage); -} - -/** - * create_pbe_list - Create a list of PBEs on top of a given chain - * of memory pages allocated with alloc_pagedir() + * swsusp_free - free pages allocated for the suspend. * - * This function assumes that pages allocated by alloc_image_page() will - * always be zeroed. - */ - -static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) -{ - struct pbe *pbpage; - unsigned int num = PBES_PER_PAGE; - - for_each_pb_page (pbpage, pblist) { - if (num >= nr_pages) - break; - - fill_pb_page(pbpage, PBES_PER_PAGE); - num += PBES_PER_PAGE; - } - if (pbpage) { - num -= PBES_PER_PAGE; - fill_pb_page(pbpage, nr_pages - num); - } -} - -/** - * alloc_pagedir - Allocate the page directory. - * - * First, determine exactly how many pages we need and - * allocate them. - * - * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE - * struct pbe elements (pbes) and the last element in the page points - * to the next page. - * - * On each page we set up a list of struct_pbe elements. - */ - -static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, - int safe_needed) -{ - unsigned int num; - struct pbe *pblist, *pbe; - - if (!nr_pages) - return NULL; - - pblist = alloc_image_page(gfp_mask, safe_needed); - pbe = pblist; - for (num = PBES_PER_PAGE; num < nr_pages; num += PBES_PER_PAGE) { - if (!pbe) { - free_pagedir(pblist, PG_UNSAFE_CLEAR); - return NULL; - } - pbe += PB_PAGE_SKIP; - pbe->next = alloc_image_page(gfp_mask, safe_needed); - pbe = pbe->next; - } - create_pbe_list(pblist, nr_pages); - return pblist; -} - -/** - * Free pages we allocated for suspend. Suspend pages are alocated - * before atomic copy, so we need to free them after resume. + * Suspend pages are alocated before the atomic copy is made, so we + * need to release them after the resume. */ void swsusp_free(void) @@ -904,14 +813,18 @@ void swsusp_free(void) static int enough_free_mem(unsigned int nr_pages) { struct zone *zone; - unsigned int n = 0; + unsigned int free = 0, meta = 0; for_each_zone (zone) - if (!is_highmem(zone)) - n += zone->free_pages; - pr_debug("swsusp: available memory: %u pages\n", n); - return n > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); + if (!is_highmem(zone)) { + free += zone->free_pages; + meta += snapshot_additional_pages(zone); + } + + pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, meta, free); + + return free > nr_pages + PAGES_FOR_IO + meta; } static int @@ -961,11 +874,6 @@ asmlinkage int swsusp_save(void) nr_pages = count_data_pages(); printk("swsusp: Need to copy %u pages\n", nr_pages); - pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n", - nr_pages, - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, - PAGES_FOR_IO, nr_free_pages()); - if (!enough_free_mem(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; @@ -1007,22 +915,19 @@ static void init_header(struct swsusp_info *info) } /** - * pack_addresses - the addresses corresponding to pfns found in the - * bitmap @bm are stored in the array @buf[] (1 page) + * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm + * are stored in the array @buf[] (1 page at a time) */ static inline void -pack_addresses(unsigned long *buf, struct memory_bitmap *bm) +pack_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { - unsigned long pfn = memory_bm_next_pfn(bm); - - if (unlikely(pfn == BM_END_OF_MAP)) + buf[j] = memory_bm_next_pfn(bm); + if (unlikely(buf[j] == BM_END_OF_MAP)) break; - - buf[j] = (unsigned long)page_address(pfn_to_page(pfn)); } } @@ -1068,7 +973,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) if (handle->prev < handle->cur) { if (handle->cur <= nr_meta_pages) { memset(buffer, 0, PAGE_SIZE); - pack_addresses(buffer, &orig_bm); + pack_pfns(buffer, &orig_bm); } else { unsigned long pfn = memory_bm_next_pfn(©_bm); @@ -1094,14 +999,10 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) * had been used before suspend */ -static int mark_unsafe_pages(struct pbe *pblist) +static int mark_unsafe_pages(struct memory_bitmap *bm) { struct zone *zone; unsigned long pfn, max_zone_pfn; - struct pbe *p; - - if (!pblist) /* a sanity check */ - return -EINVAL; /* Clear page flags */ for_each_zone (zone) { @@ -1111,30 +1012,37 @@ static int mark_unsafe_pages(struct pbe *pblist) ClearPageNosaveFree(pfn_to_page(pfn)); } - /* Mark orig addresses */ - for_each_pbe (p, pblist) { - if (virt_addr_valid(p->orig_address)) - SetPageNosaveFree(virt_to_page(p->orig_address)); - else - return -EFAULT; - } + /* Mark pages that correspond to the "original" pfns as "unsafe" */ + memory_bm_position_reset(bm); + do { + pfn = memory_bm_next_pfn(bm); + if (likely(pfn != BM_END_OF_MAP)) { + if (likely(pfn_valid(pfn))) + SetPageNosaveFree(pfn_to_page(pfn)); + else + return -EFAULT; + } + } while (pfn != BM_END_OF_MAP); - unsafe_pages = 0; + allocated_unsafe_pages = 0; return 0; } -static void copy_page_backup_list(struct pbe *dst, struct pbe *src) +static void +duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) { - /* We assume both lists contain the same number of elements */ - while (src) { - dst->orig_address = src->orig_address; - dst = dst->next; - src = src->next; + unsigned long pfn; + + memory_bm_position_reset(src); + pfn = memory_bm_next_pfn(src); + while (pfn != BM_END_OF_MAP) { + memory_bm_set_bit(dst, pfn); + pfn = memory_bm_next_pfn(src); } } -static int check_header(struct swsusp_info *info) +static inline int check_header(struct swsusp_info *info) { char *reason = NULL; @@ -1161,19 +1069,14 @@ static int check_header(struct swsusp_info *info) * load header - check the image header and copy data from it */ -static int load_header(struct snapshot_handle *handle, - struct swsusp_info *info) +static int +load_header(struct swsusp_info *info) { int error; - struct pbe *pblist; + restore_pblist = NULL; error = check_header(info); if (!error) { - pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, PG_ANY); - if (!pblist) - return -ENOMEM; - restore_pblist = pblist; - handle->pbe = pblist; nr_copy_pages = info->image_pages; nr_meta_pages = info->pages - info->image_pages - 1; } @@ -1181,108 +1084,137 @@ static int load_header(struct snapshot_handle *handle, } /** - * unpack_orig_addresses - copy the elements of @buf[] (1 page) to - * the PBEs in the list starting at @pbe + * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set + * the corresponding bit in the memory bitmap @bm */ -static inline struct pbe *unpack_orig_addresses(unsigned long *buf, - struct pbe *pbe) +static inline void +unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; - for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { - pbe->orig_address = buf[j]; - pbe = pbe->next; + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; + + memory_bm_set_bit(bm, buf[j]); } - return pbe; } /** - * prepare_image - use metadata contained in the PBE list - * pointed to by restore_pblist to mark the pages that will - * be overwritten in the process of restoring the system - * memory state from the image ("unsafe" pages) and allocate - * memory for the image + * prepare_image - use the memory bitmap @bm to mark the pages that will + * be overwritten in the process of restoring the system memory state + * from the suspend image ("unsafe" pages) and allocate memory for the + * image. * - * The idea is to allocate the PBE list first and then - * allocate as many pages as it's needed for the image data, - * but not to assign these pages to the PBEs initially. - * Instead, we just mark them as allocated and create a list - * of "safe" which will be used later + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for the image data, but not to assign these + * pages to specific tasks initially. Instead, we just mark them as + * allocated and create a list of "safe" pages that will be used later. */ -static struct linked_page *safe_pages; +#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) + +static struct linked_page *safe_pages_list; -static int prepare_image(struct snapshot_handle *handle) +static int +prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { - int error = 0; - unsigned int nr_pages = nr_copy_pages; - struct pbe *p, *pblist = NULL; + unsigned int nr_pages; + struct linked_page *sp_list, *lp; + int error; - p = restore_pblist; - error = mark_unsafe_pages(p); - if (!error) { - pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, PG_SAFE); - if (pblist) - copy_page_backup_list(pblist, p); - free_pagedir(p, PG_UNSAFE_KEEP); - if (!pblist) + error = mark_unsafe_pages(bm); + if (error) + goto Free; + + error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); + if (error) + goto Free; + + duplicate_memory_bitmap(new_bm, bm); + memory_bm_free(bm, PG_UNSAFE_KEEP); + /* Reserve some safe pages for potential later use. + * + * NOTE: This way we make sure there will be enough safe pages for the + * chain_alloc() in get_buffer(). It is a bit wasteful, but + * nr_copy_pages cannot be greater than 50% of the memory anyway. + */ + sp_list = NULL; + /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ + nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); + while (nr_pages > 0) { + lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); + if (!lp) { error = -ENOMEM; + goto Free; + } + lp->next = sp_list; + sp_list = lp; + nr_pages--; } - safe_pages = NULL; - if (!error && nr_pages > unsafe_pages) { - nr_pages -= unsafe_pages; - while (nr_pages--) { - struct linked_page *ptr; - - ptr = (void *)get_zeroed_page(GFP_ATOMIC); - if (!ptr) { - error = -ENOMEM; - break; - } - if (!PageNosaveFree(virt_to_page(ptr))) { - /* The page is "safe", add it to the list */ - ptr->next = safe_pages; - safe_pages = ptr; - } - /* Mark the page as allocated */ - SetPageNosave(virt_to_page(ptr)); - SetPageNosaveFree(virt_to_page(ptr)); + /* Preallocate memory for the image */ + safe_pages_list = NULL; + nr_pages = nr_copy_pages - allocated_unsafe_pages; + while (nr_pages > 0) { + lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); + if (!lp) { + error = -ENOMEM; + goto Free; + } + if (!PageNosaveFree(virt_to_page(lp))) { + /* The page is "safe", add it to the list */ + lp->next = safe_pages_list; + safe_pages_list = lp; } + /* Mark the page as allocated */ + SetPageNosave(virt_to_page(lp)); + SetPageNosaveFree(virt_to_page(lp)); + nr_pages--; } - if (!error) { - restore_pblist = pblist; - } else { - handle->pbe = NULL; - swsusp_free(); + /* Free the reserved safe pages so that chain_alloc() can use them */ + while (sp_list) { + lp = sp_list->next; + free_image_page(sp_list, PG_UNSAFE_CLEAR); + sp_list = lp; } + return 0; + +Free: + swsusp_free(); return error; } -static void *get_buffer(struct snapshot_handle *handle) +/** + * get_buffer - compute the address that snapshot_write_next() should + * set for its caller to write to. + */ + +static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) { - struct pbe *pbe = handle->pbe, *last = handle->last_pbe; - struct page *page = virt_to_page(pbe->orig_address); + struct pbe *pbe; + struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); - if (PageNosave(page) && PageNosaveFree(page)) { - /* - * We have allocated the "original" page frame and we can - * use it directly to store the read page + if (PageNosave(page) && PageNosaveFree(page)) + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. */ - pbe->address = 0; - if (last && last->next) - last->next = NULL; - return (void *)pbe->orig_address; - } - /* - * The "original" page frame has not been allocated and we have to - * use a "safe" page frame to store the read page + return page_address(page); + + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. */ - pbe->address = (unsigned long)safe_pages; - safe_pages = safe_pages->next; - if (last) - last->next = pbe; - handle->last_pbe = pbe; + pbe = chain_alloc(ca, sizeof(struct pbe)); + if (!pbe) { + swsusp_free(); + return NULL; + } + pbe->orig_address = (unsigned long)page_address(page); + pbe->address = (unsigned long)safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->next = restore_pblist; + restore_pblist = pbe; return (void *)pbe->address; } @@ -1310,10 +1242,13 @@ static void *get_buffer(struct snapshot_handle *handle) int snapshot_write_next(struct snapshot_handle *handle, size_t count) { + static struct chain_allocator ca; int error = 0; + /* Check if we have already loaded the entire image */ if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) return 0; + if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); @@ -1324,26 +1259,32 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) handle->buffer = buffer; handle->sync_read = 1; if (handle->prev < handle->cur) { - if (!handle->prev) { - error = load_header(handle, - (struct swsusp_info *)buffer); + if (handle->prev == 0) { + error = load_header(buffer); + if (error) + return error; + + error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); if (error) return error; + } else if (handle->prev <= nr_meta_pages) { - handle->pbe = unpack_orig_addresses(buffer, - handle->pbe); - if (!handle->pbe) { - error = prepare_image(handle); + unpack_orig_pfns(buffer, ©_bm); + if (handle->prev == nr_meta_pages) { + error = prepare_image(&orig_bm, ©_bm); if (error) return error; - handle->pbe = restore_pblist; - handle->last_pbe = NULL; - handle->buffer = get_buffer(handle); + + chain_init(&ca, GFP_ATOMIC, PG_SAFE); + memory_bm_position_reset(&orig_bm); + restore_pblist = NULL; + handle->buffer = get_buffer(&orig_bm, &ca); handle->sync_read = 0; + if (!handle->buffer) + return -ENOMEM; } } else { - handle->pbe = handle->pbe->next; - handle->buffer = get_buffer(handle); + handle->buffer = get_buffer(&orig_bm, &ca); handle->sync_read = 0; } handle->prev = handle->cur; @@ -1362,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) int snapshot_image_loaded(struct snapshot_handle *handle) { - return !(!handle->pbe || handle->pbe->next || !nr_copy_pages || - handle->cur <= nr_meta_pages + nr_copy_pages); + return !(!nr_copy_pages || + handle->cur <= nr_meta_pages + nr_copy_pages); +} + +void snapshot_free_unused_memory(struct snapshot_handle *handle) +{ + /* Free only if we have loaded the image entirely */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8309d20b2563..9b2ee5344dee 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -331,8 +331,7 @@ static int enough_swap(unsigned int nr_pages) unsigned int free_swap = count_swap_pages(root_swap, 1); pr_debug("swsusp: free swap pages: %u\n", free_swap); - return free_swap > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); + return free_swap > nr_pages + PAGES_FOR_IO; } /** @@ -547,6 +546,7 @@ static int load_image(struct swap_map_handle *handle, error = err2; if (!error) { printk("\b\b\b\bdone\n"); + snapshot_free_unused_memory(snapshot); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 0ef5e4ba39e5..2e4499f3e4d9 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -193,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } + snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); error = device_suspend(PMSG_FREEZE); -- cgit v1.2.3-71-gd317 From c8eb8b4025175f967af0ba8e933f23aa9954dc35 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Sep 2006 23:32:56 -0700 Subject: [PATCH] PM: make it possible to disable console suspending Change suspend_console() so that it waits for all consoles to flush the remaining messages and make it possible to switch the console suspending off with the help of a Kconfig option. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Stefan Seyfried Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/console.h | 5 +++++ kernel/power/Kconfig | 11 +++++++++++ kernel/printk.c | 3 +++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 3bdf2155e565..76a1807726eb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -120,9 +120,14 @@ extern void console_stop(struct console *); extern void console_start(struct console *); extern int is_console_locked(void); +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); +#else +static inline void suspend_console(void) {} +static inline void resume_console(void) {} +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /* Some debug stub to catch some of the obvious races in the VT code */ #if 1 diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 619ecabf7c58..4b6e2f18e056 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -36,6 +36,17 @@ config PM_DEBUG code. This is helpful when debugging and reporting various PM bugs, like suspend support. +config DISABLE_CONSOLE_SUSPEND + bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" + depends on PM && PM_DEBUG + default n + ---help--- + This option turns off the console suspend mechanism that prevents + debug messages from reaching the console during the suspend/resume + operations. This may be helpful when debugging device drivers' + suspend/resume routines, but may itself lead to problems, for example + if netconsole is used. + config PM_TRACE bool "Suspend/resume event tracing" depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL diff --git a/kernel/printk.c b/kernel/printk.c index 1149365e989e..771f5e861bcd 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options) return 0; } +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /** * suspend_console - suspend the console subsystem * @@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options) */ void suspend_console(void) { + printk("Suspending console(s)\n"); acquire_console_sem(); console_suspended = 1; } @@ -737,6 +739,7 @@ void resume_console(void) console_suspended = 0; release_console_sem(); } +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /** * acquire_console_sem - lock the console system for exclusive use. -- cgit v1.2.3-71-gd317 From c5c6ba4e08ab9c9e390a0f3a7d9a5c332f5cc6ef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Sep 2006 23:32:58 -0700 Subject: [PATCH] PM: Add pm_trace switch Add the pm_trace attribute in /sys/power which has to be explicitly set to one to really enable the "PM tracing" code compiled in when CONFIG_PM_TRACE is set (which modifies the machine's CMOS clock in unpredictable ways). Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/interface.txt | 15 +++++++++++++++ include/linux/resume-trace.h | 24 ++++++++++++++---------- kernel/power/main.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 4117802af0f8..a66bec222b16 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -52,3 +52,18 @@ suspend image will be as small as possible. Reading from this file will display the current image size limit, which is set to 500 MB by default. + +/sys/power/pm_trace controls the code which saves the last PM event point in +the RTC across reboots, so that you can debug a machine that just hangs +during suspend (or more commonly, during resume). Namely, the RTC is only +used to save the last PM event point if this file contains '1'. Initially it +contains '0' which may be changed to '1' by writing a string representing a +nonzero integer into it. + +To use this debugging feature you should attempt to suspend the machine, then +reboot it and run + + dmesg -s 1000000 | grep 'hash matches' + +CAUTION: Using it will cause your machine's real-time (CMOS) clock to be +set to a random invalid time after a resume. diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index a376bd4ade39..81e9299ca148 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,21 +3,25 @@ #ifdef CONFIG_PM_TRACE +extern int pm_trace_enabled; + struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) set_trace_device(dev) -#define TRACE_RESUME(user) do { \ - void *tracedata; \ - asm volatile("movl $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.long %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ +#define TRACE_RESUME(user) do { \ + if (pm_trace_enabled) { \ + void *tracedata; \ + asm volatile("movl $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n" \ + "\t.long %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_resume_trace(tracedata, user); \ + } \ } while (0) #else diff --git a/kernel/power/main.c b/kernel/power/main.c index 4d403323a7bb..873228c71dab 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "power.h" @@ -281,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n power_attr(state); +#ifdef CONFIG_PM_TRACE +int pm_trace_enabled; + +static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf, "%d\n", pm_trace_enabled); +} + +static ssize_t +pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +{ + int val; + + if (sscanf(buf, "%d", &val) == 1) { + pm_trace_enabled = !!val; + return n; + } + return -EINVAL; +} + +power_attr(pm_trace); + +static struct attribute * g[] = { + &state_attr.attr, + &pm_trace_attr.attr, + NULL, +}; +#else static struct attribute * g[] = { &state_attr.attr, NULL, }; +#endif /* CONFIG_PM_TRACE */ static struct attribute_group attr_group = { .attrs = g, -- cgit v1.2.3-71-gd317 From eb2a2fd91f7c8a53b15063d6f08cf22b9a56cbfb Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Tue, 26 Sep 2006 23:23:45 +0200 Subject: [PATCH] Modularize generic HDLC This patch enables building of individual WAN protocol support routines (parts of generic HDLC) as separate modules. All protocol-private definitions are moved from hdlc.h file to protocol drivers. User-space interface and interface between generic HDLC and underlying low-level HDLC drivers are unchanged. Signed-off-by: Krzysztof Halasa Signed-off-by: Jeff Garzik --- drivers/net/wan/Kconfig | 12 +- drivers/net/wan/Makefile | 19 +- drivers/net/wan/hdlc.c | 368 ++++++++++++++++++++++++++++++++++++++ drivers/net/wan/hdlc_cisco.c | 198 ++++++++++++++------- drivers/net/wan/hdlc_fr.c | 389 +++++++++++++++++++++++++---------------- drivers/net/wan/hdlc_generic.c | 339 ----------------------------------- drivers/net/wan/hdlc_ppp.c | 77 ++++++-- drivers/net/wan/hdlc_raw.c | 50 +++++- drivers/net/wan/hdlc_raw_eth.c | 49 +++++- drivers/net/wan/hdlc_x25.c | 54 ++++-- include/linux/hdlc.h | 201 +++++---------------- include/linux/hdlc/ioctl.h | 33 ++++ 12 files changed, 1012 insertions(+), 777 deletions(-) create mode 100644 drivers/net/wan/hdlc.c delete mode 100644 drivers/net/wan/hdlc_generic.c (limited to 'include/linux') diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 54b8e492ef97..58b7efbb0750 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -154,7 +154,7 @@ config HDLC If unsure, say N. config HDLC_RAW - bool "Raw HDLC support" + tristate "Raw HDLC support" depends on HDLC help Generic HDLC driver supporting raw HDLC over WAN connections. @@ -162,7 +162,7 @@ config HDLC_RAW If unsure, say N. config HDLC_RAW_ETH - bool "Raw HDLC Ethernet device support" + tristate "Raw HDLC Ethernet device support" depends on HDLC help Generic HDLC driver supporting raw HDLC Ethernet device emulation @@ -173,7 +173,7 @@ config HDLC_RAW_ETH If unsure, say N. config HDLC_CISCO - bool "Cisco HDLC support" + tristate "Cisco HDLC support" depends on HDLC help Generic HDLC driver supporting Cisco HDLC over WAN connections. @@ -181,7 +181,7 @@ config HDLC_CISCO If unsure, say N. config HDLC_FR - bool "Frame Relay support" + tristate "Frame Relay support" depends on HDLC help Generic HDLC driver supporting Frame Relay over WAN connections. @@ -189,7 +189,7 @@ config HDLC_FR If unsure, say N. config HDLC_PPP - bool "Synchronous Point-to-Point Protocol (PPP) support" + tristate "Synchronous Point-to-Point Protocol (PPP) support" depends on HDLC help Generic HDLC driver supporting PPP over WAN connections. @@ -197,7 +197,7 @@ config HDLC_PPP If unsure, say N. config HDLC_X25 - bool "X.25 protocol support" + tristate "X.25 protocol support" depends on HDLC && (LAPB=m && HDLC=m || LAPB=y) help Generic HDLC driver supporting X.25 over WAN connections. diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 316ca6869d5e..83ec2c87ba3f 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -9,14 +9,13 @@ cyclomx-y := cycx_main.o cyclomx-$(CONFIG_CYCLOMX_X25) += cycx_x25.o cyclomx-objs := $(cyclomx-y) -hdlc-y := hdlc_generic.o -hdlc-$(CONFIG_HDLC_RAW) += hdlc_raw.o -hdlc-$(CONFIG_HDLC_RAW_ETH) += hdlc_raw_eth.o -hdlc-$(CONFIG_HDLC_CISCO) += hdlc_cisco.o -hdlc-$(CONFIG_HDLC_FR) += hdlc_fr.o -hdlc-$(CONFIG_HDLC_PPP) += hdlc_ppp.o -hdlc-$(CONFIG_HDLC_X25) += hdlc_x25.o -hdlc-objs := $(hdlc-y) +obj-$(CONFIG_HDLC) += hdlc.o +obj-$(CONFIG_HDLC_RAW) += hdlc_raw.o +obj-$(CONFIG_HDLC_RAW_ETH) += hdlc_raw_eth.o +obj-$(CONFIG_HDLC_CISCO) += hdlc_cisco.o +obj-$(CONFIG_HDLC_FR) += hdlc_fr.o +obj-$(CONFIG_HDLC_PPP) += hdlc_ppp.o syncppp.o +obj-$(CONFIG_HDLC_X25) += hdlc_x25.o pc300-y := pc300_drv.o pc300-$(CONFIG_PC300_MLPPP) += pc300_tty.o @@ -38,10 +37,6 @@ obj-$(CONFIG_CYCLADES_SYNC) += cycx_drv.o cyclomx.o obj-$(CONFIG_LAPBETHER) += lapbether.o obj-$(CONFIG_SBNI) += sbni.o obj-$(CONFIG_PC300) += pc300.o -obj-$(CONFIG_HDLC) += hdlc.o -ifeq ($(CONFIG_HDLC_PPP),y) - obj-$(CONFIG_HDLC) += syncppp.o -endif obj-$(CONFIG_N2) += n2.o obj-$(CONFIG_C101) += c101.o obj-$(CONFIG_WANXL) += wanxl.o diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c new file mode 100644 index 000000000000..db354e0edbe5 --- /dev/null +++ b/drivers/net/wan/hdlc.c @@ -0,0 +1,368 @@ +/* + * Generic HDLC support routines for Linux + * + * Copyright (C) 1999 - 2006 Krzysztof Halasa + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * Currently supported: + * * raw IP-in-HDLC + * * Cisco HDLC + * * Frame Relay with ANSI or CCITT LMI (both user and network side) + * * PPP + * * X.25 + * + * Use sethdlc utility to set line parameters, protocol and PVCs + * + * How does it work: + * - proto->open(), close(), start(), stop() calls are serialized. + * The order is: open, [ start, stop ... ] close ... + * - proto->start() and stop() are called with spin_lock_irq held. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static const char* version = "HDLC support module revision 1.20"; + +#undef DEBUG_LINK + +static struct hdlc_proto *first_proto = NULL; + + +static int hdlc_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + + + +static struct net_device_stats *hdlc_get_stats(struct net_device *dev) +{ + return hdlc_stats(dev); +} + + + +static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *p, struct net_device *orig_dev) +{ + struct hdlc_device_desc *desc = dev_to_desc(dev); + if (desc->netif_rx) + return desc->netif_rx(skb); + + desc->stats.rx_dropped++; /* Shouldn't happen */ + dev_kfree_skb(skb); + return NET_RX_DROP; +} + + + +static inline void hdlc_proto_start(struct net_device *dev) +{ + hdlc_device *hdlc = dev_to_hdlc(dev); + if (hdlc->proto->start) + return hdlc->proto->start(dev); +} + + + +static inline void hdlc_proto_stop(struct net_device *dev) +{ + hdlc_device *hdlc = dev_to_hdlc(dev); + if (hdlc->proto->stop) + return hdlc->proto->stop(dev); +} + + + +static int hdlc_device_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + hdlc_device *hdlc; + unsigned long flags; + int on; + + if (dev->get_stats != hdlc_get_stats) + return NOTIFY_DONE; /* not an HDLC device */ + + if (event != NETDEV_CHANGE) + return NOTIFY_DONE; /* Only interrested in carrier changes */ + + on = netif_carrier_ok(dev); + +#ifdef DEBUG_LINK + printk(KERN_DEBUG "%s: hdlc_device_event NETDEV_CHANGE, carrier %i\n", + dev->name, on); +#endif + + hdlc = dev_to_hdlc(dev); + spin_lock_irqsave(&hdlc->state_lock, flags); + + if (hdlc->carrier == on) + goto carrier_exit; /* no change in DCD line level */ + + hdlc->carrier = on; + + if (!hdlc->open) + goto carrier_exit; + + if (hdlc->carrier) { + printk(KERN_INFO "%s: Carrier detected\n", dev->name); + hdlc_proto_start(dev); + } else { + printk(KERN_INFO "%s: Carrier lost\n", dev->name); + hdlc_proto_stop(dev); + } + +carrier_exit: + spin_unlock_irqrestore(&hdlc->state_lock, flags); + return NOTIFY_DONE; +} + + + +/* Must be called by hardware driver when HDLC device is being opened */ +int hdlc_open(struct net_device *dev) +{ + hdlc_device *hdlc = dev_to_hdlc(dev); +#ifdef DEBUG_LINK + printk(KERN_DEBUG "%s: hdlc_open() carrier %i open %i\n", dev->name, + hdlc->carrier, hdlc->open); +#endif + + if (hdlc->proto == NULL) + return -ENOSYS; /* no protocol attached */ + + if (hdlc->proto->open) { + int result = hdlc->proto->open(dev); + if (result) + return result; + } + + spin_lock_irq(&hdlc->state_lock); + + if (hdlc->carrier) { + printk(KERN_INFO "%s: Carrier detected\n", dev->name); + hdlc_proto_start(dev); + } else + printk(KERN_INFO "%s: No carrier\n", dev->name); + + hdlc->open = 1; + + spin_unlock_irq(&hdlc->state_lock); + return 0; +} + + + +/* Must be called by hardware driver when HDLC device is being closed */ +void hdlc_close(struct net_device *dev) +{ + hdlc_device *hdlc = dev_to_hdlc(dev); +#ifdef DEBUG_LINK + printk(KERN_DEBUG "%s: hdlc_close() carrier %i open %i\n", dev->name, + hdlc->carrier, hdlc->open); +#endif + + spin_lock_irq(&hdlc->state_lock); + + hdlc->open = 0; + if (hdlc->carrier) + hdlc_proto_stop(dev); + + spin_unlock_irq(&hdlc->state_lock); + + if (hdlc->proto->close) + hdlc->proto->close(dev); +} + + + +int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct hdlc_proto *proto = first_proto; + int result; + + if (cmd != SIOCWANDEV) + return -EINVAL; + + if (dev_to_hdlc(dev)->proto) { + result = dev_to_hdlc(dev)->proto->ioctl(dev, ifr); + if (result != -EINVAL) + return result; + } + + /* Not handled by currently attached protocol (if any) */ + + while (proto) { + if ((result = proto->ioctl(dev, ifr)) != -EINVAL) + return result; + proto = proto->next; + } + return -EINVAL; +} + +void hdlc_setup(struct net_device *dev) +{ + hdlc_device *hdlc = dev_to_hdlc(dev); + + dev->get_stats = hdlc_get_stats; + dev->change_mtu = hdlc_change_mtu; + dev->mtu = HDLC_MAX_MTU; + + dev->type = ARPHRD_RAWHDLC; + dev->hard_header_len = 16; + + dev->flags = IFF_POINTOPOINT | IFF_NOARP; + + hdlc->carrier = 1; + hdlc->open = 0; + spin_lock_init(&hdlc->state_lock); +} + +struct net_device *alloc_hdlcdev(void *priv) +{ + struct net_device *dev; + dev = alloc_netdev(sizeof(struct hdlc_device_desc) + + sizeof(hdlc_device), "hdlc%d", hdlc_setup); + if (dev) + dev_to_hdlc(dev)->priv = priv; + return dev; +} + +void unregister_hdlc_device(struct net_device *dev) +{ + rtnl_lock(); + unregister_netdevice(dev); + detach_hdlc_protocol(dev); + rtnl_unlock(); +} + + + +int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, + int (*rx)(struct sk_buff *skb), size_t size) +{ + detach_hdlc_protocol(dev); + + if (!try_module_get(proto->module)) + return -ENOSYS; + + if (size) + if ((dev_to_hdlc(dev)->state = kmalloc(size, + GFP_KERNEL)) == NULL) { + printk(KERN_WARNING "Memory squeeze on" + " hdlc_proto_attach()\n"); + module_put(proto->module); + return -ENOBUFS; + } + dev_to_hdlc(dev)->proto = proto; + dev_to_desc(dev)->netif_rx = rx; + return 0; +} + + +void detach_hdlc_protocol(struct net_device *dev) +{ + hdlc_device *hdlc = dev_to_hdlc(dev); + + if (hdlc->proto) { + if (hdlc->proto->detach) + hdlc->proto->detach(dev); + module_put(hdlc->proto->module); + hdlc->proto = NULL; + } + kfree(hdlc->state); + hdlc->state = NULL; +} + + +void register_hdlc_protocol(struct hdlc_proto *proto) +{ + proto->next = first_proto; + first_proto = proto; +} + + +void unregister_hdlc_protocol(struct hdlc_proto *proto) +{ + struct hdlc_proto **p = &first_proto; + while (*p) { + if (*p == proto) { + *p = proto->next; + return; + } + p = &((*p)->next); + } +} + + + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("HDLC support module"); +MODULE_LICENSE("GPL v2"); + +EXPORT_SYMBOL(hdlc_open); +EXPORT_SYMBOL(hdlc_close); +EXPORT_SYMBOL(hdlc_ioctl); +EXPORT_SYMBOL(hdlc_setup); +EXPORT_SYMBOL(alloc_hdlcdev); +EXPORT_SYMBOL(unregister_hdlc_device); +EXPORT_SYMBOL(register_hdlc_protocol); +EXPORT_SYMBOL(unregister_hdlc_protocol); +EXPORT_SYMBOL(attach_hdlc_protocol); +EXPORT_SYMBOL(detach_hdlc_protocol); + +static struct packet_type hdlc_packet_type = { + .type = __constant_htons(ETH_P_HDLC), + .func = hdlc_rcv, +}; + + +static struct notifier_block hdlc_notifier = { + .notifier_call = hdlc_device_event, +}; + + +static int __init hdlc_module_init(void) +{ + int result; + + printk(KERN_INFO "%s\n", version); + if ((result = register_netdevice_notifier(&hdlc_notifier)) != 0) + return result; + dev_add_pack(&hdlc_packet_type); + return 0; +} + + + +static void __exit hdlc_module_exit(void) +{ + dev_remove_pack(&hdlc_packet_type); + unregister_netdevice_notifier(&hdlc_notifier); +} + + +module_init(hdlc_module_init); +module_exit(hdlc_module_exit); diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index f289daba0c7b..7ec2b2f9b7ee 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -2,7 +2,7 @@ * Generic HDLC support routines for Linux * Cisco HDLC support * - * Copyright (C) 2000 - 2003 Krzysztof Halasa + * Copyright (C) 2000 - 2006 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -34,17 +34,56 @@ #define CISCO_KEEPALIVE_REQ 2 /* Cisco keepalive request */ +struct hdlc_header { + u8 address; + u8 control; + u16 protocol; +}__attribute__ ((packed)); + + +struct cisco_packet { + u32 type; /* code */ + u32 par1; + u32 par2; + u16 rel; /* reliability */ + u32 time; +}__attribute__ ((packed)); +#define CISCO_PACKET_LEN 18 +#define CISCO_BIG_PACKET_LEN 20 + + +struct cisco_state { + cisco_proto settings; + + struct timer_list timer; + unsigned long last_poll; + int up; + int request_sent; + u32 txseq; /* TX sequence number */ + u32 rxseq; /* RX sequence number */ +}; + + +static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr); + + +static inline struct cisco_state * state(hdlc_device *hdlc) +{ + return(struct cisco_state *)(hdlc->state); +} + + static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev, u16 type, void *daddr, void *saddr, unsigned int len) { - hdlc_header *data; + struct hdlc_header *data; #ifdef DEBUG_HARD_HEADER printk(KERN_DEBUG "%s: cisco_hard_header called\n", dev->name); #endif - skb_push(skb, sizeof(hdlc_header)); - data = (hdlc_header*)skb->data; + skb_push(skb, sizeof(struct hdlc_header)); + data = (struct hdlc_header*)skb->data; if (type == CISCO_KEEPALIVE) data->address = CISCO_MULTICAST; else @@ -52,7 +91,7 @@ static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev, data->control = 0; data->protocol = htons(type); - return sizeof(hdlc_header); + return sizeof(struct hdlc_header); } @@ -61,9 +100,10 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, u32 par1, u32 par2) { struct sk_buff *skb; - cisco_packet *data; + struct cisco_packet *data; - skb = dev_alloc_skb(sizeof(hdlc_header) + sizeof(cisco_packet)); + skb = dev_alloc_skb(sizeof(struct hdlc_header) + + sizeof(struct cisco_packet)); if (!skb) { printk(KERN_WARNING "%s: Memory squeeze on cisco_keepalive_send()\n", @@ -72,7 +112,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, } skb_reserve(skb, 4); cisco_hard_header(skb, dev, CISCO_KEEPALIVE, NULL, NULL, 0); - data = (cisco_packet*)(skb->data + 4); + data = (struct cisco_packet*)(skb->data + 4); data->type = htonl(type); data->par1 = htonl(par1); @@ -81,7 +121,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, /* we will need do_div here if 1000 % HZ != 0 */ data->time = htonl((jiffies - INITIAL_JIFFIES) * (1000 / HZ)); - skb_put(skb, sizeof(cisco_packet)); + skb_put(skb, sizeof(struct cisco_packet)); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; skb->nh.raw = skb->data; @@ -93,9 +133,9 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev) { - hdlc_header *data = (hdlc_header*)skb->data; + struct hdlc_header *data = (struct hdlc_header*)skb->data; - if (skb->len < sizeof(hdlc_header)) + if (skb->len < sizeof(struct hdlc_header)) return __constant_htons(ETH_P_HDLC); if (data->address != CISCO_MULTICAST && @@ -106,7 +146,7 @@ static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev) case __constant_htons(ETH_P_IP): case __constant_htons(ETH_P_IPX): case __constant_htons(ETH_P_IPV6): - skb_pull(skb, sizeof(hdlc_header)); + skb_pull(skb, sizeof(struct hdlc_header)); return data->protocol; default: return __constant_htons(ETH_P_HDLC); @@ -118,12 +158,12 @@ static int cisco_rx(struct sk_buff *skb) { struct net_device *dev = skb->dev; hdlc_device *hdlc = dev_to_hdlc(dev); - hdlc_header *data = (hdlc_header*)skb->data; - cisco_packet *cisco_data; + struct hdlc_header *data = (struct hdlc_header*)skb->data; + struct cisco_packet *cisco_data; struct in_device *in_dev; u32 addr, mask; - if (skb->len < sizeof(hdlc_header)) + if (skb->len < sizeof(struct hdlc_header)) goto rx_error; if (data->address != CISCO_MULTICAST && @@ -137,15 +177,17 @@ static int cisco_rx(struct sk_buff *skb) return NET_RX_SUCCESS; case CISCO_KEEPALIVE: - if (skb->len != sizeof(hdlc_header) + CISCO_PACKET_LEN && - skb->len != sizeof(hdlc_header) + CISCO_BIG_PACKET_LEN) { - printk(KERN_INFO "%s: Invalid length of Cisco " - "control packet (%d bytes)\n", - dev->name, skb->len); + if ((skb->len != sizeof(struct hdlc_header) + + CISCO_PACKET_LEN) && + (skb->len != sizeof(struct hdlc_header) + + CISCO_BIG_PACKET_LEN)) { + printk(KERN_INFO "%s: Invalid length of Cisco control" + " packet (%d bytes)\n", dev->name, skb->len); goto rx_error; } - cisco_data = (cisco_packet*)(skb->data + sizeof(hdlc_header)); + cisco_data = (struct cisco_packet*)(skb->data + sizeof + (struct hdlc_header)); switch(ntohl (cisco_data->type)) { case CISCO_ADDR_REQ: /* Stolen from syncppp.c :-) */ @@ -178,11 +220,11 @@ static int cisco_rx(struct sk_buff *skb) goto rx_error; case CISCO_KEEPALIVE_REQ: - hdlc->state.cisco.rxseq = ntohl(cisco_data->par1); - if (hdlc->state.cisco.request_sent && - ntohl(cisco_data->par2)==hdlc->state.cisco.txseq) { - hdlc->state.cisco.last_poll = jiffies; - if (!hdlc->state.cisco.up) { + state(hdlc)->rxseq = ntohl(cisco_data->par1); + if (state(hdlc)->request_sent && + ntohl(cisco_data->par2) == state(hdlc)->txseq) { + state(hdlc)->last_poll = jiffies; + if (!state(hdlc)->up) { u32 sec, min, hrs, days; sec = ntohl(cisco_data->time) / 1000; min = sec / 60; sec -= min * 60; @@ -193,7 +235,7 @@ static int cisco_rx(struct sk_buff *skb) dev->name, days, hrs, min, sec); netif_dormant_off(dev); - hdlc->state.cisco.up = 1; + state(hdlc)->up = 1; } } @@ -208,7 +250,7 @@ static int cisco_rx(struct sk_buff *skb) return NET_RX_DROP; rx_error: - hdlc->stats.rx_errors++; /* Mark error */ + dev_to_desc(dev)->stats.rx_errors++; /* Mark error */ dev_kfree_skb_any(skb); return NET_RX_DROP; } @@ -220,23 +262,22 @@ static void cisco_timer(unsigned long arg) struct net_device *dev = (struct net_device *)arg; hdlc_device *hdlc = dev_to_hdlc(dev); - if (hdlc->state.cisco.up && - time_after(jiffies, hdlc->state.cisco.last_poll + - hdlc->state.cisco.settings.timeout * HZ)) { - hdlc->state.cisco.up = 0; + if (state(hdlc)->up && + time_after(jiffies, state(hdlc)->last_poll + + state(hdlc)->settings.timeout * HZ)) { + state(hdlc)->up = 0; printk(KERN_INFO "%s: Link down\n", dev->name); netif_dormant_on(dev); } - cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ, - ++hdlc->state.cisco.txseq, - hdlc->state.cisco.rxseq); - hdlc->state.cisco.request_sent = 1; - hdlc->state.cisco.timer.expires = jiffies + - hdlc->state.cisco.settings.interval * HZ; - hdlc->state.cisco.timer.function = cisco_timer; - hdlc->state.cisco.timer.data = arg; - add_timer(&hdlc->state.cisco.timer); + cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ, ++state(hdlc)->txseq, + state(hdlc)->rxseq); + state(hdlc)->request_sent = 1; + state(hdlc)->timer.expires = jiffies + + state(hdlc)->settings.interval * HZ; + state(hdlc)->timer.function = cisco_timer; + state(hdlc)->timer.data = arg; + add_timer(&state(hdlc)->timer); } @@ -244,15 +285,15 @@ static void cisco_timer(unsigned long arg) static void cisco_start(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - hdlc->state.cisco.up = 0; - hdlc->state.cisco.request_sent = 0; - hdlc->state.cisco.txseq = hdlc->state.cisco.rxseq = 0; - - init_timer(&hdlc->state.cisco.timer); - hdlc->state.cisco.timer.expires = jiffies + HZ; /*First poll after 1s*/ - hdlc->state.cisco.timer.function = cisco_timer; - hdlc->state.cisco.timer.data = (unsigned long)dev; - add_timer(&hdlc->state.cisco.timer); + state(hdlc)->up = 0; + state(hdlc)->request_sent = 0; + state(hdlc)->txseq = state(hdlc)->rxseq = 0; + + init_timer(&state(hdlc)->timer); + state(hdlc)->timer.expires = jiffies + HZ; /*First poll after 1s*/ + state(hdlc)->timer.function = cisco_timer; + state(hdlc)->timer.data = (unsigned long)dev; + add_timer(&state(hdlc)->timer); } @@ -260,15 +301,24 @@ static void cisco_start(struct net_device *dev) static void cisco_stop(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - del_timer_sync(&hdlc->state.cisco.timer); + del_timer_sync(&state(hdlc)->timer); netif_dormant_on(dev); - hdlc->state.cisco.up = 0; - hdlc->state.cisco.request_sent = 0; + state(hdlc)->up = 0; + state(hdlc)->request_sent = 0; } -int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr) +static struct hdlc_proto proto = { + .start = cisco_start, + .stop = cisco_stop, + .type_trans = cisco_type_trans, + .ioctl = cisco_ioctl, + .module = THIS_MODULE, +}; + + +static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr) { cisco_proto __user *cisco_s = ifr->ifr_settings.ifs_ifsu.cisco; const size_t size = sizeof(cisco_proto); @@ -278,12 +328,14 @@ int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr) switch (ifr->ifr_settings.type) { case IF_GET_PROTO: + if (dev_to_hdlc(dev)->proto != &proto) + return -EINVAL; ifr->ifr_settings.type = IF_PROTO_CISCO; if (ifr->ifr_settings.size < size) { ifr->ifr_settings.size = size; /* data size wanted */ return -ENOBUFS; } - if (copy_to_user(cisco_s, &hdlc->state.cisco.settings, size)) + if (copy_to_user(cisco_s, &state(hdlc)->settings, size)) return -EFAULT; return 0; @@ -302,19 +354,15 @@ int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; result=hdlc->attach(dev, ENCODING_NRZ,PARITY_CRC16_PR1_CCITT); - if (result) return result; - hdlc_proto_detach(hdlc); - memcpy(&hdlc->state.cisco.settings, &new_settings, size); - memset(&hdlc->proto, 0, sizeof(hdlc->proto)); + result = attach_hdlc_protocol(dev, &proto, cisco_rx, + sizeof(struct cisco_state)); + if (result) + return result; - hdlc->proto.start = cisco_start; - hdlc->proto.stop = cisco_stop; - hdlc->proto.netif_rx = cisco_rx; - hdlc->proto.type_trans = cisco_type_trans; - hdlc->proto.id = IF_PROTO_CISCO; + memcpy(&state(hdlc)->settings, &new_settings, size); dev->hard_start_xmit = hdlc->xmit; dev->hard_header = cisco_hard_header; dev->hard_header_cache = NULL; @@ -327,3 +375,25 @@ int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } + + +static int __init mod_init(void) +{ + register_hdlc_protocol(&proto); + return 0; +} + + + +static void __exit mod_exit(void) +{ + unregister_hdlc_protocol(&proto); +} + + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("Cisco HDLC protocol support for generic HDLC"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 7bb737bbdeb9..b45ab680d2d6 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -2,7 +2,7 @@ * Generic HDLC support routines for Linux * Frame Relay support * - * Copyright (C) 1999 - 2005 Krzysztof Halasa + * Copyright (C) 1999 - 2006 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -52,6 +52,8 @@ #undef DEBUG_PKT #undef DEBUG_ECN #undef DEBUG_LINK +#undef DEBUG_PROTO +#undef DEBUG_PVC #define FR_UI 0x03 #define FR_PAD 0x00 @@ -115,13 +117,53 @@ typedef struct { }__attribute__ ((packed)) fr_hdr; +typedef struct pvc_device_struct { + struct net_device *frad; + struct net_device *main; + struct net_device *ether; /* bridged Ethernet interface */ + struct pvc_device_struct *next; /* Sorted in ascending DLCI order */ + int dlci; + int open_count; + + struct { + unsigned int new: 1; + unsigned int active: 1; + unsigned int exist: 1; + unsigned int deleted: 1; + unsigned int fecn: 1; + unsigned int becn: 1; + unsigned int bandwidth; /* Cisco LMI reporting only */ + }state; +}pvc_device; + + +struct frad_state { + fr_proto settings; + pvc_device *first_pvc; + int dce_pvc_count; + + struct timer_list timer; + unsigned long last_poll; + int reliable; + int dce_changed; + int request; + int fullrep_sent; + u32 last_errors; /* last errors bit list */ + u8 n391cnt; + u8 txseq; /* TX sequence number */ + u8 rxseq; /* RX sequence number */ +}; + + +static int fr_ioctl(struct net_device *dev, struct ifreq *ifr); + + static inline u16 q922_to_dlci(u8 *hdr) { return ((hdr[0] & 0xFC) << 2) | ((hdr[1] & 0xF0) >> 4); } - static inline void dlci_to_q922(u8 *hdr, u16 dlci) { hdr[0] = (dlci >> 2) & 0xFC; @@ -129,10 +171,21 @@ static inline void dlci_to_q922(u8 *hdr, u16 dlci) } +static inline struct frad_state * state(hdlc_device *hdlc) +{ + return(struct frad_state *)(hdlc->state); +} + + +static __inline__ pvc_device* dev_to_pvc(struct net_device *dev) +{ + return dev->priv; +} + static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci) { - pvc_device *pvc = hdlc->state.fr.first_pvc; + pvc_device *pvc = state(hdlc)->first_pvc; while (pvc) { if (pvc->dlci == dlci) @@ -146,10 +199,10 @@ static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci) } -static inline pvc_device* add_pvc(struct net_device *dev, u16 dlci) +static pvc_device* add_pvc(struct net_device *dev, u16 dlci) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc, **pvc_p = &hdlc->state.fr.first_pvc; + pvc_device *pvc, **pvc_p = &state(hdlc)->first_pvc; while (*pvc_p) { if ((*pvc_p)->dlci == dlci) @@ -160,12 +213,15 @@ static inline pvc_device* add_pvc(struct net_device *dev, u16 dlci) } pvc = kmalloc(sizeof(pvc_device), GFP_ATOMIC); +#ifdef DEBUG_PVC + printk(KERN_DEBUG "add_pvc: allocated pvc %p, frad %p\n", pvc, dev); +#endif if (!pvc) return NULL; memset(pvc, 0, sizeof(pvc_device)); pvc->dlci = dlci; - pvc->master = dev; + pvc->frad = dev; pvc->next = *pvc_p; /* Put it in the chain */ *pvc_p = pvc; return pvc; @@ -174,7 +230,7 @@ static inline pvc_device* add_pvc(struct net_device *dev, u16 dlci) static inline int pvc_is_used(pvc_device *pvc) { - return pvc->main != NULL || pvc->ether != NULL; + return pvc->main || pvc->ether; } @@ -200,11 +256,14 @@ static inline void pvc_carrier(int on, pvc_device *pvc) static inline void delete_unused_pvcs(hdlc_device *hdlc) { - pvc_device **pvc_p = &hdlc->state.fr.first_pvc; + pvc_device **pvc_p = &state(hdlc)->first_pvc; while (*pvc_p) { if (!pvc_is_used(*pvc_p)) { pvc_device *pvc = *pvc_p; +#ifdef DEBUG_PVC + printk(KERN_DEBUG "freeing unused pvc: %p\n", pvc); +#endif *pvc_p = pvc->next; kfree(pvc); continue; @@ -295,16 +354,16 @@ static int pvc_open(struct net_device *dev) { pvc_device *pvc = dev_to_pvc(dev); - if ((pvc->master->flags & IFF_UP) == 0) - return -EIO; /* Master must be UP in order to activate PVC */ + if ((pvc->frad->flags & IFF_UP) == 0) + return -EIO; /* Frad must be UP in order to activate PVC */ if (pvc->open_count++ == 0) { - hdlc_device *hdlc = dev_to_hdlc(pvc->master); - if (hdlc->state.fr.settings.lmi == LMI_NONE) - pvc->state.active = netif_carrier_ok(pvc->master); + hdlc_device *hdlc = dev_to_hdlc(pvc->frad); + if (state(hdlc)->settings.lmi == LMI_NONE) + pvc->state.active = netif_carrier_ok(pvc->frad); pvc_carrier(pvc->state.active, pvc); - hdlc->state.fr.dce_changed = 1; + state(hdlc)->dce_changed = 1; } return 0; } @@ -316,12 +375,12 @@ static int pvc_close(struct net_device *dev) pvc_device *pvc = dev_to_pvc(dev); if (--pvc->open_count == 0) { - hdlc_device *hdlc = dev_to_hdlc(pvc->master); - if (hdlc->state.fr.settings.lmi == LMI_NONE) + hdlc_device *hdlc = dev_to_hdlc(pvc->frad); + if (state(hdlc)->settings.lmi == LMI_NONE) pvc->state.active = 0; - if (hdlc->state.fr.settings.dce) { - hdlc->state.fr.dce_changed = 1; + if (state(hdlc)->settings.dce) { + state(hdlc)->dce_changed = 1; pvc->state.active = 0; } } @@ -348,7 +407,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } info.dlci = pvc->dlci; - memcpy(info.master, pvc->master->name, IFNAMSIZ); + memcpy(info.master, pvc->frad->name, IFNAMSIZ); if (copy_to_user(ifr->ifr_settings.ifs_ifsu.fr_pvc_info, &info, sizeof(info))) return -EFAULT; @@ -361,7 +420,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static inline struct net_device_stats *pvc_get_stats(struct net_device *dev) { - return netdev_priv(dev); + return &dev_to_desc(dev)->stats; } @@ -393,7 +452,7 @@ static int pvc_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_packets++; if (pvc->state.fecn) /* TX Congestion counter */ stats->tx_compressed++; - skb->dev = pvc->master; + skb->dev = pvc->frad; dev_queue_xmit(skb); return 0; } @@ -419,7 +478,7 @@ static int pvc_change_mtu(struct net_device *dev, int new_mtu) static inline void fr_log_dlci_active(pvc_device *pvc) { printk(KERN_INFO "%s: DLCI %d [%s%s%s]%s %s\n", - pvc->master->name, + pvc->frad->name, pvc->dlci, pvc->main ? pvc->main->name : "", pvc->main && pvc->ether ? " " : "", @@ -438,21 +497,20 @@ static inline u8 fr_lmi_nextseq(u8 x) } - static void fr_lmi_send(struct net_device *dev, int fullrep) { hdlc_device *hdlc = dev_to_hdlc(dev); struct sk_buff *skb; - pvc_device *pvc = hdlc->state.fr.first_pvc; - int lmi = hdlc->state.fr.settings.lmi; - int dce = hdlc->state.fr.settings.dce; + pvc_device *pvc = state(hdlc)->first_pvc; + int lmi = state(hdlc)->settings.lmi; + int dce = state(hdlc)->settings.dce; int len = lmi == LMI_ANSI ? LMI_ANSI_LENGTH : LMI_CCITT_CISCO_LENGTH; int stat_len = (lmi == LMI_CISCO) ? 6 : 3; u8 *data; int i = 0; if (dce && fullrep) { - len += hdlc->state.fr.dce_pvc_count * (2 + stat_len); + len += state(hdlc)->dce_pvc_count * (2 + stat_len); if (len > HDLC_MAX_MRU) { printk(KERN_WARNING "%s: Too many PVCs while sending " "LMI full report\n", dev->name); @@ -486,8 +544,9 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) data[i++] = fullrep ? LMI_FULLREP : LMI_INTEGRITY; data[i++] = lmi == LMI_CCITT ? LMI_CCITT_ALIVE : LMI_ANSI_CISCO_ALIVE; data[i++] = LMI_INTEG_LEN; - data[i++] = hdlc->state.fr.txseq =fr_lmi_nextseq(hdlc->state.fr.txseq); - data[i++] = hdlc->state.fr.rxseq; + data[i++] = state(hdlc)->txseq = + fr_lmi_nextseq(state(hdlc)->txseq); + data[i++] = state(hdlc)->rxseq; if (dce && fullrep) { while (pvc) { @@ -496,7 +555,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) data[i++] = stat_len; /* LMI start/restart */ - if (hdlc->state.fr.reliable && !pvc->state.exist) { + if (state(hdlc)->reliable && !pvc->state.exist) { pvc->state.exist = pvc->state.new = 1; fr_log_dlci_active(pvc); } @@ -541,15 +600,15 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) static void fr_set_link_state(int reliable, struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc = hdlc->state.fr.first_pvc; + pvc_device *pvc = state(hdlc)->first_pvc; - hdlc->state.fr.reliable = reliable; + state(hdlc)->reliable = reliable; if (reliable) { netif_dormant_off(dev); - hdlc->state.fr.n391cnt = 0; /* Request full status */ - hdlc->state.fr.dce_changed = 1; + state(hdlc)->n391cnt = 0; /* Request full status */ + state(hdlc)->dce_changed = 1; - if (hdlc->state.fr.settings.lmi == LMI_NONE) { + if (state(hdlc)->settings.lmi == LMI_NONE) { while (pvc) { /* Activate all PVCs */ pvc_carrier(1, pvc); pvc->state.exist = pvc->state.active = 1; @@ -563,7 +622,7 @@ static void fr_set_link_state(int reliable, struct net_device *dev) pvc_carrier(0, pvc); pvc->state.exist = pvc->state.active = 0; pvc->state.new = 0; - if (!hdlc->state.fr.settings.dce) + if (!state(hdlc)->settings.dce) pvc->state.bandwidth = 0; pvc = pvc->next; } @@ -571,7 +630,6 @@ static void fr_set_link_state(int reliable, struct net_device *dev) } - static void fr_timer(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; @@ -579,62 +637,61 @@ static void fr_timer(unsigned long arg) int i, cnt = 0, reliable; u32 list; - if (hdlc->state.fr.settings.dce) { - reliable = hdlc->state.fr.request && - time_before(jiffies, hdlc->state.fr.last_poll + - hdlc->state.fr.settings.t392 * HZ); - hdlc->state.fr.request = 0; + if (state(hdlc)->settings.dce) { + reliable = state(hdlc)->request && + time_before(jiffies, state(hdlc)->last_poll + + state(hdlc)->settings.t392 * HZ); + state(hdlc)->request = 0; } else { - hdlc->state.fr.last_errors <<= 1; /* Shift the list */ - if (hdlc->state.fr.request) { - if (hdlc->state.fr.reliable) + state(hdlc)->last_errors <<= 1; /* Shift the list */ + if (state(hdlc)->request) { + if (state(hdlc)->reliable) printk(KERN_INFO "%s: No LMI status reply " "received\n", dev->name); - hdlc->state.fr.last_errors |= 1; + state(hdlc)->last_errors |= 1; } - list = hdlc->state.fr.last_errors; - for (i = 0; i < hdlc->state.fr.settings.n393; i++, list >>= 1) + list = state(hdlc)->last_errors; + for (i = 0; i < state(hdlc)->settings.n393; i++, list >>= 1) cnt += (list & 1); /* errors count */ - reliable = (cnt < hdlc->state.fr.settings.n392); + reliable = (cnt < state(hdlc)->settings.n392); } - if (hdlc->state.fr.reliable != reliable) { + if (state(hdlc)->reliable != reliable) { printk(KERN_INFO "%s: Link %sreliable\n", dev->name, reliable ? "" : "un"); fr_set_link_state(reliable, dev); } - if (hdlc->state.fr.settings.dce) - hdlc->state.fr.timer.expires = jiffies + - hdlc->state.fr.settings.t392 * HZ; + if (state(hdlc)->settings.dce) + state(hdlc)->timer.expires = jiffies + + state(hdlc)->settings.t392 * HZ; else { - if (hdlc->state.fr.n391cnt) - hdlc->state.fr.n391cnt--; + if (state(hdlc)->n391cnt) + state(hdlc)->n391cnt--; - fr_lmi_send(dev, hdlc->state.fr.n391cnt == 0); + fr_lmi_send(dev, state(hdlc)->n391cnt == 0); - hdlc->state.fr.last_poll = jiffies; - hdlc->state.fr.request = 1; - hdlc->state.fr.timer.expires = jiffies + - hdlc->state.fr.settings.t391 * HZ; + state(hdlc)->last_poll = jiffies; + state(hdlc)->request = 1; + state(hdlc)->timer.expires = jiffies + + state(hdlc)->settings.t391 * HZ; } - hdlc->state.fr.timer.function = fr_timer; - hdlc->state.fr.timer.data = arg; - add_timer(&hdlc->state.fr.timer); + state(hdlc)->timer.function = fr_timer; + state(hdlc)->timer.data = arg; + add_timer(&state(hdlc)->timer); } - static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) { hdlc_device *hdlc = dev_to_hdlc(dev); pvc_device *pvc; u8 rxseq, txseq; - int lmi = hdlc->state.fr.settings.lmi; - int dce = hdlc->state.fr.settings.dce; + int lmi = state(hdlc)->settings.lmi; + int dce = state(hdlc)->settings.dce; int stat_len = (lmi == LMI_CISCO) ? 6 : 3, reptype, error, no_ram, i; if (skb->len < (lmi == LMI_ANSI ? LMI_ANSI_LENGTH : @@ -645,8 +702,8 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) if (skb->data[3] != (lmi == LMI_CISCO ? NLPID_CISCO_LMI : NLPID_CCITT_ANSI_LMI)) { - printk(KERN_INFO "%s: Received non-LMI frame with LMI" - " DLCI\n", dev->name); + printk(KERN_INFO "%s: Received non-LMI frame with LMI DLCI\n", + dev->name); return 1; } @@ -706,53 +763,53 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) } i++; - hdlc->state.fr.rxseq = skb->data[i++]; /* TX sequence from peer */ + state(hdlc)->rxseq = skb->data[i++]; /* TX sequence from peer */ rxseq = skb->data[i++]; /* Should confirm our sequence */ - txseq = hdlc->state.fr.txseq; + txseq = state(hdlc)->txseq; if (dce) - hdlc->state.fr.last_poll = jiffies; + state(hdlc)->last_poll = jiffies; error = 0; - if (!hdlc->state.fr.reliable) + if (!state(hdlc)->reliable) error = 1; - if (rxseq == 0 || rxseq != txseq) { - hdlc->state.fr.n391cnt = 0; /* Ask for full report next time */ + if (rxseq == 0 || rxseq != txseq) { /* Ask for full report next time */ + state(hdlc)->n391cnt = 0; error = 1; } if (dce) { - if (hdlc->state.fr.fullrep_sent && !error) { + if (state(hdlc)->fullrep_sent && !error) { /* Stop sending full report - the last one has been confirmed by DTE */ - hdlc->state.fr.fullrep_sent = 0; - pvc = hdlc->state.fr.first_pvc; + state(hdlc)->fullrep_sent = 0; + pvc = state(hdlc)->first_pvc; while (pvc) { if (pvc->state.new) { pvc->state.new = 0; /* Tell DTE that new PVC is now active */ - hdlc->state.fr.dce_changed = 1; + state(hdlc)->dce_changed = 1; } pvc = pvc->next; } } - if (hdlc->state.fr.dce_changed) { + if (state(hdlc)->dce_changed) { reptype = LMI_FULLREP; - hdlc->state.fr.fullrep_sent = 1; - hdlc->state.fr.dce_changed = 0; + state(hdlc)->fullrep_sent = 1; + state(hdlc)->dce_changed = 0; } - hdlc->state.fr.request = 1; /* got request */ + state(hdlc)->request = 1; /* got request */ fr_lmi_send(dev, reptype == LMI_FULLREP ? 1 : 0); return 0; } /* DTE */ - hdlc->state.fr.request = 0; /* got response, no request pending */ + state(hdlc)->request = 0; /* got response, no request pending */ if (error) return 0; @@ -760,7 +817,7 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) if (reptype != LMI_FULLREP) return 0; - pvc = hdlc->state.fr.first_pvc; + pvc = state(hdlc)->first_pvc; while (pvc) { pvc->state.deleted = 1; @@ -827,7 +884,7 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) i += stat_len; } - pvc = hdlc->state.fr.first_pvc; + pvc = state(hdlc)->first_pvc; while (pvc) { if (pvc->state.deleted && pvc->state.exist) { @@ -841,17 +898,16 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) } /* Next full report after N391 polls */ - hdlc->state.fr.n391cnt = hdlc->state.fr.settings.n391; + state(hdlc)->n391cnt = state(hdlc)->settings.n391; return 0; } - static int fr_rx(struct sk_buff *skb) { - struct net_device *ndev = skb->dev; - hdlc_device *hdlc = dev_to_hdlc(ndev); + struct net_device *frad = skb->dev; + hdlc_device *hdlc = dev_to_hdlc(frad); fr_hdr *fh = (fr_hdr*)skb->data; u8 *data = skb->data; u16 dlci; @@ -864,11 +920,11 @@ static int fr_rx(struct sk_buff *skb) dlci = q922_to_dlci(skb->data); if ((dlci == LMI_CCITT_ANSI_DLCI && - (hdlc->state.fr.settings.lmi == LMI_ANSI || - hdlc->state.fr.settings.lmi == LMI_CCITT)) || + (state(hdlc)->settings.lmi == LMI_ANSI || + state(hdlc)->settings.lmi == LMI_CCITT)) || (dlci == LMI_CISCO_DLCI && - hdlc->state.fr.settings.lmi == LMI_CISCO)) { - if (fr_lmi_recv(ndev, skb)) + state(hdlc)->settings.lmi == LMI_CISCO)) { + if (fr_lmi_recv(frad, skb)) goto rx_error; dev_kfree_skb_any(skb); return NET_RX_SUCCESS; @@ -878,7 +934,7 @@ static int fr_rx(struct sk_buff *skb) if (!pvc) { #ifdef DEBUG_PKT printk(KERN_INFO "%s: No PVC for received frame's DLCI %d\n", - ndev->name, dlci); + frad->name, dlci); #endif dev_kfree_skb_any(skb); return NET_RX_DROP; @@ -886,7 +942,7 @@ static int fr_rx(struct sk_buff *skb) if (pvc->state.fecn != fh->fecn) { #ifdef DEBUG_ECN - printk(KERN_DEBUG "%s: DLCI %d FECN O%s\n", ndev->name, + printk(KERN_DEBUG "%s: DLCI %d FECN O%s\n", frad->name, dlci, fh->fecn ? "N" : "FF"); #endif pvc->state.fecn ^= 1; @@ -894,7 +950,7 @@ static int fr_rx(struct sk_buff *skb) if (pvc->state.becn != fh->becn) { #ifdef DEBUG_ECN - printk(KERN_DEBUG "%s: DLCI %d BECN O%s\n", ndev->name, + printk(KERN_DEBUG "%s: DLCI %d BECN O%s\n", frad->name, dlci, fh->becn ? "N" : "FF"); #endif pvc->state.becn ^= 1; @@ -902,7 +958,7 @@ static int fr_rx(struct sk_buff *skb) if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - hdlc->stats.rx_dropped++; + dev_to_desc(frad)->stats.rx_dropped++; return NET_RX_DROP; } @@ -938,13 +994,13 @@ static int fr_rx(struct sk_buff *skb) default: printk(KERN_INFO "%s: Unsupported protocol, OUI=%x " - "PID=%x\n", ndev->name, oui, pid); + "PID=%x\n", frad->name, oui, pid); dev_kfree_skb_any(skb); return NET_RX_DROP; } } else { printk(KERN_INFO "%s: Unsupported protocol, NLPID=%x " - "length = %i\n", ndev->name, data[3], skb->len); + "length = %i\n", frad->name, data[3], skb->len); dev_kfree_skb_any(skb); return NET_RX_DROP; } @@ -964,7 +1020,7 @@ static int fr_rx(struct sk_buff *skb) } rx_error: - hdlc->stats.rx_errors++; /* Mark error */ + dev_to_desc(frad)->stats.rx_errors++; /* Mark error */ dev_kfree_skb_any(skb); return NET_RX_DROP; } @@ -977,44 +1033,42 @@ static void fr_start(struct net_device *dev) #ifdef DEBUG_LINK printk(KERN_DEBUG "fr_start\n"); #endif - if (hdlc->state.fr.settings.lmi != LMI_NONE) { - hdlc->state.fr.reliable = 0; - hdlc->state.fr.dce_changed = 1; - hdlc->state.fr.request = 0; - hdlc->state.fr.fullrep_sent = 0; - hdlc->state.fr.last_errors = 0xFFFFFFFF; - hdlc->state.fr.n391cnt = 0; - hdlc->state.fr.txseq = hdlc->state.fr.rxseq = 0; - - init_timer(&hdlc->state.fr.timer); + if (state(hdlc)->settings.lmi != LMI_NONE) { + state(hdlc)->reliable = 0; + state(hdlc)->dce_changed = 1; + state(hdlc)->request = 0; + state(hdlc)->fullrep_sent = 0; + state(hdlc)->last_errors = 0xFFFFFFFF; + state(hdlc)->n391cnt = 0; + state(hdlc)->txseq = state(hdlc)->rxseq = 0; + + init_timer(&state(hdlc)->timer); /* First poll after 1 s */ - hdlc->state.fr.timer.expires = jiffies + HZ; - hdlc->state.fr.timer.function = fr_timer; - hdlc->state.fr.timer.data = (unsigned long)dev; - add_timer(&hdlc->state.fr.timer); + state(hdlc)->timer.expires = jiffies + HZ; + state(hdlc)->timer.function = fr_timer; + state(hdlc)->timer.data = (unsigned long)dev; + add_timer(&state(hdlc)->timer); } else fr_set_link_state(1, dev); } - static void fr_stop(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); #ifdef DEBUG_LINK printk(KERN_DEBUG "fr_stop\n"); #endif - if (hdlc->state.fr.settings.lmi != LMI_NONE) - del_timer_sync(&hdlc->state.fr.timer); + if (state(hdlc)->settings.lmi != LMI_NONE) + del_timer_sync(&state(hdlc)->timer); fr_set_link_state(0, dev); } - static void fr_close(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc = hdlc->state.fr.first_pvc; + pvc_device *pvc = state(hdlc)->first_pvc; while (pvc) { /* Shutdown all PVCs for this FRAD */ if (pvc->main) @@ -1025,7 +1079,8 @@ static void fr_close(struct net_device *dev) } } -static void dlci_setup(struct net_device *dev) + +static void pvc_setup(struct net_device *dev) { dev->type = ARPHRD_DLCI; dev->flags = IFF_POINTOPOINT; @@ -1033,9 +1088,9 @@ static void dlci_setup(struct net_device *dev) dev->addr_len = 2; } -static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type) +static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) { - hdlc_device *hdlc = dev_to_hdlc(master); + hdlc_device *hdlc = dev_to_hdlc(frad); pvc_device *pvc = NULL; struct net_device *dev; int result, used; @@ -1044,9 +1099,9 @@ static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type) if (type == ARPHRD_ETHER) prefix = "pvceth%d"; - if ((pvc = add_pvc(master, dlci)) == NULL) { + if ((pvc = add_pvc(frad, dlci)) == NULL) { printk(KERN_WARNING "%s: Memory squeeze on fr_add_pvc()\n", - master->name); + frad->name); return -ENOBUFS; } @@ -1060,11 +1115,11 @@ static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type) "pvceth%d", ether_setup); else dev = alloc_netdev(sizeof(struct net_device_stats), - "pvc%d", dlci_setup); + "pvc%d", pvc_setup); if (!dev) { printk(KERN_WARNING "%s: Memory squeeze on fr_pvc()\n", - master->name); + frad->name); delete_unused_pvcs(hdlc); return -ENOBUFS; } @@ -1102,8 +1157,8 @@ static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type) dev->destructor = free_netdev; *get_dev_p(pvc, type) = dev; if (!used) { - hdlc->state.fr.dce_changed = 1; - hdlc->state.fr.dce_pvc_count++; + state(hdlc)->dce_changed = 1; + state(hdlc)->dce_pvc_count++; } return 0; } @@ -1128,8 +1183,8 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type) *get_dev_p(pvc, type) = NULL; if (!pvc_is_used(pvc)) { - hdlc->state.fr.dce_pvc_count--; - hdlc->state.fr.dce_changed = 1; + state(hdlc)->dce_pvc_count--; + state(hdlc)->dce_changed = 1; } delete_unused_pvcs(hdlc); return 0; @@ -1137,14 +1192,13 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type) -static void fr_destroy(hdlc_device *hdlc) +static void fr_destroy(struct net_device *frad) { - pvc_device *pvc; - - pvc = hdlc->state.fr.first_pvc; - hdlc->state.fr.first_pvc = NULL; /* All PVCs destroyed */ - hdlc->state.fr.dce_pvc_count = 0; - hdlc->state.fr.dce_changed = 1; + hdlc_device *hdlc = dev_to_hdlc(frad); + pvc_device *pvc = state(hdlc)->first_pvc; + state(hdlc)->first_pvc = NULL; /* All PVCs destroyed */ + state(hdlc)->dce_pvc_count = 0; + state(hdlc)->dce_changed = 1; while (pvc) { pvc_device *next = pvc->next; @@ -1161,8 +1215,17 @@ static void fr_destroy(hdlc_device *hdlc) } +static struct hdlc_proto proto = { + .close = fr_close, + .start = fr_start, + .stop = fr_stop, + .detach = fr_destroy, + .ioctl = fr_ioctl, + .module = THIS_MODULE, +}; + -int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr) +static int fr_ioctl(struct net_device *dev, struct ifreq *ifr) { fr_proto __user *fr_s = ifr->ifr_settings.ifs_ifsu.fr; const size_t size = sizeof(fr_proto); @@ -1173,12 +1236,14 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr) switch (ifr->ifr_settings.type) { case IF_GET_PROTO: + if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */ + return -EINVAL; ifr->ifr_settings.type = IF_PROTO_FR; if (ifr->ifr_settings.size < size) { ifr->ifr_settings.size = size; /* data size wanted */ return -ENOBUFS; } - if (copy_to_user(fr_s, &hdlc->state.fr.settings, size)) + if (copy_to_user(fr_s, &state(hdlc)->settings, size)) return -EFAULT; return 0; @@ -1213,20 +1278,16 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr) if (result) return result; - if (hdlc->proto.id != IF_PROTO_FR) { - hdlc_proto_detach(hdlc); - hdlc->state.fr.first_pvc = NULL; - hdlc->state.fr.dce_pvc_count = 0; + if (dev_to_hdlc(dev)->proto != &proto) { /* Different proto */ + result = attach_hdlc_protocol(dev, &proto, fr_rx, + sizeof(struct frad_state)); + if (result) + return result; + state(hdlc)->first_pvc = NULL; + state(hdlc)->dce_pvc_count = 0; } - memcpy(&hdlc->state.fr.settings, &new_settings, size); - memset(&hdlc->proto, 0, sizeof(hdlc->proto)); - - hdlc->proto.close = fr_close; - hdlc->proto.start = fr_start; - hdlc->proto.stop = fr_stop; - hdlc->proto.detach = fr_destroy; - hdlc->proto.netif_rx = fr_rx; - hdlc->proto.id = IF_PROTO_FR; + memcpy(&state(hdlc)->settings, &new_settings, size); + dev->hard_start_xmit = hdlc->xmit; dev->hard_header = NULL; dev->type = ARPHRD_FRAD; @@ -1238,6 +1299,9 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr) case IF_PROTO_FR_DEL_PVC: case IF_PROTO_FR_ADD_ETH_PVC: case IF_PROTO_FR_DEL_ETH_PVC: + if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */ + return -EINVAL; + if(!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1263,3 +1327,24 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } + + +static int __init mod_init(void) +{ + register_hdlc_protocol(&proto); + return 0; +} + + +static void __exit mod_exit(void) +{ + unregister_hdlc_protocol(&proto); +} + + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("Frame-Relay protocol support for generic HDLC"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c deleted file mode 100644 index 04ca1f7b6424..000000000000 --- a/drivers/net/wan/hdlc_generic.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Generic HDLC support routines for Linux - * - * Copyright (C) 1999 - 2005 Krzysztof Halasa - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * Currently supported: - * * raw IP-in-HDLC - * * Cisco HDLC - * * Frame Relay with ANSI or CCITT LMI (both user and network side) - * * PPP - * * X.25 - * - * Use sethdlc utility to set line parameters, protocol and PVCs - * - * How does it work: - * - proto.open(), close(), start(), stop() calls are serialized. - * The order is: open, [ start, stop ... ] close ... - * - proto.start() and stop() are called with spin_lock_irq held. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -static const char* version = "HDLC support module revision 1.19"; - -#undef DEBUG_LINK - - -static int hdlc_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - - - -static struct net_device_stats *hdlc_get_stats(struct net_device *dev) -{ - return hdlc_stats(dev); -} - - - -static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *p, struct net_device *orig_dev) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); - if (hdlc->proto.netif_rx) - return hdlc->proto.netif_rx(skb); - - hdlc->stats.rx_dropped++; /* Shouldn't happen */ - dev_kfree_skb(skb); - return NET_RX_DROP; -} - - - -static inline void hdlc_proto_start(struct net_device *dev) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); - if (hdlc->proto.start) - return hdlc->proto.start(dev); -} - - - -static inline void hdlc_proto_stop(struct net_device *dev) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); - if (hdlc->proto.stop) - return hdlc->proto.stop(dev); -} - - - -static int hdlc_device_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = ptr; - hdlc_device *hdlc; - unsigned long flags; - int on; - - if (dev->get_stats != hdlc_get_stats) - return NOTIFY_DONE; /* not an HDLC device */ - - if (event != NETDEV_CHANGE) - return NOTIFY_DONE; /* Only interrested in carrier changes */ - - on = netif_carrier_ok(dev); - -#ifdef DEBUG_LINK - printk(KERN_DEBUG "%s: hdlc_device_event NETDEV_CHANGE, carrier %i\n", - dev->name, on); -#endif - - hdlc = dev_to_hdlc(dev); - spin_lock_irqsave(&hdlc->state_lock, flags); - - if (hdlc->carrier == on) - goto carrier_exit; /* no change in DCD line level */ - - hdlc->carrier = on; - - if (!hdlc->open) - goto carrier_exit; - - if (hdlc->carrier) { - printk(KERN_INFO "%s: Carrier detected\n", dev->name); - hdlc_proto_start(dev); - } else { - printk(KERN_INFO "%s: Carrier lost\n", dev->name); - hdlc_proto_stop(dev); - } - -carrier_exit: - spin_unlock_irqrestore(&hdlc->state_lock, flags); - return NOTIFY_DONE; -} - - - -/* Must be called by hardware driver when HDLC device is being opened */ -int hdlc_open(struct net_device *dev) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); -#ifdef DEBUG_LINK - printk(KERN_DEBUG "hdlc_open() carrier %i open %i\n", - hdlc->carrier, hdlc->open); -#endif - - if (hdlc->proto.id == -1) - return -ENOSYS; /* no protocol attached */ - - if (hdlc->proto.open) { - int result = hdlc->proto.open(dev); - if (result) - return result; - } - - spin_lock_irq(&hdlc->state_lock); - - if (hdlc->carrier) { - printk(KERN_INFO "%s: Carrier detected\n", dev->name); - hdlc_proto_start(dev); - } else - printk(KERN_INFO "%s: No carrier\n", dev->name); - - hdlc->open = 1; - - spin_unlock_irq(&hdlc->state_lock); - return 0; -} - - - -/* Must be called by hardware driver when HDLC device is being closed */ -void hdlc_close(struct net_device *dev) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); -#ifdef DEBUG_LINK - printk(KERN_DEBUG "hdlc_close() carrier %i open %i\n", - hdlc->carrier, hdlc->open); -#endif - - spin_lock_irq(&hdlc->state_lock); - - hdlc->open = 0; - if (hdlc->carrier) - hdlc_proto_stop(dev); - - spin_unlock_irq(&hdlc->state_lock); - - if (hdlc->proto.close) - hdlc->proto.close(dev); -} - - - -#ifndef CONFIG_HDLC_RAW -#define hdlc_raw_ioctl(dev, ifr) -ENOSYS -#endif - -#ifndef CONFIG_HDLC_RAW_ETH -#define hdlc_raw_eth_ioctl(dev, ifr) -ENOSYS -#endif - -#ifndef CONFIG_HDLC_PPP -#define hdlc_ppp_ioctl(dev, ifr) -ENOSYS -#endif - -#ifndef CONFIG_HDLC_CISCO -#define hdlc_cisco_ioctl(dev, ifr) -ENOSYS -#endif - -#ifndef CONFIG_HDLC_FR -#define hdlc_fr_ioctl(dev, ifr) -ENOSYS -#endif - -#ifndef CONFIG_HDLC_X25 -#define hdlc_x25_ioctl(dev, ifr) -ENOSYS -#endif - - -int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); - unsigned int proto; - - if (cmd != SIOCWANDEV) - return -EINVAL; - - switch(ifr->ifr_settings.type) { - case IF_PROTO_HDLC: - case IF_PROTO_HDLC_ETH: - case IF_PROTO_PPP: - case IF_PROTO_CISCO: - case IF_PROTO_FR: - case IF_PROTO_X25: - proto = ifr->ifr_settings.type; - break; - - default: - proto = hdlc->proto.id; - } - - switch(proto) { - case IF_PROTO_HDLC: return hdlc_raw_ioctl(dev, ifr); - case IF_PROTO_HDLC_ETH: return hdlc_raw_eth_ioctl(dev, ifr); - case IF_PROTO_PPP: return hdlc_ppp_ioctl(dev, ifr); - case IF_PROTO_CISCO: return hdlc_cisco_ioctl(dev, ifr); - case IF_PROTO_FR: return hdlc_fr_ioctl(dev, ifr); - case IF_PROTO_X25: return hdlc_x25_ioctl(dev, ifr); - default: return -EINVAL; - } -} - -void hdlc_setup(struct net_device *dev) -{ - hdlc_device *hdlc = dev_to_hdlc(dev); - - dev->get_stats = hdlc_get_stats; - dev->change_mtu = hdlc_change_mtu; - dev->mtu = HDLC_MAX_MTU; - - dev->type = ARPHRD_RAWHDLC; - dev->hard_header_len = 16; - - dev->flags = IFF_POINTOPOINT | IFF_NOARP; - - hdlc->proto.id = -1; - hdlc->proto.detach = NULL; - hdlc->carrier = 1; - hdlc->open = 0; - spin_lock_init(&hdlc->state_lock); -} - -struct net_device *alloc_hdlcdev(void *priv) -{ - struct net_device *dev; - dev = alloc_netdev(sizeof(hdlc_device), "hdlc%d", hdlc_setup); - if (dev) - dev_to_hdlc(dev)->priv = priv; - return dev; -} - -void unregister_hdlc_device(struct net_device *dev) -{ - rtnl_lock(); - hdlc_proto_detach(dev_to_hdlc(dev)); - unregister_netdevice(dev); - rtnl_unlock(); -} - - - -MODULE_AUTHOR("Krzysztof Halasa "); -MODULE_DESCRIPTION("HDLC support module"); -MODULE_LICENSE("GPL v2"); - -EXPORT_SYMBOL(hdlc_open); -EXPORT_SYMBOL(hdlc_close); -EXPORT_SYMBOL(hdlc_ioctl); -EXPORT_SYMBOL(hdlc_setup); -EXPORT_SYMBOL(alloc_hdlcdev); -EXPORT_SYMBOL(unregister_hdlc_device); - -static struct packet_type hdlc_packet_type = { - .type = __constant_htons(ETH_P_HDLC), - .func = hdlc_rcv, -}; - - -static struct notifier_block hdlc_notifier = { - .notifier_call = hdlc_device_event, -}; - - -static int __init hdlc_module_init(void) -{ - int result; - - printk(KERN_INFO "%s\n", version); - if ((result = register_netdevice_notifier(&hdlc_notifier)) != 0) - return result; - dev_add_pack(&hdlc_packet_type); - return 0; -} - - - -static void __exit hdlc_module_exit(void) -{ - dev_remove_pack(&hdlc_packet_type); - unregister_netdevice_notifier(&hdlc_notifier); -} - - -module_init(hdlc_module_init); -module_exit(hdlc_module_exit); diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index fbaab5bf71eb..e9f717070fde 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -2,7 +2,7 @@ * Generic HDLC support routines for Linux * Point-to-point protocol support * - * Copyright (C) 1999 - 2003 Krzysztof Halasa + * Copyright (C) 1999 - 2006 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -22,6 +22,21 @@ #include #include #include +#include + +struct ppp_state { + struct ppp_device pppdev; + struct ppp_device *syncppp_ptr; + int (*old_change_mtu)(struct net_device *dev, int new_mtu); +}; + +static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr); + + +static inline struct ppp_state* state(hdlc_device *hdlc) +{ + return(struct ppp_state *)(hdlc->state); +} static int ppp_open(struct net_device *dev) @@ -30,16 +45,16 @@ static int ppp_open(struct net_device *dev) void *old_ioctl; int result; - dev->priv = &hdlc->state.ppp.syncppp_ptr; - hdlc->state.ppp.syncppp_ptr = &hdlc->state.ppp.pppdev; - hdlc->state.ppp.pppdev.dev = dev; + dev->priv = &state(hdlc)->syncppp_ptr; + state(hdlc)->syncppp_ptr = &state(hdlc)->pppdev; + state(hdlc)->pppdev.dev = dev; old_ioctl = dev->do_ioctl; - hdlc->state.ppp.old_change_mtu = dev->change_mtu; - sppp_attach(&hdlc->state.ppp.pppdev); + state(hdlc)->old_change_mtu = dev->change_mtu; + sppp_attach(&state(hdlc)->pppdev); /* sppp_attach nukes them. We don't need syncppp's ioctl */ dev->do_ioctl = old_ioctl; - hdlc->state.ppp.pppdev.sppp.pp_flags &= ~PP_CISCO; + state(hdlc)->pppdev.sppp.pp_flags &= ~PP_CISCO; dev->type = ARPHRD_PPP; result = sppp_open(dev); if (result) { @@ -59,7 +74,7 @@ static void ppp_close(struct net_device *dev) sppp_close(dev); sppp_detach(dev); dev->rebuild_header = NULL; - dev->change_mtu = hdlc->state.ppp.old_change_mtu; + dev->change_mtu = state(hdlc)->old_change_mtu; dev->mtu = HDLC_MAX_MTU; dev->hard_header_len = 16; } @@ -73,13 +88,24 @@ static __be16 ppp_type_trans(struct sk_buff *skb, struct net_device *dev) -int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr) +static struct hdlc_proto proto = { + .open = ppp_open, + .close = ppp_close, + .type_trans = ppp_type_trans, + .ioctl = ppp_ioctl, + .module = THIS_MODULE, +}; + + +static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr) { hdlc_device *hdlc = dev_to_hdlc(dev); int result; switch (ifr->ifr_settings.type) { case IF_GET_PROTO: + if (dev_to_hdlc(dev)->proto != &proto) + return -EINVAL; ifr->ifr_settings.type = IF_PROTO_PPP; return 0; /* return protocol only, no settable parameters */ @@ -96,13 +122,10 @@ int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr) if (result) return result; - hdlc_proto_detach(hdlc); - memset(&hdlc->proto, 0, sizeof(hdlc->proto)); - - hdlc->proto.open = ppp_open; - hdlc->proto.close = ppp_close; - hdlc->proto.type_trans = ppp_type_trans; - hdlc->proto.id = IF_PROTO_PPP; + result = attach_hdlc_protocol(dev, &proto, NULL, + sizeof(struct ppp_state)); + if (result) + return result; dev->hard_start_xmit = hdlc->xmit; dev->hard_header = NULL; dev->type = ARPHRD_PPP; @@ -113,3 +136,25 @@ int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } + + +static int __init mod_init(void) +{ + register_hdlc_protocol(&proto); + return 0; +} + + + +static void __exit mod_exit(void) +{ + unregister_hdlc_protocol(&proto); +} + + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("PPP protocol support for generic HDLC"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c index f15aa6ba77f1..fe3cae5c6b9d 100644 --- a/drivers/net/wan/hdlc_raw.c +++ b/drivers/net/wan/hdlc_raw.c @@ -2,7 +2,7 @@ * Generic HDLC support routines for Linux * HDLC support * - * Copyright (C) 1999 - 2003 Krzysztof Halasa + * Copyright (C) 1999 - 2006 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -24,6 +24,8 @@ #include +static int raw_ioctl(struct net_device *dev, struct ifreq *ifr); + static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev) { return __constant_htons(ETH_P_IP); @@ -31,7 +33,14 @@ static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev) -int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr) +static struct hdlc_proto proto = { + .type_trans = raw_type_trans, + .ioctl = raw_ioctl, + .module = THIS_MODULE, +}; + + +static int raw_ioctl(struct net_device *dev, struct ifreq *ifr) { raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc; const size_t size = sizeof(raw_hdlc_proto); @@ -41,12 +50,14 @@ int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr) switch (ifr->ifr_settings.type) { case IF_GET_PROTO: + if (dev_to_hdlc(dev)->proto != &proto) + return -EINVAL; ifr->ifr_settings.type = IF_PROTO_HDLC; if (ifr->ifr_settings.size < size) { ifr->ifr_settings.size = size; /* data size wanted */ return -ENOBUFS; } - if (copy_to_user(raw_s, &hdlc->state.raw_hdlc.settings, size)) + if (copy_to_user(raw_s, hdlc->state, size)) return -EFAULT; return 0; @@ -71,12 +82,11 @@ int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr) if (result) return result; - hdlc_proto_detach(hdlc); - memcpy(&hdlc->state.raw_hdlc.settings, &new_settings, size); - memset(&hdlc->proto, 0, sizeof(hdlc->proto)); - - hdlc->proto.type_trans = raw_type_trans; - hdlc->proto.id = IF_PROTO_HDLC; + result = attach_hdlc_protocol(dev, &proto, NULL, + sizeof(raw_hdlc_proto)); + if (result) + return result; + memcpy(hdlc->state, &new_settings, size); dev->hard_start_xmit = hdlc->xmit; dev->hard_header = NULL; dev->type = ARPHRD_RAWHDLC; @@ -88,3 +98,25 @@ int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } + + +static int __init mod_init(void) +{ + register_hdlc_protocol(&proto); + return 0; +} + + + +static void __exit mod_exit(void) +{ + unregister_hdlc_protocol(&proto); +} + + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("Raw HDLC protocol support for generic HDLC"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index d1884987f94e..1a69a9aaa9b9 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -2,7 +2,7 @@ * Generic HDLC support routines for Linux * HDLC Ethernet emulation support * - * Copyright (C) 2002-2003 Krzysztof Halasa + * Copyright (C) 2002-2006 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -25,6 +25,7 @@ #include #include +static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr); static int eth_tx(struct sk_buff *skb, struct net_device *dev) { @@ -44,7 +45,14 @@ static int eth_tx(struct sk_buff *skb, struct net_device *dev) } -int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) +static struct hdlc_proto proto = { + .type_trans = eth_type_trans, + .ioctl = raw_eth_ioctl, + .module = THIS_MODULE, +}; + + +static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) { raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc; const size_t size = sizeof(raw_hdlc_proto); @@ -56,12 +64,14 @@ int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) switch (ifr->ifr_settings.type) { case IF_GET_PROTO: + if (dev_to_hdlc(dev)->proto != &proto) + return -EINVAL; ifr->ifr_settings.type = IF_PROTO_HDLC_ETH; if (ifr->ifr_settings.size < size) { ifr->ifr_settings.size = size; /* data size wanted */ return -ENOBUFS; } - if (copy_to_user(raw_s, &hdlc->state.raw_hdlc.settings, size)) + if (copy_to_user(raw_s, hdlc->state, size)) return -EFAULT; return 0; @@ -86,12 +96,11 @@ int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) if (result) return result; - hdlc_proto_detach(hdlc); - memcpy(&hdlc->state.raw_hdlc.settings, &new_settings, size); - memset(&hdlc->proto, 0, sizeof(hdlc->proto)); - - hdlc->proto.type_trans = eth_type_trans; - hdlc->proto.id = IF_PROTO_HDLC_ETH; + result = attach_hdlc_protocol(dev, &proto, NULL, + sizeof(raw_hdlc_proto)); + if (result) + return result; + memcpy(hdlc->state, &new_settings, size); dev->hard_start_xmit = eth_tx; old_ch_mtu = dev->change_mtu; old_qlen = dev->tx_queue_len; @@ -106,3 +115,25 @@ int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } + + +static int __init mod_init(void) +{ + register_hdlc_protocol(&proto); + return 0; +} + + + +static void __exit mod_exit(void) +{ + unregister_hdlc_protocol(&proto); +} + + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("Ethernet encapsulation support for generic HDLC"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index a867fb411f89..e4bb9f8ad433 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -2,7 +2,7 @@ * Generic HDLC support routines for Linux * X.25 support * - * Copyright (C) 1999 - 2003 Krzysztof Halasa + * Copyright (C) 1999 - 2006 Krzysztof Halasa * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -25,6 +25,8 @@ #include +static int x25_ioctl(struct net_device *dev, struct ifreq *ifr); + /* These functions are callbacks called by LAPB layer */ static void x25_connect_disconnect(struct net_device *dev, int reason, int code) @@ -162,30 +164,39 @@ static void x25_close(struct net_device *dev) static int x25_rx(struct sk_buff *skb) { - hdlc_device *hdlc = dev_to_hdlc(skb->dev); + struct hdlc_device_desc *desc = dev_to_desc(skb->dev); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - hdlc->stats.rx_dropped++; + desc->stats.rx_dropped++; return NET_RX_DROP; } if (lapb_data_received(skb->dev, skb) == LAPB_OK) return NET_RX_SUCCESS; - hdlc->stats.rx_errors++; + desc->stats.rx_errors++; dev_kfree_skb_any(skb); return NET_RX_DROP; } +static struct hdlc_proto proto = { + .open = x25_open, + .close = x25_close, + .ioctl = x25_ioctl, + .module = THIS_MODULE, +}; + -int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr) +static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) { hdlc_device *hdlc = dev_to_hdlc(dev); int result; switch (ifr->ifr_settings.type) { case IF_GET_PROTO: + if (dev_to_hdlc(dev)->proto != &proto) + return -EINVAL; ifr->ifr_settings.type = IF_PROTO_X25; return 0; /* return protocol only, no settable parameters */ @@ -200,14 +211,9 @@ int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr) if (result) return result; - hdlc_proto_detach(hdlc); - memset(&hdlc->proto, 0, sizeof(hdlc->proto)); - - hdlc->proto.open = x25_open; - hdlc->proto.close = x25_close; - hdlc->proto.netif_rx = x25_rx; - hdlc->proto.type_trans = NULL; - hdlc->proto.id = IF_PROTO_X25; + if ((result = attach_hdlc_protocol(dev, &proto, + x25_rx, 0)) != 0) + return result; dev->hard_start_xmit = x25_xmit; dev->hard_header = NULL; dev->type = ARPHRD_X25; @@ -218,3 +224,25 @@ int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } + + +static int __init mod_init(void) +{ + register_hdlc_protocol(&proto); + return 0; +} + + + +static void __exit mod_exit(void) +{ + unregister_hdlc_protocol(&proto); +} + + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_AUTHOR("Krzysztof Halasa "); +MODULE_DESCRIPTION("X.25 protocol support for generic HDLC"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index d5ebbb29aeae..d4b333938f73 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -11,95 +11,46 @@ #ifndef __HDLC_H #define __HDLC_H -#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ - -#define CLOCK_DEFAULT 0 /* Default setting */ -#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ -#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ -#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ -#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ - - -#define ENCODING_DEFAULT 0 /* Default setting */ -#define ENCODING_NRZ 1 -#define ENCODING_NRZI 2 -#define ENCODING_FM_MARK 3 -#define ENCODING_FM_SPACE 4 -#define ENCODING_MANCHESTER 5 - - -#define PARITY_DEFAULT 0 /* Default setting */ -#define PARITY_NONE 1 /* No parity */ -#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ -#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ -#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */ -#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ -#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ -#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ - -#define LMI_DEFAULT 0 /* Default setting */ -#define LMI_NONE 1 /* No LMI, all PVCs are static */ -#define LMI_ANSI 2 /* ANSI Annex D */ -#define LMI_CCITT 3 /* ITU-T Annex A */ -#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ #define HDLC_MAX_MTU 1500 /* Ethernet 1500 bytes */ +#if 0 #define HDLC_MAX_MRU (HDLC_MAX_MTU + 10 + 14 + 4) /* for ETH+VLAN over FR */ +#else +#define HDLC_MAX_MRU 1600 /* as required for FR network */ +#endif #ifdef __KERNEL__ #include #include -#include #include -typedef struct { /* Used in Cisco and PPP mode */ - u8 address; - u8 control; - u16 protocol; -}__attribute__ ((packed)) hdlc_header; - - - -typedef struct { - u32 type; /* code */ - u32 par1; - u32 par2; - u16 rel; /* reliability */ - u32 time; -}__attribute__ ((packed)) cisco_packet; -#define CISCO_PACKET_LEN 18 -#define CISCO_BIG_PACKET_LEN 20 - - - -typedef struct pvc_device_struct { - struct net_device *master; - struct net_device *main; - struct net_device *ether; /* bridged Ethernet interface */ - struct pvc_device_struct *next; /* Sorted in ascending DLCI order */ - int dlci; - int open_count; - - struct { - unsigned int new: 1; - unsigned int active: 1; - unsigned int exist: 1; - unsigned int deleted: 1; - unsigned int fecn: 1; - unsigned int becn: 1; - unsigned int bandwidth; /* Cisco LMI reporting only */ - }state; -}pvc_device; - - - -typedef struct hdlc_device_struct { - /* To be initialized by hardware driver */ +/* Used by all network devices here, pointed to by netdev_priv(dev) */ +struct hdlc_device_desc { + int (*netif_rx)(struct sk_buff *skb); struct net_device_stats stats; - +}; + +/* This structure is a private property of HDLC protocols. + Hardware drivers have no interest here */ + +struct hdlc_proto { + int (*open)(struct net_device *dev); + void (*close)(struct net_device *dev); + void (*start)(struct net_device *dev); /* if open & DCD */ + void (*stop)(struct net_device *dev); /* if open & !DCD */ + void (*detach)(struct net_device *dev); + int (*ioctl)(struct net_device *dev, struct ifreq *ifr); + unsigned short (*type_trans)(struct sk_buff *skb, + struct net_device *dev); + struct module *module; + struct hdlc_proto *next; /* next protocol in the list */ +}; + + +typedef struct hdlc_device { /* used by HDLC layer to take control over HDLC device from hw driver*/ int (*attach)(struct net_device *dev, unsigned short encoding, unsigned short parity); @@ -107,82 +58,18 @@ typedef struct hdlc_device_struct { /* hardware driver must handle this instead of dev->hard_start_xmit */ int (*xmit)(struct sk_buff *skb, struct net_device *dev); - /* Things below are for HDLC layer internal use only */ - struct { - int (*open)(struct net_device *dev); - void (*close)(struct net_device *dev); - - /* if open & DCD */ - void (*start)(struct net_device *dev); - /* if open & !DCD */ - void (*stop)(struct net_device *dev); - - void (*detach)(struct hdlc_device_struct *hdlc); - int (*netif_rx)(struct sk_buff *skb); - unsigned short (*type_trans)(struct sk_buff *skb, - struct net_device *dev); - int id; /* IF_PROTO_HDLC/CISCO/FR/etc. */ - }proto; - + const struct hdlc_proto *proto; int carrier; int open; spinlock_t state_lock; - - union { - struct { - fr_proto settings; - pvc_device *first_pvc; - int dce_pvc_count; - - struct timer_list timer; - unsigned long last_poll; - int reliable; - int dce_changed; - int request; - int fullrep_sent; - u32 last_errors; /* last errors bit list */ - u8 n391cnt; - u8 txseq; /* TX sequence number */ - u8 rxseq; /* RX sequence number */ - }fr; - - struct { - cisco_proto settings; - - struct timer_list timer; - unsigned long last_poll; - int up; - int request_sent; - u32 txseq; /* TX sequence number */ - u32 rxseq; /* RX sequence number */ - }cisco; - - struct { - raw_hdlc_proto settings; - }raw_hdlc; - - struct { - struct ppp_device pppdev; - struct ppp_device *syncppp_ptr; - int (*old_change_mtu)(struct net_device *dev, - int new_mtu); - }ppp; - }state; + void *state; void *priv; }hdlc_device; -int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr); -int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr); -int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr); -int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr); -int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr); -int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr); - - -/* Exported from hdlc.o */ +/* Exported from hdlc module */ /* Called by hardware driver when a user requests HDLC service */ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); @@ -191,17 +78,21 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); #define register_hdlc_device(dev) register_netdev(dev) void unregister_hdlc_device(struct net_device *dev); + +void register_hdlc_protocol(struct hdlc_proto *proto); +void unregister_hdlc_protocol(struct hdlc_proto *proto); + struct net_device *alloc_hdlcdev(void *priv); -static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev) + +static __inline__ struct hdlc_device_desc* dev_to_desc(struct net_device *dev) { return netdev_priv(dev); } - -static __inline__ pvc_device* dev_to_pvc(struct net_device *dev) +static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev) { - return (pvc_device*)dev->priv; + return netdev_priv(dev) + sizeof(struct hdlc_device_desc); } @@ -225,18 +116,14 @@ int hdlc_open(struct net_device *dev); /* Must be called by hardware driver when HDLC device is being closed */ void hdlc_close(struct net_device *dev); +int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, + int (*rx)(struct sk_buff *skb), size_t size); /* May be used by hardware driver to gain control over HDLC device */ -static __inline__ void hdlc_proto_detach(hdlc_device *hdlc) -{ - if (hdlc->proto.detach) - hdlc->proto.detach(hdlc); - hdlc->proto.detach = NULL; -} - +void detach_hdlc_protocol(struct net_device *dev); static __inline__ struct net_device_stats *hdlc_stats(struct net_device *dev) { - return &dev_to_hdlc(dev)->stats; + return &dev_to_desc(dev)->stats; } @@ -248,8 +135,8 @@ static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb, skb->mac.raw = skb->data; skb->dev = dev; - if (hdlc->proto.type_trans) - return hdlc->proto.type_trans(skb, dev); + if (hdlc->proto->type_trans) + return hdlc->proto->type_trans(skb, dev); else return htons(ETH_P_HDLC); } diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h index 78430ba3ea69..583972364357 100644 --- a/include/linux/hdlc/ioctl.h +++ b/include/linux/hdlc/ioctl.h @@ -1,6 +1,39 @@ #ifndef __HDLC_IOCTL_H__ #define __HDLC_IOCTL_H__ + +#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ + +#define CLOCK_DEFAULT 0 /* Default setting */ +#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ +#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ +#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ +#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ + + +#define ENCODING_DEFAULT 0 /* Default setting */ +#define ENCODING_NRZ 1 +#define ENCODING_NRZI 2 +#define ENCODING_FM_MARK 3 +#define ENCODING_FM_SPACE 4 +#define ENCODING_MANCHESTER 5 + + +#define PARITY_DEFAULT 0 /* Default setting */ +#define PARITY_NONE 1 /* No parity */ +#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ +#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ +#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */ +#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ +#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ +#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ + +#define LMI_DEFAULT 0 /* Default setting */ +#define LMI_NONE 1 /* No LMI, all PVCs are static */ +#define LMI_ANSI 2 /* ANSI Annex D */ +#define LMI_CCITT 3 /* ITU-T Annex A */ +#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ + typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. */ -- cgit v1.2.3-71-gd317 From 51c3711704b66986373408cbc0540abea43d2380 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 13 Aug 2006 23:33:16 +0200 Subject: i2c-algo-sibyte: Merge into i2c-sibyte i2c-algo-sibyte: Merge into i2c-sibyte Merge i2c-algo-sibyte into i2c-sibyte, as this is a complete, hardware-dependent SMBus implementation and not a reusable algorithm. Perform some basic coding style cleanups while we're here (mainly space-based indentation replaced by tabulations.) Signed-off-by: Jean Delvare Cc: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/algos/Kconfig | 6 -- drivers/i2c/algos/Makefile | 1 - drivers/i2c/algos/i2c-algo-sibyte.c | 186 ------------------------------------ drivers/i2c/busses/i2c-sibyte.c | 158 +++++++++++++++++++++++++++++- include/linux/i2c-algo-sibyte.h | 33 ------- 5 files changed, 155 insertions(+), 229 deletions(-) delete mode 100644 drivers/i2c/algos/i2c-algo-sibyte.c delete mode 100644 include/linux/i2c-algo-sibyte.h (limited to 'include/linux') diff --git a/drivers/i2c/algos/Kconfig b/drivers/i2c/algos/Kconfig index 30408015d231..c034820615bb 100644 --- a/drivers/i2c/algos/Kconfig +++ b/drivers/i2c/algos/Kconfig @@ -53,12 +53,6 @@ config I2C_ALGO8XX tristate "MPC8xx CPM I2C interface" depends on 8xx && I2C -config I2C_ALGO_SIBYTE - tristate "SiByte SMBus interface" - depends on SIBYTE_SB1xxx_SOC && I2C - help - Supports the SiByte SOC on-chip I2C interfaces (2 channels). - config I2C_ALGO_SGI tristate "I2C SGI interfaces" depends on I2C && (SGI_IP22 || SGI_IP32 || X86_VISWS) diff --git a/drivers/i2c/algos/Makefile b/drivers/i2c/algos/Makefile index 867fe1f67401..208be04a3dbd 100644 --- a/drivers/i2c/algos/Makefile +++ b/drivers/i2c/algos/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_I2C_ALGOBIT) += i2c-algo-bit.o obj-$(CONFIG_I2C_ALGOPCF) += i2c-algo-pcf.o obj-$(CONFIG_I2C_ALGOPCA) += i2c-algo-pca.o obj-$(CONFIG_I2C_ALGOITE) += i2c-algo-ite.o -obj-$(CONFIG_I2C_ALGO_SIBYTE) += i2c-algo-sibyte.o obj-$(CONFIG_I2C_ALGO_SGI) += i2c-algo-sgi.o ifeq ($(CONFIG_I2C_DEBUG_ALGO),y) diff --git a/drivers/i2c/algos/i2c-algo-sibyte.c b/drivers/i2c/algos/i2c-algo-sibyte.c deleted file mode 100644 index 16d666fa16f3..000000000000 --- a/drivers/i2c/algos/i2c-algo-sibyte.c +++ /dev/null @@ -1,186 +0,0 @@ -/* ------------------------------------------------------------------------- */ -/* i2c-algo-sibyte.c i2c driver algorithms for bit-shift adapters */ -/* ------------------------------------------------------------------------- */ -/* Copyright (C) 2001,2002,2003 Broadcom Corporation - Copyright (C) 1995-2000 Simon G. Vogl - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* ------------------------------------------------------------------------- */ - -/* With some changes from Kyösti Mälkki and even - Frodo Looijaard . */ - -/* Ported for SiByte SOCs by Broadcom Corporation. */ - -#include -#include -#include - -#include -#include -#include - -#include -#include - -/* ----- global defines ----------------------------------------------- */ -#define SMB_CSR(a,r) ((long)(a->reg_base + r)) - -/* ----- global variables --------------------------------------------- */ - -/* module parameters: - */ -static int bit_scan; /* have a look at what's hanging 'round */ - - -static int smbus_xfer(struct i2c_adapter *i2c_adap, u16 addr, - unsigned short flags, char read_write, - u8 command, int size, union i2c_smbus_data * data) -{ - struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data; - int data_bytes = 0; - int error; - - while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY) - ; - - switch (size) { - case I2C_SMBUS_QUICK: - csr_out32((V_SMB_ADDR(addr) | (read_write == I2C_SMBUS_READ ? M_SMB_QDATA : 0) | - V_SMB_TT_QUICKCMD), SMB_CSR(adap, R_SMB_START)); - break; - case I2C_SMBUS_BYTE: - if (read_write == I2C_SMBUS_READ) { - csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_RD1BYTE), - SMB_CSR(adap, R_SMB_START)); - data_bytes = 1; - } else { - csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD)); - csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR1BYTE), - SMB_CSR(adap, R_SMB_START)); - } - break; - case I2C_SMBUS_BYTE_DATA: - csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD)); - if (read_write == I2C_SMBUS_READ) { - csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD1BYTE), - SMB_CSR(adap, R_SMB_START)); - data_bytes = 1; - } else { - csr_out32(V_SMB_LB(data->byte), SMB_CSR(adap, R_SMB_DATA)); - csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE), - SMB_CSR(adap, R_SMB_START)); - } - break; - case I2C_SMBUS_WORD_DATA: - csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD)); - if (read_write == I2C_SMBUS_READ) { - csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD2BYTE), - SMB_CSR(adap, R_SMB_START)); - data_bytes = 2; - } else { - csr_out32(V_SMB_LB(data->word & 0xff), SMB_CSR(adap, R_SMB_DATA)); - csr_out32(V_SMB_MB(data->word >> 8), SMB_CSR(adap, R_SMB_DATA)); - csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE), - SMB_CSR(adap, R_SMB_START)); - } - break; - default: - return -1; /* XXXKW better error code? */ - } - - while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY) - ; - - error = csr_in32(SMB_CSR(adap, R_SMB_STATUS)); - if (error & M_SMB_ERROR) { - /* Clear error bit by writing a 1 */ - csr_out32(M_SMB_ERROR, SMB_CSR(adap, R_SMB_STATUS)); - return -1; /* XXXKW better error code? */ - } - - if (data_bytes == 1) - data->byte = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xff; - if (data_bytes == 2) - data->word = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xffff; - - return 0; -} - -static u32 bit_func(struct i2c_adapter *adap) -{ - return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | - I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA); -} - - -/* -----exported algorithm data: ------------------------------------- */ - -static struct i2c_algorithm i2c_sibyte_algo = { - .smbus_xfer = smbus_xfer, - .functionality = bit_func, -}; - -/* - * registering functions to load algorithms at runtime - */ -int i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed) -{ - int i; - struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data; - - /* register new adapter to i2c module... */ - i2c_adap->algo = &i2c_sibyte_algo; - - /* Set the frequency to 100 kHz */ - csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ)); - csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL)); - - /* scan bus */ - if (bit_scan) { - union i2c_smbus_data data; - int rc; - printk(KERN_INFO " i2c-algo-sibyte.o: scanning bus %s.\n", - i2c_adap->name); - for (i = 0x00; i < 0x7f; i++) { - /* XXXKW is this a realistic probe? */ - rc = smbus_xfer(i2c_adap, i, 0, I2C_SMBUS_READ, 0, - I2C_SMBUS_BYTE_DATA, &data); - if (!rc) { - printk("(%02x)",i); - } else - printk("."); - } - printk("\n"); - } - - return i2c_add_adapter(i2c_adap); -} - - -int i2c_sibyte_del_bus(struct i2c_adapter *adap) -{ - return i2c_del_adapter(adap); -} - - -EXPORT_SYMBOL(i2c_sibyte_add_bus); -EXPORT_SYMBOL(i2c_sibyte_del_bus); - -MODULE_AUTHOR("Kip Walker, Broadcom Corp."); -MODULE_DESCRIPTION("SiByte I2C-Bus algorithm"); -module_param(bit_scan, int, 0); -MODULE_PARM_DESC(bit_scan, "Scan for active chips on the bus"); -MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c index fa503ed9f86d..f516eb7a23f7 100644 --- a/drivers/i2c/busses/i2c-sibyte.c +++ b/drivers/i2c/busses/i2c-sibyte.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2004 Steven J. Hill * Copyright (C) 2001,2002,2003 Broadcom Corporation + * Copyright (C) 1995-2000 Simon G. Vogl * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -17,11 +18,162 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include #include -#include +#include +#include +#include #include #include + +struct i2c_algo_sibyte_data { + void *data; /* private data */ + int bus; /* which bus */ + void *reg_base; /* CSR base */ +}; + +/* ----- global defines ----------------------------------------------- */ +#define SMB_CSR(a,r) ((long)(a->reg_base + r)) + +/* ----- global variables --------------------------------------------- */ + +/* module parameters: + */ +static int bit_scan; /* have a look at what's hanging 'round */ +module_param(bit_scan, int, 0); +MODULE_PARM_DESC(bit_scan, "Scan for active chips on the bus"); + + +static int smbus_xfer(struct i2c_adapter *i2c_adap, u16 addr, + unsigned short flags, char read_write, + u8 command, int size, union i2c_smbus_data * data) +{ + struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data; + int data_bytes = 0; + int error; + + while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY) + ; + + switch (size) { + case I2C_SMBUS_QUICK: + csr_out32((V_SMB_ADDR(addr) | + (read_write == I2C_SMBUS_READ ? M_SMB_QDATA : 0) | + V_SMB_TT_QUICKCMD), SMB_CSR(adap, R_SMB_START)); + break; + case I2C_SMBUS_BYTE: + if (read_write == I2C_SMBUS_READ) { + csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_RD1BYTE), + SMB_CSR(adap, R_SMB_START)); + data_bytes = 1; + } else { + csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD)); + csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR1BYTE), + SMB_CSR(adap, R_SMB_START)); + } + break; + case I2C_SMBUS_BYTE_DATA: + csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD)); + if (read_write == I2C_SMBUS_READ) { + csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD1BYTE), + SMB_CSR(adap, R_SMB_START)); + data_bytes = 1; + } else { + csr_out32(V_SMB_LB(data->byte), + SMB_CSR(adap, R_SMB_DATA)); + csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE), + SMB_CSR(adap, R_SMB_START)); + } + break; + case I2C_SMBUS_WORD_DATA: + csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD)); + if (read_write == I2C_SMBUS_READ) { + csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD2BYTE), + SMB_CSR(adap, R_SMB_START)); + data_bytes = 2; + } else { + csr_out32(V_SMB_LB(data->word & 0xff), + SMB_CSR(adap, R_SMB_DATA)); + csr_out32(V_SMB_MB(data->word >> 8), + SMB_CSR(adap, R_SMB_DATA)); + csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE), + SMB_CSR(adap, R_SMB_START)); + } + break; + default: + return -1; /* XXXKW better error code? */ + } + + while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY) + ; + + error = csr_in32(SMB_CSR(adap, R_SMB_STATUS)); + if (error & M_SMB_ERROR) { + /* Clear error bit by writing a 1 */ + csr_out32(M_SMB_ERROR, SMB_CSR(adap, R_SMB_STATUS)); + return -1; /* XXXKW better error code? */ + } + + if (data_bytes == 1) + data->byte = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xff; + if (data_bytes == 2) + data->word = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xffff; + + return 0; +} + +static u32 bit_func(struct i2c_adapter *adap) +{ + return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA); +} + + +/* -----exported algorithm data: ------------------------------------- */ + +static struct i2c_algorithm i2c_sibyte_algo = { + .smbus_xfer = smbus_xfer, + .functionality = bit_func, +}; + +/* + * registering functions to load algorithms at runtime + */ +int i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed) +{ + int i; + struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data; + + /* register new adapter to i2c module... */ + i2c_adap->algo = &i2c_sibyte_algo; + + /* Set the frequency to 100 kHz */ + csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ)); + csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL)); + + /* scan bus */ + if (bit_scan) { + union i2c_smbus_data data; + int rc; + printk(KERN_INFO " i2c-algo-sibyte.o: scanning bus %s.\n", + i2c_adap->name); + for (i = 0x00; i < 0x7f; i++) { + /* XXXKW is this a realistic probe? */ + rc = smbus_xfer(i2c_adap, i, 0, I2C_SMBUS_READ, 0, + I2C_SMBUS_BYTE_DATA, &data); + if (!rc) { + printk("(%02x)",i); + } else + printk("."); + } + printk("\n"); + } + + return i2c_add_adapter(i2c_adap); +} + + static struct i2c_algo_sibyte_data sibyte_board_data[2] = { { NULL, 0, (void *) (CKSEG1+A_SMB_BASE(0)) }, { NULL, 1, (void *) (CKSEG1+A_SMB_BASE(1)) } @@ -58,8 +210,8 @@ static int __init i2c_sibyte_init(void) static void __exit i2c_sibyte_exit(void) { - i2c_sibyte_del_bus(&sibyte_board_adapter[0]); - i2c_sibyte_del_bus(&sibyte_board_adapter[1]); + i2c_del_bus(&sibyte_board_adapter[0]); + i2c_del_bus(&sibyte_board_adapter[1]); } module_init(i2c_sibyte_init); diff --git a/include/linux/i2c-algo-sibyte.h b/include/linux/i2c-algo-sibyte.h deleted file mode 100644 index 03914ded8614..000000000000 --- a/include/linux/i2c-algo-sibyte.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2001,2002,2003 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#ifndef I2C_ALGO_SIBYTE_H -#define I2C_ALGO_SIBYTE_H 1 - -#include - -struct i2c_algo_sibyte_data { - void *data; /* private data */ - int bus; /* which bus */ - void *reg_base; /* CSR base */ -}; - -int i2c_sibyte_add_bus(struct i2c_adapter *, int speed); -int i2c_sibyte_del_bus(struct i2c_adapter *); - -#endif /* I2C_ALGO_SIBYTE_H */ -- cgit v1.2.3-71-gd317 From a0d9c63d3640bd4fc90a408e8334754ef44bcf48 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 27 Aug 2006 11:46:49 +0200 Subject: i2c-algo-bit: Discard the mdelay data struct member i2c-algo-bit: Discard the mdelay data struct member The i2c_algo_bit_data structure has an mdelay member, which is not used by the algorithm code (the code has always been ifdef'd out.) Let's discard it to save some code and memory. Signed-off-by: Jean Delvare Acked-by: Mauro Carvalho Chehab Cc: Adrian Bunk Signed-off-by: Greg Kroah-Hartman --- drivers/acorn/char/i2c.c | 1 - drivers/i2c/algos/i2c-algo-bit.c | 4 ---- drivers/i2c/busses/i2c-hydra.c | 1 - drivers/i2c/busses/i2c-i810.c | 2 -- drivers/i2c/busses/i2c-ixp2000.c | 1 - drivers/i2c/busses/i2c-ixp4xx.c | 1 - drivers/i2c/busses/i2c-parport-light.c | 1 - drivers/i2c/busses/i2c-parport.c | 1 - drivers/i2c/busses/i2c-prosavage.c | 1 - drivers/i2c/busses/i2c-savage4.c | 1 - drivers/i2c/busses/i2c-via.c | 1 - drivers/i2c/busses/i2c-voodoo3.c | 2 -- drivers/i2c/busses/scx200_i2c.c | 12 ++++++------ drivers/ieee1394/pcilynx.c | 1 - drivers/media/video/bt8xx/bttv-i2c.c | 1 - drivers/media/video/cx88/cx88-i2c.c | 1 - drivers/media/video/cx88/cx88-vp3054-i2c.c | 1 - drivers/media/video/zoran_card.c | 1 - drivers/video/i810/i810-i2c.c | 1 - drivers/video/matrox/i2c-matroxfb.c | 1 - drivers/video/savage/savagefb-i2c.c | 1 - include/linux/i2c-algo-bit.h | 1 - 22 files changed, 6 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/acorn/char/i2c.c b/drivers/acorn/char/i2c.c index c26c08b36829..bdb9c8b78ed8 100644 --- a/drivers/acorn/char/i2c.c +++ b/drivers/acorn/char/i2c.c @@ -308,7 +308,6 @@ static struct i2c_algo_bit_data ioc_data = { .getsda = ioc_getsda, .getscl = ioc_getscl, .udelay = 80, - .mdelay = 80, .timeout = 100 }; diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index ab230c033f99..761df16838b4 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -354,10 +354,6 @@ static int sendbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msg) return (retval<0)? retval : -EFAULT; /* got a better one ?? */ } -#if 0 - /* from asm/delay.h */ - __delay(adap->mdelay * (loops_per_sec / 1000) ); -#endif } return wrcount; } diff --git a/drivers/i2c/busses/i2c-hydra.c b/drivers/i2c/busses/i2c-hydra.c index e0cb3b0f92fa..457d48a0ab9d 100644 --- a/drivers/i2c/busses/i2c-hydra.c +++ b/drivers/i2c/busses/i2c-hydra.c @@ -99,7 +99,6 @@ static struct i2c_algo_bit_data hydra_bit_data = { .getsda = hydra_bit_getsda, .getscl = hydra_bit_getscl, .udelay = 5, - .mdelay = 5, .timeout = HZ }; diff --git a/drivers/i2c/busses/i2c-i810.c b/drivers/i2c/busses/i2c-i810.c index 748be30f2bae..b66fb6bb1870 100644 --- a/drivers/i2c/busses/i2c-i810.c +++ b/drivers/i2c/busses/i2c-i810.c @@ -166,7 +166,6 @@ static struct i2c_algo_bit_data i810_i2c_bit_data = { .getsda = bit_i810i2c_getsda, .getscl = bit_i810i2c_getscl, .udelay = CYCLE_DELAY, - .mdelay = CYCLE_DELAY, .timeout = TIMEOUT, }; @@ -182,7 +181,6 @@ static struct i2c_algo_bit_data i810_ddc_bit_data = { .getsda = bit_i810ddc_getsda, .getscl = bit_i810ddc_getscl, .udelay = CYCLE_DELAY, - .mdelay = CYCLE_DELAY, .timeout = TIMEOUT, }; diff --git a/drivers/i2c/busses/i2c-ixp2000.c b/drivers/i2c/busses/i2c-ixp2000.c index cd6f45d186ab..dd3f4cd3aa68 100644 --- a/drivers/i2c/busses/i2c-ixp2000.c +++ b/drivers/i2c/busses/i2c-ixp2000.c @@ -114,7 +114,6 @@ static int ixp2000_i2c_probe(struct platform_device *plat_dev) drv_data->algo_data.getsda = ixp2000_bit_getsda; drv_data->algo_data.getscl = ixp2000_bit_getscl; drv_data->algo_data.udelay = 6; - drv_data->algo_data.mdelay = 6; drv_data->algo_data.timeout = 100; drv_data->adapter.id = I2C_HW_B_IXP2000, diff --git a/drivers/i2c/busses/i2c-ixp4xx.c b/drivers/i2c/busses/i2c-ixp4xx.c index 2ed07112d683..ab573254a8aa 100644 --- a/drivers/i2c/busses/i2c-ixp4xx.c +++ b/drivers/i2c/busses/i2c-ixp4xx.c @@ -122,7 +122,6 @@ static int ixp4xx_i2c_probe(struct platform_device *plat_dev) drv_data->algo_data.getsda = ixp4xx_bit_getsda; drv_data->algo_data.getscl = ixp4xx_bit_getscl; drv_data->algo_data.udelay = 10; - drv_data->algo_data.mdelay = 10; drv_data->algo_data.timeout = 100; drv_data->adapter.id = I2C_HW_B_IXP4XX; diff --git a/drivers/i2c/busses/i2c-parport-light.c b/drivers/i2c/busses/i2c-parport-light.c index e09ebbb2f9f0..5eb2bd294fd9 100644 --- a/drivers/i2c/busses/i2c-parport-light.c +++ b/drivers/i2c/busses/i2c-parport-light.c @@ -103,7 +103,6 @@ static struct i2c_algo_bit_data parport_algo_data = { .getsda = parport_getsda, .getscl = parport_getscl, .udelay = 50, - .mdelay = 50, .timeout = HZ, }; diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c index 934bd55bae15..48a829431c7b 100644 --- a/drivers/i2c/busses/i2c-parport.c +++ b/drivers/i2c/busses/i2c-parport.c @@ -138,7 +138,6 @@ static struct i2c_algo_bit_data parport_algo_data = { .getsda = parport_getsda, .getscl = parport_getscl, .udelay = 60, - .mdelay = 60, .timeout = HZ, }; diff --git a/drivers/i2c/busses/i2c-prosavage.c b/drivers/i2c/busses/i2c-prosavage.c index 9479525892e3..7745e21874a8 100644 --- a/drivers/i2c/busses/i2c-prosavage.c +++ b/drivers/i2c/busses/i2c-prosavage.c @@ -180,7 +180,6 @@ static int i2c_register_bus(struct pci_dev *dev, struct s_i2c_bus *p, void __iom p->algo.getsda = bit_s3via_getsda; p->algo.getscl = bit_s3via_getscl; p->algo.udelay = CYCLE_DELAY; - p->algo.mdelay = CYCLE_DELAY; p->algo.timeout = TIMEOUT; p->algo.data = p; p->mmvga = mmvga; diff --git a/drivers/i2c/busses/i2c-savage4.c b/drivers/i2c/busses/i2c-savage4.c index 0c8518298e4d..209f47ea1750 100644 --- a/drivers/i2c/busses/i2c-savage4.c +++ b/drivers/i2c/busses/i2c-savage4.c @@ -140,7 +140,6 @@ static struct i2c_algo_bit_data sav_i2c_bit_data = { .getsda = bit_savi2c_getsda, .getscl = bit_savi2c_getscl, .udelay = CYCLE_DELAY, - .mdelay = CYCLE_DELAY, .timeout = TIMEOUT }; diff --git a/drivers/i2c/busses/i2c-via.c b/drivers/i2c/busses/i2c-via.c index 484bbacfce6b..910e200ad500 100644 --- a/drivers/i2c/busses/i2c-via.c +++ b/drivers/i2c/busses/i2c-via.c @@ -81,7 +81,6 @@ static struct i2c_algo_bit_data bit_data = { .getsda = bit_via_getsda, .getscl = bit_via_getscl, .udelay = 5, - .mdelay = 5, .timeout = HZ }; diff --git a/drivers/i2c/busses/i2c-voodoo3.c b/drivers/i2c/busses/i2c-voodoo3.c index b675773b0cc1..6c8d25183382 100644 --- a/drivers/i2c/busses/i2c-voodoo3.c +++ b/drivers/i2c/busses/i2c-voodoo3.c @@ -160,7 +160,6 @@ static struct i2c_algo_bit_data voo_i2c_bit_data = { .getsda = bit_vooi2c_getsda, .getscl = bit_vooi2c_getscl, .udelay = CYCLE_DELAY, - .mdelay = CYCLE_DELAY, .timeout = TIMEOUT }; @@ -177,7 +176,6 @@ static struct i2c_algo_bit_data voo_ddc_bit_data = { .getsda = bit_vooddc_getsda, .getscl = bit_vooddc_getscl, .udelay = CYCLE_DELAY, - .mdelay = CYCLE_DELAY, .timeout = TIMEOUT }; diff --git a/drivers/i2c/busses/scx200_i2c.c b/drivers/i2c/busses/scx200_i2c.c index cb3ef5ac99fd..8b65a5cf8251 100644 --- a/drivers/i2c/busses/scx200_i2c.c +++ b/drivers/i2c/busses/scx200_i2c.c @@ -71,12 +71,12 @@ static int scx200_i2c_getsda(void *data) */ static struct i2c_algo_bit_data scx200_i2c_data = { - NULL, - scx200_i2c_setsda, - scx200_i2c_setscl, - scx200_i2c_getsda, - scx200_i2c_getscl, - 10, 10, 100, /* waits, timeout */ + .setsda = scx200_i2c_setsda, + .setscl = scx200_i2c_setscl, + .getsda = scx200_i2c_getsda, + .getscl = scx200_i2c_getscl, + .udelay = 10, + .timeout = 100, }; static struct i2c_adapter scx200_i2c_ops = { diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c index e6f41238f5e8..b4f146f2c951 100644 --- a/drivers/ieee1394/pcilynx.c +++ b/drivers/ieee1394/pcilynx.c @@ -137,7 +137,6 @@ static struct i2c_algo_bit_data bit_data = { .getsda = bit_getsda, .getscl = bit_getscl, .udelay = 5, - .mdelay = 5, .timeout = 100, }; diff --git a/drivers/media/video/bt8xx/bttv-i2c.c b/drivers/media/video/bt8xx/bttv-i2c.c index 4b562b386fcf..0dfbcc85ebb9 100644 --- a/drivers/media/video/bt8xx/bttv-i2c.c +++ b/drivers/media/video/bt8xx/bttv-i2c.c @@ -100,7 +100,6 @@ static struct i2c_algo_bit_data bttv_i2c_algo_bit_template = { .getsda = bttv_bit_getsda, .getscl = bttv_bit_getscl, .udelay = 16, - .mdelay = 10, .timeout = 200, }; diff --git a/drivers/media/video/cx88/cx88-i2c.c b/drivers/media/video/cx88/cx88-i2c.c index 70663805cc30..7bea34714861 100644 --- a/drivers/media/video/cx88/cx88-i2c.c +++ b/drivers/media/video/cx88/cx88-i2c.c @@ -155,7 +155,6 @@ static struct i2c_algo_bit_data cx8800_i2c_algo_template = { .getsda = cx8800_bit_getsda, .getscl = cx8800_bit_getscl, .udelay = 16, - .mdelay = 10, .timeout = 200, }; diff --git a/drivers/media/video/cx88/cx88-vp3054-i2c.c b/drivers/media/video/cx88/cx88-vp3054-i2c.c index 751a754a45e9..2b4f1970c7df 100644 --- a/drivers/media/video/cx88/cx88-vp3054-i2c.c +++ b/drivers/media/video/cx88/cx88-vp3054-i2c.c @@ -100,7 +100,6 @@ static struct i2c_algo_bit_data vp3054_i2c_algo_template = { .getsda = vp3054_bit_getsda, .getscl = vp3054_bit_getscl, .udelay = 16, - .mdelay = 10, .timeout = 200, }; diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c index f2249ed25273..29f59c36f001 100644 --- a/drivers/media/video/zoran_card.c +++ b/drivers/media/video/zoran_card.c @@ -820,7 +820,6 @@ static struct i2c_algo_bit_data zoran_i2c_bit_data_template = { .getsda = zoran_i2c_getsda, .getscl = zoran_i2c_getscl, .udelay = 10, - .mdelay = 0, .timeout = 100, }; diff --git a/drivers/video/i810/i810-i2c.c b/drivers/video/i810/i810-i2c.c index c1f7b49975dd..7d06b38e80a0 100644 --- a/drivers/video/i810/i810-i2c.c +++ b/drivers/video/i810/i810-i2c.c @@ -98,7 +98,6 @@ static int i810_setup_i2c_bus(struct i810fb_i2c_chan *chan, const char *name) chan->algo.getsda = i810i2c_getsda; chan->algo.getscl = i810i2c_getscl; chan->algo.udelay = 10; - chan->algo.mdelay = 10; chan->algo.timeout = (HZ/2); chan->algo.data = chan; diff --git a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c index 9842042d2af5..795c1a99a680 100644 --- a/drivers/video/matrox/i2c-matroxfb.c +++ b/drivers/video/matrox/i2c-matroxfb.c @@ -100,7 +100,6 @@ static struct i2c_algo_bit_data matrox_i2c_algo_template = .getsda = matroxfb_gpio_getsda, .getscl = matroxfb_gpio_getscl, .udelay = 10, - .mdelay = 10, .timeout = 100, }; diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c index e83befd16d63..d7d810dbf0bd 100644 --- a/drivers/video/savage/savagefb-i2c.c +++ b/drivers/video/savage/savagefb-i2c.c @@ -148,7 +148,6 @@ static int savage_setup_i2c_bus(struct savagefb_i2c_chan *chan, chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pcidev->dev; chan->algo.udelay = 40; - chan->algo.mdelay = 5; chan->algo.timeout = 20; chan->algo.data = chan; diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h index c0e7fab28ce3..c8f8df25c7e0 100644 --- a/include/linux/i2c-algo-bit.h +++ b/include/linux/i2c-algo-bit.h @@ -40,7 +40,6 @@ struct i2c_algo_bit_data { /* local settings */ int udelay; /* half-clock-cycle time in microsecs */ /* i.e. clock is (500 / udelay) KHz */ - int mdelay; /* in millisecs, unused */ int timeout; /* in jiffies */ }; -- cgit v1.2.3-71-gd317 From 9b4ccb86b4abe644ffd218720da2f942b6a20fc2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 3 Sep 2006 22:22:50 +0200 Subject: i2c-algo-pcf: Discard the mdelay data struct member i2c-algo-pcf: Discard the mdelay data struct member Just as i2c-algo-bit, i2c-algo-pcf has an unused mdelay struct member, which we can get rid of to spare some code and memory. Signed-off-by: Adrian Bunk Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-elektor.c | 1 - include/linux/i2c-algo-pcf.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c index 59f8308c2356..caa8e5c8bfbb 100644 --- a/drivers/i2c/busses/i2c-elektor.c +++ b/drivers/i2c/busses/i2c-elektor.c @@ -196,7 +196,6 @@ static struct i2c_algo_pcf_data pcf_isa_data = { .getclock = pcf_isa_getclock, .waitforpin = pcf_isa_waitforpin, .udelay = 10, - .mdelay = 10, .timeout = 100, }; diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 18b0adf57a3d..9908f3fc4839 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -35,7 +35,6 @@ struct i2c_algo_pcf_data { /* local settings */ int udelay; - int mdelay; int timeout; }; -- cgit v1.2.3-71-gd317 From af71ff690b92894f66ccede27f731150dc10d80d Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 3 Sep 2006 22:37:11 +0200 Subject: i2c: Let drivers constify i2c_algorithm data i2c: Let drivers constify i2c_algorithm data Let drivers constify I2C algorithm method operations tables, moving them from ".data" to ".rodata". Signed-off-by: David Brownell Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index eb0628a7ecc6..23ad1ee42a4c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -220,7 +220,7 @@ struct i2c_adapter { struct module *owner; unsigned int id; unsigned int class; - struct i2c_algorithm *algo;/* the algorithm to access the bus */ + const struct i2c_algorithm *algo; /* the algorithm to access the bus */ void *algo_data; /* --- administration stuff. */ -- cgit v1.2.3-71-gd317 From 6d3aae9d74221b00e2cbf50a353527e5a71a58ba Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 3 Sep 2006 22:41:08 +0200 Subject: i2c: Drop unimplemented slave functions i2c: Drop unimplemented slave functions Drop the function declarations for slave mode support of i2c adapters. This was never implemented, and by the time it is I bet we will want something different anyway. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 23ad1ee42a4c..9b5d04768c2c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -64,14 +64,6 @@ extern int i2c_master_recv(struct i2c_client *,char* ,int); */ extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); -/* - * Some adapter types (i.e. PCF 8584 based ones) may support slave behaviuor. - * This is not tested/implemented yet and will change in the future. - */ -extern int i2c_slave_send(struct i2c_client *,char*,int); -extern int i2c_slave_recv(struct i2c_client *,char*,int); - - /* This is the very generalized SMBus access routine. You probably do not want to use this, though; one of the functions below may be much easier, @@ -201,10 +193,6 @@ struct i2c_algorithm { unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data * data); - /* --- these optional/future use for some adapter types.*/ - int (*slave_send)(struct i2c_adapter *,char*,int); - int (*slave_recv)(struct i2c_adapter *,char*,int); - /* --- ioctl like call to set div. parameters. */ int (*algo_control)(struct i2c_adapter *, unsigned int, unsigned long); -- cgit v1.2.3-71-gd317 From 46ff34633ed09f36ebc4b5c40ac37e592172df74 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 31 Aug 2006 01:55:24 -0400 Subject: MSI: Rename PCI_CAP_ID_HT_IRQCONF into PCI_CAP_ID_HT 0x08 is the HT capability, while PCI_CAP_ID_HT_IRQCONF would be the subtype 0x80 that mpic_scan_ht_pic() uses. Rename PCI_CAP_ID_HT_IRQCONF into PCI_CAP_ID_HT. And by the way, use it in the ipath driver instead of defining its own HT_CAPABILITY_ID. Signed-off-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/mpic.c | 2 +- drivers/infiniband/hw/ipath/ipath_iba6110.c | 5 ++--- include/linux/pci_regs.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index b604926401f5..723972bb5bd9 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -339,7 +339,7 @@ static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase, for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0; pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) { u8 id = readb(devbase + pos + PCI_CAP_LIST_ID); - if (id == PCI_CAP_ID_HT_IRQCONF) { + if (id == PCI_CAP_ID_HT) { id = readb(devbase + pos + 3); if (id == 0x80) break; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index bf2455a6d562..5c9b509e40e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -742,7 +742,6 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd) return 0; } -#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */ #define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ #define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ @@ -973,7 +972,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, * do this early, before we ever enable errors or hardware errors, * mostly to avoid causing the chip to enter freeze mode. */ - pos = pci_find_capability(pdev, HT_CAPABILITY_ID); + pos = pci_find_capability(pdev, PCI_CAP_ID_HT); if (!pos) { ipath_dev_err(dd, "Couldn't find HyperTransport " "capability; no interrupts\n"); @@ -996,7 +995,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, else if (cap_type == HT_INTR_DISC_CONFIG) ihandler = set_int_handler(dd, pdev, pos); } while ((pos = pci_find_next_capability(pdev, pos, - HT_CAPABILITY_ID))); + PCI_CAP_ID_HT))); if (!ihandler) { ipath_dev_err(dd, "Couldn't find interrupt handler in " diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 96930cb5927c..7d0e26cba420 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -196,7 +196,7 @@ #define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ #define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ #define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ -#define PCI_CAP_ID_HT_IRQCONF 0x08 /* HyperTransport IRQ Configuration */ +#define PCI_CAP_ID_HT 0x08 /* HyperTransport */ #define PCI_CAP_ID_VNDR 0x09 /* Vendor specific capability */ #define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ -- cgit v1.2.3-71-gd317 From 6397c75cbc4d7dbc3d07278b57c82a47dafb21b5 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 31 Aug 2006 01:55:32 -0400 Subject: MSI: Blacklist PCI-E chipsets depending on Hypertransport MSI capability Introduce msi_ht_cap_enabled() to check the MSI capability in the Hypertransport configuration space. It is used in a generic quirk quirk_msi_ht_cap() to check whether MSI is enabled on hypertransport chipset, and a nVidia specific quirk quirk_nvidia_ck804_msi_ht_cap() where two 2 HT MSI mappings have to be checked. Both quirks set the PCI_BUS_FLAGS_NO_MSI bus flag when MSI is disabled. Signed-off-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 1 + 2 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8385b815ecb1..08cd86a6dd66 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1703,6 +1703,65 @@ static void __devinit quirk_disable_msi(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_msi); + +/* Go through the list of Hypertransport capabilities and + * return 1 if a HT MSI capability is found and enabled */ +static int __devinit msi_ht_cap_enabled(struct pci_dev *dev) +{ + u8 pos; + int ttl; + for (pos = pci_find_capability(dev, PCI_CAP_ID_HT), ttl = 48; + pos && ttl; + pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_HT), ttl--) { + u32 cap_hdr; + /* MSI mapping section according to Hypertransport spec */ + if (pci_read_config_dword(dev, pos, &cap_hdr) == 0 + && (cap_hdr & 0xf8000000) == 0xa8000000 /* MSI mapping */) { + printk(KERN_INFO "PCI: Found HT MSI mapping on %s with capability %s\n", + pci_name(dev), cap_hdr & 0x10000 ? "enabled" : "disabled"); + return (cap_hdr & 0x10000) != 0; /* MSI mapping cap enabled */ + } + } + return 0; +} + +/* Check the hypertransport MSI mapping to know whether MSI is enabled or not */ +static void __devinit quirk_msi_ht_cap(struct pci_dev *dev) +{ + if (dev->subordinate && !msi_ht_cap_enabled(dev)) { + printk(KERN_WARNING "PCI: MSI quirk detected. " + "MSI disabled on chipset %s.\n", + pci_name(dev)); + dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE, + quirk_msi_ht_cap); + +/* The nVidia CK804 chipset may have 2 HT MSI mappings. + * MSI are supported if the MSI capability set in any of these mappings. + */ +static void __devinit quirk_nvidia_ck804_msi_ht_cap(struct pci_dev *dev) +{ + struct pci_dev *pdev; + + if (!dev->subordinate) + return; + + /* check HT MSI cap on this chipset and the root one. + * a single one having MSI is enough to be sure that MSI are supported. + */ + pdev = pci_find_slot(dev->bus->number, 0); + if (dev->subordinate && !msi_ht_cap_enabled(dev) + && !msi_ht_cap_enabled(pdev)) { + printk(KERN_WARNING "PCI: MSI quirk detected. " + "MSI disabled on chipset %s.\n", + pci_name(dev)); + dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, + quirk_nvidia_ck804_msi_ht_cap); #endif /* CONFIG_PCI_MSI */ EXPORT_SYMBOL(pcie_mch_quirk); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6a1e09834559..b9e263adebab 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1411,6 +1411,7 @@ #define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009 #define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017 #define PCI_DEVICE_ID_SERVERWORKS_EPB 0x0103 +#define PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE 0x0132 #define PCI_DEVICE_ID_SERVERWORKS_OSB4 0x0200 #define PCI_DEVICE_ID_SERVERWORKS_CSB5 0x0201 #define PCI_DEVICE_ID_SERVERWORKS_CSB6 0x0203 -- cgit v1.2.3-71-gd317 From 6c2b374d74857e892080ee726184ec1d15e7d4e4 Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Mon, 31 Jul 2006 15:21:33 +0800 Subject: PCI-Express AER implemetation: AER core and aerdriver Patch 3 implements the core part of PCI-Express AER and aerdrv port service driver. When a root port service device is probed, the aerdrv will call request_irq to register irq handler for AER error interrupt. When a device sends an PCI-Express error message to the root port, the root port will trigger an interrupt, by either MSI or IO-APIC, then kernel would run the irq handler. The handler collects root error status register and schedules a work. The work will call the core part to process the error based on its type (Correctable/non-fatal/fatal). As for Correctable errors, the patch chooses to just clear the correctable error status register of the device. As for the non-fatal error, the patch follows generic PCI error handler rules to call the error callback functions of the endpoint's driver. If the device is a bridge, the patch chooses to broadcast the error to downstream devices. As for the fatal error, the patch resets the pci-express link and follows generic PCI error handler rules to call the error callback functions of the endpoint's driver. If the device is a bridge, the patch chooses to broadcast the error to downstream devices. Signed-off-by: Zhang Yanmin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/Kconfig | 1 + drivers/pci/pcie/Makefile | 3 + drivers/pci/pcie/aer/Kconfig | 12 + drivers/pci/pcie/aer/Makefile | 8 + drivers/pci/pcie/aer/aerdrv.c | 346 +++++++++++++++ drivers/pci/pcie/aer/aerdrv.h | 125 ++++++ drivers/pci/pcie/aer/aerdrv_acpi.c | 68 +++ drivers/pci/pcie/aer/aerdrv_core.c | 757 +++++++++++++++++++++++++++++++++ drivers/pci/pcie/aer/aerdrv_errprint.c | 248 +++++++++++ include/linux/aer.h | 24 ++ include/linux/pcieport_if.h | 6 + 11 files changed, 1598 insertions(+) create mode 100644 drivers/pci/pcie/aer/Kconfig create mode 100644 drivers/pci/pcie/aer/Makefile create mode 100644 drivers/pci/pcie/aer/aerdrv.c create mode 100644 drivers/pci/pcie/aer/aerdrv.h create mode 100644 drivers/pci/pcie/aer/aerdrv_acpi.c create mode 100644 drivers/pci/pcie/aer/aerdrv_core.c create mode 100644 drivers/pci/pcie/aer/aerdrv_errprint.c create mode 100644 include/linux/aer.h (limited to 'include/linux') diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 1012db8b8b2c..0ad92a8ad8b1 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -34,3 +34,4 @@ config HOTPLUG_PCI_PCIE_POLL_EVENT_MODE When in doubt, say N. +source "drivers/pci/pcie/aer/Kconfig" diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 984fa87283e3..e00fb99acf44 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -5,3 +5,6 @@ pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o + +# Build PCI Express AER if needed +obj-$(CONFIG_PCIEAER) += aer/ diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig new file mode 100644 index 000000000000..3f37a60a6438 --- /dev/null +++ b/drivers/pci/pcie/aer/Kconfig @@ -0,0 +1,12 @@ +# +# PCI Express Root Port Device AER Configuration +# + +config PCIEAER + boolean "Root Port Advanced Error Reporting support" + depends on PCIEPORTBUS && ACPI + default y + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) driver support. Error reporting messages sent to Root + Port will be handled by PCI Express AER driver. diff --git a/drivers/pci/pcie/aer/Makefile b/drivers/pci/pcie/aer/Makefile new file mode 100644 index 000000000000..15a4f40d520b --- /dev/null +++ b/drivers/pci/pcie/aer/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for PCI-Express Root Port Advanced Error Reporting Driver +# + +obj-$(CONFIG_PCIEAER) += aerdriver.o + +aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o aerdrv_acpi.o + diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c new file mode 100644 index 000000000000..0d4ac027d53e --- /dev/null +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -0,0 +1,346 @@ +/* + * drivers/pci/pcie/aer/aerdrv.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file implements the AER root port service driver. The driver will + * register an irq handler. When root port triggers an AER interrupt, the irq + * handler will collect root port status and schedule a work. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "aerdrv.h" + +/* + * Version Information + */ +#define DRIVER_VERSION "v1.0" +#define DRIVER_AUTHOR "tom.l.nguyen@intel.com" +#define DRIVER_DESC "Root Port Advanced Error Reporting Driver" +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); + +static int __devinit aer_probe (struct pcie_device *dev, + const struct pcie_port_service_id *id ); +static void aer_remove(struct pcie_device *dev); +static int aer_suspend(struct pcie_device *dev, pm_message_t state) +{return 0;} +static int aer_resume(struct pcie_device *dev) {return 0;} +static pci_ers_result_t aer_error_detected(struct pci_dev *dev, + enum pci_channel_state error); +static void aer_error_resume(struct pci_dev *dev); +static pci_ers_result_t aer_root_reset(struct pci_dev *dev); + +/* + * PCI Express bus's AER Root service driver data structure + */ +static struct pcie_port_service_id aer_id[] = { + { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .port_type = PCIE_RC_PORT, + .service_type = PCIE_PORT_SERVICE_AER, + }, + { /* end: all zeroes */ } +}; + +static struct pci_error_handlers aer_error_handlers = { + .error_detected = aer_error_detected, + .resume = aer_error_resume, +}; + +static struct pcie_port_service_driver aerdrv = { + .name = "aer", + .id_table = &aer_id[0], + + .probe = aer_probe, + .remove = aer_remove, + + .suspend = aer_suspend, + .resume = aer_resume, + + .err_handler = &aer_error_handlers, + + .reset_link = aer_root_reset, +}; + +/** + * aer_irq - Root Port's ISR + * @irq: IRQ assigned to Root Port + * @context: pointer to Root Port data structure + * @r: pointer struct pt_regs + * + * Invoked when Root Port detects AER messages. + **/ +static irqreturn_t aer_irq(int irq, void *context, struct pt_regs * r) +{ + unsigned int status, id; + struct pcie_device *pdev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(pdev); + int next_prod_idx; + unsigned long flags; + int pos; + + pos = pci_find_aer_capability(pdev->port); + /* + * Must lock access to Root Error Status Reg, Root Error ID Reg, + * and Root error producer/consumer index + */ + spin_lock_irqsave(&rpc->e_lock, flags); + + /* Read error status */ + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); + if (!(status & ROOT_ERR_STATUS_MASKS)) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return IRQ_NONE; + } + + /* Read error source and clear error status */ + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_COR_SRC, &id); + pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); + + /* Store error source for later DPC handler */ + next_prod_idx = rpc->prod_idx + 1; + if (next_prod_idx == AER_ERROR_SOURCES_MAX) + next_prod_idx = 0; + if (next_prod_idx == rpc->cons_idx) { + /* + * Error Storm Condition - possibly the same error occurred. + * Drop the error. + */ + spin_unlock_irqrestore(&rpc->e_lock, flags); + return IRQ_HANDLED; + } + rpc->e_sources[rpc->prod_idx].status = status; + rpc->e_sources[rpc->prod_idx].id = id; + rpc->prod_idx = next_prod_idx; + spin_unlock_irqrestore(&rpc->e_lock, flags); + + /* Invoke DPC handler */ + schedule_work(&rpc->dpc_handler); + + return IRQ_HANDLED; +} + +/** + * aer_alloc_rpc - allocate Root Port data structure + * @dev: pointer to the pcie_dev data structure + * + * Invoked when Root Port's AER service is loaded. + **/ +static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev) +{ + struct aer_rpc *rpc; + + if (!(rpc = (struct aer_rpc *)kmalloc(sizeof(struct aer_rpc), + GFP_KERNEL))) + return NULL; + + memset(rpc, 0, sizeof(struct aer_rpc)); + /* + * Initialize Root lock access, e_lock, to Root Error Status Reg, + * Root Error ID Reg, and Root error producer/consumer index. + */ + rpc->e_lock = SPIN_LOCK_UNLOCKED; + + rpc->rpd = dev; + INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev); + rpc->prod_idx = rpc->cons_idx = 0; + mutex_init(&rpc->rpc_mutex); + init_waitqueue_head(&rpc->wait_release); + + /* Use PCIE bus function to store rpc into PCIE device */ + set_service_data(dev, rpc); + + return rpc; +} + +/** + * aer_remove - clean up resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus unloads or AER probe fails. + **/ +static void aer_remove(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + if (rpc) { + /* If register interrupt service, it must be free. */ + if (rpc->isr) + free_irq(dev->irq, dev); + + wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx); + + aer_delete_rootport(rpc); + set_service_data(dev, NULL); + } +} + +/** + * aer_probe - initialize resources + * @dev: pointer to the pcie_dev data structure + * @id: pointer to the service id data structure + * + * Invoked when PCI Express bus loads AER service driver. + **/ +static int __devinit aer_probe (struct pcie_device *dev, + const struct pcie_port_service_id *id ) +{ + int status; + struct aer_rpc *rpc; + struct device *device = &dev->device; + + /* Init */ + if ((status = aer_init(dev))) + return status; + + /* Alloc rpc data structure */ + if (!(rpc = aer_alloc_rpc(dev))) { + printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n", + __FUNCTION__, device->bus_id); + aer_remove(dev); + return -ENOMEM; + } + + /* Request IRQ ISR */ + if ((status = request_irq(dev->irq, aer_irq, SA_SHIRQ, "aerdrv", + dev))) { + printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n", + __FUNCTION__, device->bus_id); + aer_remove(dev); + return status; + } + + rpc->isr = 1; + + aer_enable_rootport(rpc); + + return status; +} + +/** + * aer_root_reset - reset link on Root Port + * @dev: pointer to Root Port's pci_dev data structure + * + * Invoked by Port Bus driver when performing link reset at Root Port. + **/ +static pci_ers_result_t aer_root_reset(struct pci_dev *dev) +{ + u16 p2p_ctrl; + u32 status; + int pos; + + pos = pci_find_aer_capability(dev); + + /* Disable Root's interrupt in response to error messages */ + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0); + + /* Assert Secondary Bus Reset */ + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl); + p2p_ctrl |= PCI_CB_BRIDGE_CTL_CB_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); + + /* De-assert Secondary Bus Reset */ + p2p_ctrl &= ~PCI_CB_BRIDGE_CTL_CB_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); + + /* + * System software must wait for at least 100ms from the end + * of a reset of one or more device before it is permitted + * to issue Configuration Requests to those devices. + */ + msleep(200); + printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); + pci_write_config_dword(dev, + pos + PCI_ERR_ROOT_COMMAND, + ROOT_PORT_INTR_ON_MESG_MASK); + + return PCI_ERS_RESULT_RECOVERED; +} + +/** + * aer_error_detected - update severity status + * @dev: pointer to Root Port's pci_dev data structure + * @error: error severity being notified by port bus + * + * Invoked by Port Bus driver during error recovery. + **/ +static pci_ers_result_t aer_error_detected(struct pci_dev *dev, + enum pci_channel_state error) +{ + /* Root Port has no impact. Always recovers. */ + return PCI_ERS_RESULT_CAN_RECOVER; +} + +/** + * aer_error_resume - clean up corresponding error status bits + * @dev: pointer to Root Port's pci_dev data structure + * + * Invoked by Port Bus driver during nonfatal recovery. + **/ +static void aer_error_resume(struct pci_dev *dev) +{ + int pos; + u32 status, mask; + u16 reg16; + + /* Clean up Root device status */ + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, ®16); + pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16); + + /* Clean AER Root Error Status */ + pos = pci_find_aer_capability(dev); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); + if (dev->error_state == pci_channel_io_normal) + status &= ~mask; /* Clear corresponding nonfatal bits */ + else + status &= mask; /* Clear corresponding fatal bits */ + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); +} + +/** + * aer_service_init - register AER root service driver + * + * Invoked when AER root service driver is loaded. + **/ +static int __init aer_service_init(void) +{ + return pcie_port_service_register(&aerdrv); +} + +/** + * aer_service_exit - unregister AER root service driver + * + * Invoked when AER root service driver is unloaded. + **/ +static void __exit aer_service_exit(void) +{ + pcie_port_service_unregister(&aerdrv); +} + +module_init(aer_service_init); +module_exit(aer_service_exit); diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h new file mode 100644 index 000000000000..daf0cad88fc8 --- /dev/null +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + */ + +#ifndef _AERDRV_H_ +#define _AERDRV_H_ + +#include +#include + +#define AER_NONFATAL 0 +#define AER_FATAL 1 +#define AER_CORRECTABLE 2 +#define AER_UNCORRECTABLE 4 +#define AER_ERROR_MASK 0x001fffff +#define AER_ERROR(d) (d & AER_ERROR_MASK) + +#define OSC_METHOD_RUN_SUCCESS 0 +#define OSC_METHOD_NOT_SUPPORTED 1 +#define OSC_METHOD_RUN_FAILURE 2 + +/* Root Error Status Register Bits */ +#define ROOT_ERR_STATUS_MASKS 0x0f + +#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ + PCI_EXP_RTCTL_SENFEE| \ + PCI_EXP_RTCTL_SEFEE) +#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ + PCI_ERR_ROOT_CMD_NONFATAL_EN| \ + PCI_ERR_ROOT_CMD_FATAL_EN) +#define ERR_COR_ID(d) (d & 0xffff) +#define ERR_UNCOR_ID(d) (d >> 16) + +#define AER_SUCCESS 0 +#define AER_UNSUCCESS 1 +#define AER_ERROR_SOURCES_MAX 100 + +#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ + PCI_ERR_UNC_ECRC| \ + PCI_ERR_UNC_UNSUP| \ + PCI_ERR_UNC_COMP_ABORT| \ + PCI_ERR_UNC_UNX_COMP| \ + PCI_ERR_UNC_MALF_TLP) + +/* AER Error Info Flags */ +#define AER_TLP_HEADER_VALID_FLAG 0x00000001 +#define AER_MULTI_ERROR_VALID_FLAG 0x00000002 + +#define ERR_CORRECTABLE_ERROR_MASK 0x000031c1 +#define ERR_UNCORRECTABLE_ERROR_MASK 0x001ff010 + +struct header_log_regs { + unsigned int dw0; + unsigned int dw1; + unsigned int dw2; + unsigned int dw3; +}; + +struct aer_err_info { + int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ + int flags; + unsigned int status; /* COR/UNCOR Error Status */ + struct header_log_regs tlp; /* TLP Header */ +}; + +struct aer_err_source { + unsigned int status; + unsigned int id; +}; + +struct aer_rpc { + struct pcie_device *rpd; /* Root Port device */ + struct work_struct dpc_handler; + struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; + unsigned short prod_idx; /* Error Producer Index */ + unsigned short cons_idx; /* Error Consumer Index */ + int isr; + spinlock_t e_lock; /* + * Lock access to Error Status/ID Regs + * and error producer/consumer index + */ + struct mutex rpc_mutex; /* + * only one thread could do + * recovery on the same + * root port hierachy + */ + wait_queue_head_t wait_release; +}; + +struct aer_broadcast_data { + enum pci_channel_state state; + enum pci_ers_result result; +}; + +static inline pci_ers_result_t merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + switch (orig) { + case PCI_ERS_RESULT_CAN_RECOVER: + case PCI_ERS_RESULT_RECOVERED: + orig = new; + break; + case PCI_ERS_RESULT_DISCONNECT: + if (new == PCI_ERS_RESULT_NEED_RESET) + orig = new; + break; + default: + break; + } + + return orig; +} + +extern struct bus_type pcie_port_bus_type; +extern void aer_enable_rootport(struct aer_rpc *rpc); +extern void aer_delete_rootport(struct aer_rpc *rpc); +extern int aer_init(struct pcie_device *dev); +extern void aer_isr(void *context); +extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); +extern int aer_osc_setup(struct pci_dev *dev); + +#endif //_AERDRV_H_ diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c new file mode 100644 index 000000000000..fa68e89ebec9 --- /dev/null +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -0,0 +1,68 @@ +/* + * Access ACPI _OSC method + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "aerdrv.h" + +/** + * aer_osc_setup - run ACPI _OSC method + * + * Return: + * Zero if success. Nonzero for otherwise. + * + * Invoked when PCIE bus loads AER service driver. To avoid conflict with + * BIOS AER support requires BIOS to yield AER control to OS native driver. + **/ +int aer_osc_setup(struct pci_dev *dev) +{ + int retval = OSC_METHOD_RUN_SUCCESS; + acpi_status status; + acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); + struct pci_dev *pdev = dev; + struct pci_bus *parent; + + while (!handle) { + if (!pdev || !pdev->bus->parent) + break; + parent = pdev->bus->parent; + if (!parent->self) + /* Parent must be a host bridge */ + handle = acpi_get_pci_rootbridge_handle( + pci_domain_nr(parent), + parent->number); + else + handle = DEVICE_ACPI_HANDLE( + &(parent->self->dev)); + pdev = parent->self; + } + + if (!handle) + return OSC_METHOD_NOT_SUPPORTED; + + pci_osc_support_set(OSC_EXT_PCI_CONFIG_SUPPORT); + status = pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + if (ACPI_FAILURE(status)) { + if (status == AE_SUPPORT) + retval = OSC_METHOD_NOT_SUPPORTED; + else + retval = OSC_METHOD_RUN_FAILURE; + } + + return retval; +} + diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c new file mode 100644 index 000000000000..5591043acea7 --- /dev/null +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -0,0 +1,757 @@ +/* + * drivers/pci/pcie/aer/aerdrv_core.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file implements the core part of PCI-Express AER. When an pci-express + * error is delivered, an error message will be collected and printed to + * console, then, an error recovery procedure will be executed by following + * the pci error recovery rules. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "aerdrv.h" + +static int forceload; +module_param(forceload, bool, 0); + +#define PCI_CFG_SPACE_SIZE (0x100) +int pci_find_aer_capability(struct pci_dev *dev) +{ + int pos; + u32 reg32 = 0; + + /* Check if it's a pci-express device */ + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + + /* Check if it supports pci-express AER */ + pos = PCI_CFG_SPACE_SIZE; + while (pos) { + if (pci_read_config_dword(dev, pos, ®32)) + return 0; + + /* some broken boards return ~0 */ + if (reg32 == 0xffffffff) + return 0; + + if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR) + break; + + pos = reg32 >> 20; + } + + return pos; +} + +int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + u16 reg16 = 0; + int pos; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return -EIO; + + pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, ®16); + reg16 = reg16 | + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE; + pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, + reg16); + return 0; +} + +int pci_disable_pcie_error_reporting(struct pci_dev *dev) +{ + u16 reg16 = 0; + int pos; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return -EIO; + + pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, ®16); + reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, + reg16); + return 0; +} + +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +{ + int pos; + u32 status, mask; + + pos = pci_find_aer_capability(dev); + if (!pos) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); + if (dev->error_state == pci_channel_io_normal) + status &= ~mask; /* Clear corresponding nonfatal bits */ + else + status &= mask; /* Clear corresponding fatal bits */ + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} + +static int find_device_iter(struct device *device, void *data) +{ + struct pci_dev *dev; + u16 id = *(unsigned long *)data; + u8 secondary, subordinate, d_bus = id >> 8; + + if (device->bus == &pci_bus_type) { + dev = to_pci_dev(device); + if (id == ((dev->bus->number << 8) | dev->devfn)) { + /* + * Device ID match + */ + *(unsigned long*)data = (unsigned long)device; + return 1; + } + + /* + * If device is P2P, check if it is an upstream? + */ + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { + pci_read_config_byte(dev, PCI_SECONDARY_BUS, + &secondary); + pci_read_config_byte(dev, PCI_SUBORDINATE_BUS, + &subordinate); + if (d_bus >= secondary && d_bus <= subordinate) { + *(unsigned long*)data = (unsigned long)device; + return 1; + } + } + } + + return 0; +} + +/** + * find_source_device - search through device hierarchy for source device + * @p_dev: pointer to Root Port pci_dev data structure + * @id: device ID of agent who sends an error message to this Root Port + * + * Invoked when error is detected at the Root Port. + **/ +static struct device* find_source_device(struct pci_dev *parent, u16 id) +{ + struct pci_dev *dev = parent; + struct device *device; + unsigned long device_addr; + int status; + + /* Is Root Port an agent that sends error message? */ + if (id == ((dev->bus->number << 8) | dev->devfn)) + return &dev->dev; + + do { + device_addr = id; + if ((status = device_for_each_child(&dev->dev, + &device_addr, find_device_iter))) { + device = (struct device*)device_addr; + dev = to_pci_dev(device); + if (id == ((dev->bus->number << 8) | dev->devfn)) + return device; + } + }while (status); + + return NULL; +} + +static void report_error_detected(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + result_data = (struct aer_broadcast_data *) data; + + dev->error_state = result_data->state; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->error_detected) { + if (result_data->state == pci_channel_io_frozen && + !(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) { + /* + * In case of fatal recovery, if one of down- + * stream device has no driver. We might be + * unable to recover because a later insmod + * of a driver for this device is unaware of + * its hw state. + */ + printk(KERN_DEBUG "Device ID[%s] has %s\n", + dev->dev.bus_id, (dev->driver) ? + "no AER-aware driver" : "no driver"); + } + return; + } + + err_handler = dev->driver->err_handler; + vote = err_handler->error_detected(dev, result_data->state); + result_data->result = merge_result(result_data->result, vote); + return; +} + +static void report_mmio_enabled(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + result_data = (struct aer_broadcast_data *) data; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->mmio_enabled) + return; + + err_handler = dev->driver->err_handler; + vote = err_handler->mmio_enabled(dev); + result_data->result = merge_result(result_data->result, vote); + return; +} + +static void report_slot_reset(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + result_data = (struct aer_broadcast_data *) data; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->slot_reset) + return; + + err_handler = dev->driver->err_handler; + vote = err_handler->slot_reset(dev); + result_data->result = merge_result(result_data->result, vote); + return; +} + +static void report_resume(struct pci_dev *dev, void *data) +{ + struct pci_error_handlers *err_handler; + + dev->error_state = pci_channel_io_normal; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->slot_reset) + return; + + err_handler = dev->driver->err_handler; + err_handler->resume(dev); + return; +} + +/** + * broadcast_error_message - handle message broadcast to downstream drivers + * @device: pointer to from where in a hierarchy message is broadcasted down + * @api: callback to be broadcasted + * @state: error state + * + * Invoked during error recovery process. Once being invoked, the content + * of error severity will be broadcasted to all downstream drivers in a + * hierarchy in question. + **/ +static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, + enum pci_channel_state state, + char *error_mesg, + void (*cb)(struct pci_dev *, void *)) +{ + struct aer_broadcast_data result_data; + + printk(KERN_DEBUG "Broadcast %s message\n", error_mesg); + result_data.state = state; + if (cb == report_error_detected) + result_data.result = PCI_ERS_RESULT_CAN_RECOVER; + else + result_data.result = PCI_ERS_RESULT_RECOVERED; + + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + if (cb == report_error_detected) + dev->error_state = state; + pci_walk_bus(dev->subordinate, cb, &result_data); + if (cb == report_resume) { + pci_cleanup_aer_uncorrect_error_status(dev); + dev->error_state = pci_channel_io_normal; + } + } + else { + /* + * If the error is reported by an end point, we think this + * error is related to the upstream link of the end point. + */ + pci_walk_bus(dev->bus, cb, &result_data); + } + + return result_data.result; +} + +struct find_aer_service_data { + struct pcie_port_service_driver *aer_driver; + int is_downstream; +}; + +static int find_aer_service_iter(struct device *device, void *data) +{ + struct device_driver *driver; + struct pcie_port_service_driver *service_driver; + struct pcie_device *pcie_dev; + struct find_aer_service_data *result; + + result = (struct find_aer_service_data *) data; + + if (device->bus == &pcie_port_bus_type) { + pcie_dev = to_pcie_device(device); + if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT) + result->is_downstream = 1; + + driver = device->driver; + if (driver) { + service_driver = to_service_driver(driver); + if (service_driver->id_table->service_type == + PCIE_PORT_SERVICE_AER) { + result->aer_driver = service_driver; + return 1; + } + } + } + + return 0; +} + +static void find_aer_service(struct pci_dev *dev, + struct find_aer_service_data *data) +{ + device_for_each_child(&dev->dev, data, find_aer_service_iter); +} + +static pci_ers_result_t reset_link(struct pcie_device *aerdev, + struct pci_dev *dev) +{ + struct pci_dev *udev; + pci_ers_result_t status; + struct find_aer_service_data data; + + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev= dev->bus->self; + + data.is_downstream = 0; + data.aer_driver = NULL; + find_aer_service(udev, &data); + + /* + * Use the aer driver of the error agent firstly. + * If it hasn't the aer driver, use the root port's + */ + if (!data.aer_driver || !data.aer_driver->reset_link) { + if (data.is_downstream && + aerdev->device.driver && + to_service_driver(aerdev->device.driver)->reset_link) { + data.aer_driver = + to_service_driver(aerdev->device.driver); + } else { + printk(KERN_DEBUG "No link-reset support to Device ID" + "[%s]\n", + dev->dev.bus_id); + return PCI_ERS_RESULT_DISCONNECT; + } + } + + status = data.aer_driver->reset_link(udev); + if (status != PCI_ERS_RESULT_RECOVERED) { + printk(KERN_DEBUG "Link reset at upstream Device ID" + "[%s] failed\n", + udev->dev.bus_id); + return PCI_ERS_RESULT_DISCONNECT; + } + + return status; +} + +/** + * do_recovery - handle nonfatal/fatal error recovery process + * @aerdev: pointer to a pcie_device data structure of root port + * @dev: pointer to a pci_dev data structure of agent detecting an error + * @severity: error severity type + * + * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast + * error detected message to all downstream drivers within a hierarchy in + * question and return the returned code. + **/ +static pci_ers_result_t do_recovery(struct pcie_device *aerdev, + struct pci_dev *dev, + int severity) +{ + pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; + enum pci_channel_state state; + + if (severity == AER_FATAL) + state = pci_channel_io_frozen; + else + state = pci_channel_io_normal; + + status = broadcast_error_message(dev, + state, + "error_detected", + report_error_detected); + + if (severity == AER_FATAL) { + result = reset_link(aerdev, dev); + if (result != PCI_ERS_RESULT_RECOVERED) { + /* TODO: Should panic here? */ + return result; + } + } + + if (status == PCI_ERS_RESULT_CAN_RECOVER) + status = broadcast_error_message(dev, + state, + "mmio_enabled", + report_mmio_enabled); + + if (status == PCI_ERS_RESULT_NEED_RESET) { + /* + * TODO: Should call platform-specific + * functions to reset slot before calling + * drivers' slot_reset callbacks? + */ + status = broadcast_error_message(dev, + state, + "slot_reset", + report_slot_reset); + } + + if (status == PCI_ERS_RESULT_RECOVERED) + broadcast_error_message(dev, + state, + "resume", + report_resume); + + return status; +} + +/** + * handle_error_source - handle logging error into an event log + * @aerdev: pointer to pcie_device data structure of the root port + * @dev: pointer to pci_dev data structure of error source device + * @info: comprehensive error information + * + * Invoked when an error being detected by Root Port. + **/ +static void handle_error_source(struct pcie_device * aerdev, + struct pci_dev *dev, + struct aer_err_info info) +{ + pci_ers_result_t status = 0; + int pos; + + if (info.severity == AER_CORRECTABLE) { + /* + * Correctable error does not need software intevention. + * No need to go through error recovery process. + */ + pos = pci_find_aer_capability(dev); + if (pos) + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, + info.status); + } else { + status = do_recovery(aerdev, dev, info.severity); + if (status == PCI_ERS_RESULT_RECOVERED) { + printk(KERN_DEBUG "AER driver successfully recovered\n"); + } else { + /* TODO: Should kernel panic here? */ + printk(KERN_DEBUG "AER driver didn't recover\n"); + } + } +} + +/** + * aer_enable_rootport - enable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIE bus loads AER service driver. + **/ +void aer_enable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd->port; + int pos, aer_pos; + u16 reg16; + u32 reg32; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + /* Clear PCIE Capability's Device Status */ + pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, ®16); + pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16); + + /* Disable system error generation in response to error messages */ + pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, ®16); + reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK); + pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16); + + aer_pos = pci_find_aer_capability(pdev); + /* Clear error status */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); + + /* Enable Root Port device reporting error itself */ + pci_read_config_word(pdev, pos+PCI_EXP_DEVCTL, ®16); + reg16 = reg16 | + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE; + pci_write_config_word(pdev, pos+PCI_EXP_DEVCTL, + reg16); + + /* Enable Root Port's interrupt in response to error messages */ + pci_write_config_dword(pdev, + aer_pos + PCI_ERR_ROOT_COMMAND, + ROOT_PORT_INTR_ON_MESG_MASK); +} + +/** + * disable_root_aer - disable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIE bus unloads AER service driver. + **/ +static void disable_root_aer(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd->port; + u32 reg32; + int pos; + + pos = pci_find_aer_capability(pdev); + /* Disable Root's interrupt in response to error messages */ + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0); + + /* Clear Root's error status reg */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); +} + +/** + * get_e_source - retrieve an error source + * @rpc: pointer to the root port which holds an error + * + * Invoked by DPC handler to consume an error. + **/ +static struct aer_err_source* get_e_source(struct aer_rpc *rpc) +{ + struct aer_err_source *e_source; + unsigned long flags; + + /* Lock access to Root error producer/consumer index */ + spin_lock_irqsave(&rpc->e_lock, flags); + if (rpc->prod_idx == rpc->cons_idx) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return NULL; + } + e_source = &rpc->e_sources[rpc->cons_idx]; + rpc->cons_idx++; + if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) + rpc->cons_idx = 0; + spin_unlock_irqrestore(&rpc->e_lock, flags); + + return e_source; +} + +static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) +{ + int pos; + + pos = pci_find_aer_capability(dev); + + /* The device might not support AER */ + if (!pos) + return AER_SUCCESS; + + if (info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, + &info->status); + if (!(info->status & ERR_CORRECTABLE_ERROR_MASK)) + return AER_UNSUCCESS; + } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || + info->severity == AER_NONFATAL) { + + /* Link is still healthy for IO reads */ + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, + &info->status); + if (!(info->status & ERR_UNCORRECTABLE_ERROR_MASK)) + return AER_UNSUCCESS; + + if (info->status & AER_LOG_TLP_MASKS) { + info->flags |= AER_TLP_HEADER_VALID_FLAG; + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + } + } + + return AER_SUCCESS; +} + +/** + * aer_isr_one_error - consume an error detected by root port + * @p_device: pointer to error root port service device + * @e_src: pointer to an error source + **/ +static void aer_isr_one_error(struct pcie_device *p_device, + struct aer_err_source *e_src) +{ + struct device *s_device; + struct aer_err_info e_info = {0, 0, 0,}; + int i; + u16 id; + + /* + * There is a possibility that both correctable error and + * uncorrectable error being logged. Report correctable error first. + */ + for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) { + if (i > 4) + break; + if (!(e_src->status & i)) + continue; + + /* Init comprehensive error information */ + if (i & PCI_ERR_ROOT_COR_RCV) { + id = ERR_COR_ID(e_src->id); + e_info.severity = AER_CORRECTABLE; + } else { + id = ERR_UNCOR_ID(e_src->id); + e_info.severity = ((e_src->status >> 6) & 1); + } + if (e_src->status & + (PCI_ERR_ROOT_MULTI_COR_RCV | + PCI_ERR_ROOT_MULTI_UNCOR_RCV)) + e_info.flags |= AER_MULTI_ERROR_VALID_FLAG; + if (!(s_device = find_source_device(p_device->port, id))) { + printk(KERN_DEBUG "%s->can't find device of ID%04x\n", + __FUNCTION__, id); + continue; + } + if (get_device_error_info(to_pci_dev(s_device), &e_info) == + AER_SUCCESS) { + aer_print_error(to_pci_dev(s_device), &e_info); + handle_error_source(p_device, + to_pci_dev(s_device), + e_info); + } + } +} + +/** + * aer_isr - consume errors detected by root port + * @context: pointer to a private data of pcie device + * + * Invoked, as DPC, when root port records new detected error + **/ +void aer_isr(void *context) +{ + struct pcie_device *p_device = (struct pcie_device *) context; + struct aer_rpc *rpc = get_service_data(p_device); + struct aer_err_source *e_src; + + mutex_lock(&rpc->rpc_mutex); + e_src = get_e_source(rpc); + while (e_src) { + aer_isr_one_error(p_device, e_src); + e_src = get_e_source(rpc); + } + mutex_unlock(&rpc->rpc_mutex); + + wake_up(&rpc->wait_release); +} + +/** + * aer_delete_rootport - disable root port aer and delete service data + * @rpc: pointer to a root port device being deleted + * + * Invoked when AER service unloaded on a specific Root Port + **/ +void aer_delete_rootport(struct aer_rpc *rpc) +{ + /* Disable root port AER itself */ + disable_root_aer(rpc); + + kfree(rpc); +} + +/** + * aer_init - provide AER initialization + * @dev: pointer to AER pcie device + * + * Invoked when AER service driver is loaded. + **/ +int aer_init(struct pcie_device *dev) +{ + int status; + + /* Run _OSC Method */ + status = aer_osc_setup(dev->port); + + if(status != OSC_METHOD_RUN_SUCCESS) { + printk(KERN_DEBUG "%s: AER service init fails - %s\n", + __FUNCTION__, + (status == OSC_METHOD_NOT_SUPPORTED) ? + "No ACPI _OSC support" : "Run ACPI _OSC fails"); + + if (!forceload) + return status; + } + + return AER_SUCCESS; +} + +EXPORT_SYMBOL_GPL(pci_find_aer_capability); +EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); +EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); +EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); + diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c new file mode 100644 index 000000000000..3933d4f30e8c --- /dev/null +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -0,0 +1,248 @@ +/* + * drivers/pci/pcie/aer/aerdrv_errprint.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Format error messages and print them to console. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + */ + +#include +#include +#include +#include +#include +#include + +#include "aerdrv.h" + +#define AER_AGENT_RECEIVER 0 +#define AER_AGENT_REQUESTER 1 +#define AER_AGENT_COMPLETER 2 +#define AER_AGENT_TRANSMITTER 3 + +#define AER_AGENT_REQUESTER_MASK (PCI_ERR_UNC_COMP_TIME| \ + PCI_ERR_UNC_UNSUP) + +#define AER_AGENT_COMPLETER_MASK PCI_ERR_UNC_COMP_ABORT + +#define AER_AGENT_TRANSMITTER_MASK(t, e) (e & (PCI_ERR_COR_REP_ROLL| \ + ((t == AER_CORRECTABLE) ? PCI_ERR_COR_REP_TIMER: 0))) + +#define AER_GET_AGENT(t, e) \ + ((e & AER_AGENT_COMPLETER_MASK) ? AER_AGENT_COMPLETER : \ + (e & AER_AGENT_REQUESTER_MASK) ? AER_AGENT_REQUESTER : \ + (AER_AGENT_TRANSMITTER_MASK(t, e)) ? AER_AGENT_TRANSMITTER : \ + AER_AGENT_RECEIVER) + +#define AER_PHYSICAL_LAYER_ERROR_MASK PCI_ERR_COR_RCVR +#define AER_DATA_LINK_LAYER_ERROR_MASK(t, e) \ + (PCI_ERR_UNC_DLP| \ + PCI_ERR_COR_BAD_TLP| \ + PCI_ERR_COR_BAD_DLLP| \ + PCI_ERR_COR_REP_ROLL| \ + ((t == AER_CORRECTABLE) ? \ + PCI_ERR_COR_REP_TIMER: 0)) + +#define AER_PHYSICAL_LAYER_ERROR 0 +#define AER_DATA_LINK_LAYER_ERROR 1 +#define AER_TRANSACTION_LAYER_ERROR 2 + +#define AER_GET_LAYER_ERROR(t, e) \ + ((e & AER_PHYSICAL_LAYER_ERROR_MASK) ? \ + AER_PHYSICAL_LAYER_ERROR : \ + (e & AER_DATA_LINK_LAYER_ERROR_MASK(t, e)) ? \ + AER_DATA_LINK_LAYER_ERROR : \ + AER_TRANSACTION_LAYER_ERROR) + +/* + * AER error strings + */ +static char* aer_error_severity_string[] = { + "Uncorrected (Non-Fatal)", + "Uncorrected (Fatal)", + "Corrected" +}; + +static char* aer_error_layer[] = { + "Physical Layer", + "Data Link Layer", + "Transaction Layer" +}; +static char* aer_correctable_error_string[] = { + "Receiver Error ", /* Bit Position 0 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "Bad TLP ", /* Bit Position 6 */ + "Bad DLLP ", /* Bit Position 7 */ + "RELAY_NUM Rollover ", /* Bit Position 8 */ + NULL, + NULL, + NULL, + "Replay Timer Timeout ", /* Bit Position 12 */ + "Advisory Non-Fatal ", /* Bit Position 13 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +}; + +static char* aer_uncorrectable_error_string[] = { + NULL, + NULL, + NULL, + NULL, + "Data Link Protocol ", /* Bit Position 4 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Poisoned TLP ", /* Bit Position 12 */ + "Flow Control Protocol ", /* Bit Position 13 */ + "Completion Timeout ", /* Bit Position 14 */ + "Completer Abort ", /* Bit Position 15 */ + "Unexpected Completion ", /* Bit Position 16 */ + "Receiver Overflow ", /* Bit Position 17 */ + "Malformed TLP ", /* Bit Position 18 */ + "ECRC ", /* Bit Position 19 */ + "Unsupported Request ", /* Bit Position 20 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +}; + +static char* aer_agent_string[] = { + "Receiver ID", + "Requester ID", + "Completer ID", + "Transmitter ID" +}; + +static char * aer_get_error_source_name(int severity, + unsigned int status, + char errmsg_buff[]) +{ + int i; + char * errmsg = NULL; + + for (i = 0; i < 32; i++) { + if (!(status & (1 << i))) + continue; + + if (severity == AER_CORRECTABLE) + errmsg = aer_correctable_error_string[i]; + else + errmsg = aer_uncorrectable_error_string[i]; + + if (!errmsg) { + sprintf(errmsg_buff, "Unknown Error Bit %2d ", i); + errmsg = errmsg_buff; + } + + break; + } + + return errmsg; +} + +static DEFINE_SPINLOCK(logbuf_lock); +static char errmsg_buff[100]; +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +{ + char * errmsg; + int err_layer, agent; + char * loglevel; + + if (info->severity == AER_CORRECTABLE) + loglevel = KERN_WARNING; + else + loglevel = KERN_ERR; + + printk("%s+------ PCI-Express Device Error ------+\n", loglevel); + printk("%sError Severity\t\t: %s\n", loglevel, + aer_error_severity_string[info->severity]); + + if ( info->status == 0) { + printk("%sPCIE Bus Error type\t: (Unaccessible)\n", loglevel); + printk("%sUnaccessible Received\t: %s\n", loglevel, + info->flags & AER_MULTI_ERROR_VALID_FLAG ? + "Multiple" : "First"); + printk("%sUnregistered Agent ID\t: %04x\n", loglevel, + (dev->bus->number << 8) | dev->devfn); + } else { + err_layer = AER_GET_LAYER_ERROR(info->severity, info->status); + printk("%sPCIE Bus Error type\t: %s\n", loglevel, + aer_error_layer[err_layer]); + + spin_lock(&logbuf_lock); + errmsg = aer_get_error_source_name(info->severity, + info->status, + errmsg_buff); + printk("%s%s\t: %s\n", loglevel, errmsg, + info->flags & AER_MULTI_ERROR_VALID_FLAG ? + "Multiple" : "First"); + spin_unlock(&logbuf_lock); + + agent = AER_GET_AGENT(info->severity, info->status); + printk("%s%s\t\t: %04x\n", loglevel, + aer_agent_string[agent], + (dev->bus->number << 8) | dev->devfn); + + printk("%sVendorID=%04xh, DeviceID=%04xh," + " Bus=%02xh, Device=%02xh, Function=%02xh\n", + loglevel, + dev->vendor, + dev->device, + dev->bus->number, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + + if (info->flags & AER_TLP_HEADER_VALID_FLAG) { + unsigned char *tlp = (unsigned char *) &info->tlp; + printk("%sTLB Header:\n", loglevel); + printk("%s%02x%02x%02x%02x %02x%02x%02x%02x" + " %02x%02x%02x%02x %02x%02x%02x%02x\n", + loglevel, + *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, + *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), + *(tlp + 11), *(tlp + 10), *(tlp + 9), + *(tlp + 8), *(tlp + 15), *(tlp + 14), + *(tlp + 13), *(tlp + 12)); + } + } +} + diff --git a/include/linux/aer.h b/include/linux/aer.h new file mode 100644 index 000000000000..402e178b38eb --- /dev/null +++ b/include/linux/aer.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + */ + +#ifndef _AER_H_ +#define _AER_H_ + +#if defined(CONFIG_PCIEAER) +/* pci-e port driver needs this function to enable aer */ +extern int pci_enable_pcie_error_reporting(struct pci_dev *dev); +extern int pci_find_aer_capability(struct pci_dev *dev); +extern int pci_disable_pcie_error_reporting(struct pci_dev *dev); +extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); +#else +#define pci_enable_pcie_error_reporting(dev) do { } while (0) +#define pci_find_aer_capability(dev) do { } while (0) +#define pci_disable_pcie_error_reporting(dev) do { } while (0) +#define pci_cleanup_aer_uncorrect_error_status(dev) do { } while (0) +#endif + +#endif //_AER_H_ + diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index b44e01a70914..6cd91e3f9820 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -62,6 +62,12 @@ struct pcie_port_service_driver { int (*suspend) (struct pcie_device *dev, pm_message_t state); int (*resume) (struct pcie_device *dev); + /* Service Error Recovery Handler */ + struct pci_error_handlers *err_handler; + + /* Link Reset Capability - AER service driver specific */ + pci_ers_result_t (*reset_link) (struct pci_dev *dev); + const struct pcie_port_service_id *id_table; struct device_driver driver; }; -- cgit v1.2.3-71-gd317 From b19441af185559118e8247382ea4f2f76ebffc6d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 28 Aug 2006 11:43:25 -0700 Subject: PCI: fix __must_check warnings Signed-off-by: Greg Kroah-Hartman --- drivers/pci/bus.c | 22 ++++++-- drivers/pci/hotplug/fakephp.c | 18 ++++-- drivers/pci/pci-driver.c | 5 +- drivers/pci/pci-sysfs.c | 112 ++++++++++++++++++++++++------------- drivers/pci/pcie/aer/aerdrv_core.c | 3 +- drivers/pci/pcie/portdrv_core.c | 6 +- drivers/pci/pcie/portdrv_pci.c | 16 ++++-- drivers/pci/probe.c | 16 +++++- include/linux/pci.h | 2 +- 9 files changed, 138 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 5f7db9d2436e..aadaa3c8096b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -77,9 +77,12 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, * This adds a single pci device to the global * device list and adds sysfs and procfs entries */ -void __devinit pci_bus_add_device(struct pci_dev *dev) +int __devinit pci_bus_add_device(struct pci_dev *dev) { - device_add(&dev->dev); + int retval; + retval = device_add(&dev->dev); + if (retval) + return retval; down_write(&pci_bus_sem); list_add_tail(&dev->global_list, &pci_devices); @@ -87,6 +90,7 @@ void __devinit pci_bus_add_device(struct pci_dev *dev) pci_proc_attach_device(dev); pci_create_sysfs_dev_files(dev); + return 0; } /** @@ -104,6 +108,7 @@ void __devinit pci_bus_add_device(struct pci_dev *dev) void __devinit pci_bus_add_devices(struct pci_bus *bus) { struct pci_dev *dev; + int retval; list_for_each_entry(dev, &bus->devices, bus_list) { /* @@ -112,7 +117,9 @@ void __devinit pci_bus_add_devices(struct pci_bus *bus) */ if (!list_empty(&dev->global_list)) continue; - pci_bus_add_device(dev); + retval = pci_bus_add_device(dev); + if (retval) + dev_err(&dev->dev, "Error adding device, continuing\n"); } list_for_each_entry(dev, &bus->devices, bus_list) { @@ -129,10 +136,13 @@ void __devinit pci_bus_add_devices(struct pci_bus *bus) list_add_tail(&dev->subordinate->node, &dev->bus->children); up_write(&pci_bus_sem); - } + } pci_bus_add_devices(dev->subordinate); - - sysfs_create_link(&dev->subordinate->class_dev.kobj, &dev->dev.kobj, "bridge"); + retval = sysfs_create_link(&dev->subordinate->class_dev.kobj, + &dev->dev.kobj, "bridge"); + if (retval) + dev_err(&dev->dev, "Error creating sysfs " + "bridge symlink, continuing...\n"); } } } diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index dd2b762777c4..05a4f0f90186 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -176,7 +176,9 @@ static void pci_rescan_slot(struct pci_dev *temp) struct pci_bus *bus = temp->bus; struct pci_dev *dev; int func; + int retval; u8 hdr_type; + if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) { temp->hdr_type = hdr_type & 0x7f; if (!pci_find_slot(bus->number, temp->devfn)) { @@ -185,8 +187,12 @@ static void pci_rescan_slot(struct pci_dev *temp) dbg("New device on %s function %x:%x\n", bus->name, temp->devfn >> 3, temp->devfn & 7); - pci_bus_add_device(dev); - add_slot(dev); + retval = pci_bus_add_device(dev); + if (retval) + dev_err(&dev->dev, "error adding " + "device, continuing.\n"); + else + add_slot(dev); } } /* multifunction device? */ @@ -205,8 +211,12 @@ static void pci_rescan_slot(struct pci_dev *temp) dbg("New device on %s function %x:%x\n", bus->name, temp->devfn >> 3, temp->devfn & 7); - pci_bus_add_device(dev); - add_slot(dev); + retval = pci_bus_add_device(dev); + if (retval) + dev_err(&dev->dev, "error adding " + "device, continuing.\n"); + else + add_slot(dev); } } } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d8ace1f90dd2..309629e03bae 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -56,6 +56,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) subdevice=PCI_ANY_ID, class=0, class_mask=0; unsigned long driver_data=0; int fields=0; + int retval = 0; fields = sscanf(buf, "%x %x %x %x %x %x %lux", &vendor, &device, &subvendor, &subdevice, @@ -82,10 +83,12 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) spin_unlock(&pdrv->dynids.lock); if (get_driver(&pdrv->driver)) { - driver_attach(&pdrv->driver); + retval = driver_attach(&pdrv->driver); put_driver(&pdrv->driver); } + if (retval) + return retval; return count; } static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 010e01c4bd43..a1d2e979b17f 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -117,6 +117,7 @@ is_enabled_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); + int retval = 0; /* this can crash the machine when done on the "wrong" device */ if (!capable(CAP_SYS_ADMIN)) @@ -126,8 +127,10 @@ is_enabled_store(struct device *dev, struct device_attribute *attr, pci_disable_device(pdev); if (*buf == '1') - pci_enable_device(pdev); + retval = pci_enable_device(pdev); + if (retval) + return retval; return count; } @@ -425,16 +428,39 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, return pci_mmap_page_range(pdev, vma, mmap_type, 0); } +/** + * pci_remove_resource_files - cleanup resource files + * @dev: dev to cleanup + * + * If we created resource files for @dev, remove them from sysfs and + * free their resources. + */ +static void +pci_remove_resource_files(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + struct bin_attribute *res_attr; + + res_attr = pdev->res_attr[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + } +} + /** * pci_create_resource_files - create resource files in sysfs for @dev * @dev: dev in question * * Walk the resources in @dev creating files for each resource available. */ -static void -pci_create_resource_files(struct pci_dev *pdev) +static int pci_create_resource_files(struct pci_dev *pdev) { int i; + int retval; /* Expose the PCI resources from this device as files */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { @@ -457,35 +483,19 @@ pci_create_resource_files(struct pci_dev *pdev) res_attr->size = pci_resource_len(pdev, i); res_attr->mmap = pci_mmap_resource; res_attr->private = &pdev->resource[i]; - sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - } - } -} - -/** - * pci_remove_resource_files - cleanup resource files - * @dev: dev to cleanup - * - * If we created resource files for @dev, remove them from sysfs and - * free their resources. - */ -static void -pci_remove_resource_files(struct pci_dev *pdev) -{ - int i; - - for (i = 0; i < PCI_ROM_RESOURCE; i++) { - struct bin_attribute *res_attr; - - res_attr = pdev->res_attr[i]; - if (res_attr) { - sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); - kfree(res_attr); + retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); + if (retval) { + pci_remove_resource_files(pdev); + return retval; + } + } else { + return -ENOMEM; } } + return 0; } #else /* !HAVE_PCI_MMAP */ -static inline void pci_create_resource_files(struct pci_dev *dev) { return; } +static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; } static inline void pci_remove_resource_files(struct pci_dev *dev) { return; } #endif /* HAVE_PCI_MMAP */ @@ -570,22 +580,27 @@ static struct bin_attribute pcie_config_attr = { .write = pci_write_config, }; -int pci_create_sysfs_dev_files (struct pci_dev *pdev) +int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) { + struct bin_attribute *rom_attr = NULL; + int retval; + if (!sysfs_initialized) return -EACCES; if (pdev->cfg_size < 4096) - sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr); + retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr); else - sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr); + retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr); + if (retval) + goto err; - pci_create_resource_files(pdev); + retval = pci_create_resource_files(pdev); + if (retval) + goto err_bin_file; /* If the device has a ROM, try to expose it in sysfs. */ if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) { - struct bin_attribute *rom_attr; - rom_attr = kzalloc(sizeof(*rom_attr), GFP_ATOMIC); if (rom_attr) { pdev->rom_attr = rom_attr; @@ -595,13 +610,28 @@ int pci_create_sysfs_dev_files (struct pci_dev *pdev) rom_attr->attr.owner = THIS_MODULE; rom_attr->read = pci_read_rom; rom_attr->write = pci_write_rom; - sysfs_create_bin_file(&pdev->dev.kobj, rom_attr); + retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr); + if (retval) + goto err_rom; + } else { + retval = -ENOMEM; + goto err_bin_file; } } /* add platform-specific attributes */ pcibios_add_platform_entries(pdev); - + return 0; + +err_rom: + kfree(rom_attr); +err_bin_file: + if (pdev->cfg_size < 4096) + sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr); + else + sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr); +err: + return retval; } /** @@ -630,10 +660,14 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev) static int __init pci_sysfs_init(void) { struct pci_dev *pdev = NULL; - + int retval; + sysfs_initialized = 1; - for_each_pci_dev(pdev) - pci_create_sysfs_dev_files(pdev); + for_each_pci_dev(pdev) { + retval = pci_create_sysfs_dev_files(pdev); + if (retval) + return retval; + } return 0; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 5591043acea7..1c7e660d6535 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -357,7 +357,8 @@ static int find_aer_service_iter(struct device *device, void *data) static void find_aer_service(struct pci_dev *dev, struct find_aer_service_data *data) { - device_for_each_child(&dev->dev, data, find_aer_service_iter); + int retval; + retval = device_for_each_child(&dev->dev, data, find_aer_service_iter); } static pci_ers_result_t reset_link(struct pcie_device *aerdev, diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index cf9e810b4bf8..bd6615b4d40e 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -340,8 +340,7 @@ static int suspend_iter(struct device *dev, void *data) int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state) { - device_for_each_child(&dev->dev, &state, suspend_iter); - return 0; + return device_for_each_child(&dev->dev, &state, suspend_iter); } static int resume_iter(struct device *dev, void *data) @@ -359,8 +358,7 @@ static int resume_iter(struct device *dev, void *data) int pcie_port_device_resume(struct pci_dev *dev) { - device_for_each_child(&dev->dev, NULL, resume_iter); - return 0; + return device_for_each_child(&dev->dev, NULL, resume_iter); } #endif diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index e4a2429986f0..037690e08f5f 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -147,8 +147,10 @@ static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, { struct aer_broadcast_data result_data = {error, PCI_ERS_RESULT_CAN_RECOVER}; + int retval; - device_for_each_child(&dev->dev, &result_data, error_detected_iter); + /* can not fail */ + retval = device_for_each_child(&dev->dev, &result_data, error_detected_iter); return result_data.result; } @@ -181,8 +183,10 @@ static int mmio_enabled_iter(struct device *device, void *data) static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) { pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; + int retval; - device_for_each_child(&dev->dev, &status, mmio_enabled_iter); + /* get true return value from &status */ + retval = device_for_each_child(&dev->dev, &status, mmio_enabled_iter); return status; } @@ -214,6 +218,7 @@ static int slot_reset_iter(struct device *device, void *data) static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) { pci_ers_result_t status; + int retval; /* If fatal, restore cfg space for possible link reset at upstream */ if (dev->error_state == pci_channel_io_frozen) { @@ -221,7 +226,8 @@ static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) pci_enable_pcie_error_reporting(dev); } - device_for_each_child(&dev->dev, &status, slot_reset_iter); + /* get true return value from &status */ + retval = device_for_each_child(&dev->dev, &status, slot_reset_iter); return status; } @@ -248,7 +254,9 @@ static int resume_iter(struct device *device, void *data) static void pcie_portdrv_err_resume(struct pci_dev *dev) { - device_for_each_child(&dev->dev, NULL, resume_iter); + int retval; + /* nothing to do with error value, if it ever happens */ + retval = device_for_each_child(&dev->dev, NULL, resume_iter); } /* diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c5a58d1c6c1c..a3b0a5eb5054 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -339,6 +339,7 @@ pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr) { struct pci_bus *child; int i; + int retval; /* * Allocate a new bus, and inherit stuff from the parent.. @@ -356,8 +357,13 @@ pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr) child->class_dev.class = &pcibus_class; sprintf(child->class_dev.class_id, "%04x:%02x", pci_domain_nr(child), busnr); - class_device_register(&child->class_dev); - class_device_create_file(&child->class_dev, &class_device_attr_cpuaffinity); + retval = class_device_register(&child->class_dev); + if (retval) + goto error_register; + retval = class_device_create_file(&child->class_dev, + &class_device_attr_cpuaffinity); + if (retval) + goto error_file_create; /* * Set up the primary, secondary and subordinate @@ -375,6 +381,12 @@ pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr) bridge->subordinate = child; return child; + +error_file_create: + class_device_unregister(&child->class_dev); +error_register: + kfree(child); + return NULL; } struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr) diff --git a/include/linux/pci.h b/include/linux/pci.h index 3ec72551ac31..c9bb7bee52c7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -431,7 +431,7 @@ int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn); void pci_device_add(struct pci_dev *dev, struct pci_bus *bus); unsigned int pci_scan_child_bus(struct pci_bus *bus); -void pci_bus_add_device(struct pci_dev *dev); +int __must_check pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); -- cgit v1.2.3-71-gd317 From 50b0075520a0acba9cabab5203bbce918b966d9a Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 16 Aug 2006 17:42:18 +0100 Subject: PCI: Multiprobe sanitizer There are numerous drivers that can use multithreaded probing but having some kind of global flag as the way to control this makes migration to threaded probing hard and since it enables it everywhere and is almost as likely to cause serious pain as holding a clog dance in a minefield. If we have a pci_driver multithread_probe flag to inherit you can turn it on for one driver at a time. From playing so far however I think we need a different model at the device layer which serializes until the called probe function says "ok you can start another one now". That would need some kind of flag and semaphore plus a helper function. Anyway in the absence of that this is a starting point to usefully play with this stuff Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 6 +++++- include/linux/pci.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 309629e03bae..b1c0c707d96c 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -421,7 +421,11 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner) drv->driver.bus = &pci_bus_type; drv->driver.owner = owner; drv->driver.kobj.ktype = &pci_driver_kobj_type; - drv->driver.multithread_probe = pci_multithread_probe; + + if (pci_multithread_probe) + drv->driver.multithread_probe = pci_multithread_probe; + else + drv->driver.multithread_probe = drv->multithread_probe; spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); diff --git a/include/linux/pci.h b/include/linux/pci.h index c9bb7bee52c7..549d8410974b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -356,6 +356,8 @@ struct pci_driver { struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; + + int multithread_probe; }; #define to_pci_driver(drv) container_of(drv,struct pci_driver, driver) -- cgit v1.2.3-71-gd317 From 24f8aa9b464b73e0553f092b747770940ee0ea54 Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Tue, 12 Sep 2006 10:16:36 -0700 Subject: PCI: add pci_stop_bus_device This patch adds pci_stop_bus_device() which stops a PCI device (detach the driver, remove from the global list and so on) and any children. This is needed for ACPI based PCI-to-PCI bridge hot-remove, and it will be also needed for ACPI based PCI root bridge hot-remove. Signed-off-by: Kenji Kaneshige Signed-off-by: MUNEDA Takahiro Signed-off-by: Satoru Takeuchi Signed-off-by: Kristen Carlson Accardi Signed-off-by: Greg Kroah-Hartman --- drivers/pci/remove.c | 37 ++++++++++++++++++++++++++++++++++++- include/linux/pci.h | 1 + 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 99ffbd478b29..430281b2e921 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -16,8 +16,11 @@ static void pci_free_resources(struct pci_dev *dev) } } -static void pci_destroy_dev(struct pci_dev *dev) +static void pci_stop_dev(struct pci_dev *dev) { + if (!dev->global_list.next) + return; + if (!list_empty(&dev->global_list)) { pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); @@ -27,6 +30,11 @@ static void pci_destroy_dev(struct pci_dev *dev) dev->global_list.next = dev->global_list.prev = NULL; up_write(&pci_bus_sem); } +} + +static void pci_destroy_dev(struct pci_dev *dev) +{ + pci_stop_dev(dev); /* Remove the device from the device lists, and prevent any further * list accesses from this device */ @@ -119,5 +127,32 @@ void pci_remove_behind_bridge(struct pci_dev *dev) } } +static void pci_stop_bus_devices(struct pci_bus *bus) +{ + struct list_head *l, *n; + + list_for_each_safe(l, n, &bus->devices) { + struct pci_dev *dev = pci_dev_b(l); + pci_stop_bus_device(dev); + } +} + +/** + * pci_stop_bus_device - stop a PCI device and any children + * @dev: the device to stop + * + * Stop a PCI device (detach the driver, remove from the global list + * and so on). This also stop any subordinate buses and children in a + * depth-first manner. + */ +void pci_stop_bus_device(struct pci_dev *dev) +{ + if (dev->subordinate) + pci_stop_bus_devices(dev->subordinate); + + pci_stop_dev(dev); +} + EXPORT_SYMBOL(pci_remove_bus_device); EXPORT_SYMBOL(pci_remove_behind_bridge); +EXPORT_SYMBOL_GPL(pci_stop_bus_device); diff --git a/include/linux/pci.h b/include/linux/pci.h index 549d8410974b..5c3a4176eb64 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -441,6 +441,7 @@ extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); extern void pci_remove_bus_device(struct pci_dev *dev); +extern void pci_stop_bus_device(struct pci_dev *dev); void pci_setup_cardbus(struct pci_bus *bus); /* Generic PCI functions exported to card drivers */ -- cgit v1.2.3-71-gd317 From d48f1de2d8170814fb64effa320848410c466f95 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 20 Sep 2006 20:56:02 +0100 Subject: [MIPS] Remove EV96100 as previously announced. Signed-off-by: Ralf Baechle --- Documentation/feature-removal-schedule.txt | 8 - Documentation/kernel-parameters.txt | 2 - arch/mips/Kconfig | 24 - arch/mips/Makefile | 7 - arch/mips/configs/atlas_defconfig | 1 - arch/mips/configs/bigsur_defconfig | 1 - arch/mips/configs/capcella_defconfig | 1 - arch/mips/configs/cobalt_defconfig | 1 - arch/mips/configs/db1000_defconfig | 1 - arch/mips/configs/db1100_defconfig | 1 - arch/mips/configs/db1200_defconfig | 1 - arch/mips/configs/db1500_defconfig | 1 - arch/mips/configs/db1550_defconfig | 1 - arch/mips/configs/ddb5477_defconfig | 1 - arch/mips/configs/decstation_defconfig | 1 - arch/mips/configs/e55_defconfig | 1 - arch/mips/configs/emma2rh_defconfig | 1 - arch/mips/configs/ev64120_defconfig | 1 - arch/mips/configs/ev96100_defconfig | 850 --------------------------- arch/mips/configs/excite_defconfig | 1 - arch/mips/configs/ip22_defconfig | 1 - arch/mips/configs/ip27_defconfig | 1 - arch/mips/configs/ip32_defconfig | 1 - arch/mips/configs/it8172_defconfig | 1 - arch/mips/configs/ivr_defconfig | 1 - arch/mips/configs/jaguar-atx_defconfig | 1 - arch/mips/configs/jmr3927_defconfig | 1 - arch/mips/configs/lasat200_defconfig | 1 - arch/mips/configs/malta_defconfig | 1 - arch/mips/configs/mipssim_defconfig | 1 - arch/mips/configs/mpc30x_defconfig | 1 - arch/mips/configs/ocelot_3_defconfig | 1 - arch/mips/configs/ocelot_c_defconfig | 1 - arch/mips/configs/ocelot_defconfig | 1 - arch/mips/configs/ocelot_g_defconfig | 1 - arch/mips/configs/pb1100_defconfig | 1 - arch/mips/configs/pb1500_defconfig | 1 - arch/mips/configs/pb1550_defconfig | 1 - arch/mips/configs/pnx8550-jbs_defconfig | 1 - arch/mips/configs/pnx8550-v2pci_defconfig | 1 - arch/mips/configs/qemu_defconfig | 1 - arch/mips/configs/rbhma4500_defconfig | 1 - arch/mips/configs/rm200_defconfig | 1 - arch/mips/configs/sb1250-swarm_defconfig | 1 - arch/mips/configs/sead_defconfig | 1 - arch/mips/configs/tb0226_defconfig | 1 - arch/mips/configs/tb0229_defconfig | 1 - arch/mips/configs/tb0287_defconfig | 1 - arch/mips/configs/workpad_defconfig | 1 - arch/mips/configs/wrppmc_defconfig | 1 - arch/mips/configs/yosemite_defconfig | 1 - arch/mips/defconfig | 1 - arch/mips/galileo-boards/ev96100/Makefile | 9 - arch/mips/galileo-boards/ev96100/init.c | 173 ------ arch/mips/galileo-boards/ev96100/irq.c | 77 --- arch/mips/galileo-boards/ev96100/puts.c | 138 ----- arch/mips/galileo-boards/ev96100/reset.c | 70 --- arch/mips/galileo-boards/ev96100/setup.c | 159 ----- arch/mips/galileo-boards/ev96100/time.c | 88 --- arch/mips/pci/Makefile | 2 - arch/mips/pci/fixup-ev96100.c | 48 -- arch/mips/pci/ops-gt96100.c | 169 ------ arch/mips/pci/pci-ev96100.c | 63 -- include/asm-mips/bootinfo.h | 3 +- include/asm-mips/galileo-boards/gt96100.h | 427 -------------- include/asm-mips/mach-ev96100/mach-gt64120.h | 46 -- include/asm-mips/serial.h | 4 +- include/linux/pci_ids.h | 3 - 68 files changed, 3 insertions(+), 2414 deletions(-) delete mode 100644 arch/mips/configs/ev96100_defconfig delete mode 100644 arch/mips/galileo-boards/ev96100/Makefile delete mode 100644 arch/mips/galileo-boards/ev96100/init.c delete mode 100644 arch/mips/galileo-boards/ev96100/irq.c delete mode 100644 arch/mips/galileo-boards/ev96100/puts.c delete mode 100644 arch/mips/galileo-boards/ev96100/reset.c delete mode 100644 arch/mips/galileo-boards/ev96100/setup.c delete mode 100644 arch/mips/galileo-boards/ev96100/time.c delete mode 100644 arch/mips/pci/fixup-ev96100.c delete mode 100644 arch/mips/pci/ops-gt96100.c delete mode 100644 arch/mips/pci/pci-ev96100.c delete mode 100644 include/asm-mips/galileo-boards/gt96100.h delete mode 100644 include/asm-mips/mach-ev96100/mach-gt64120.h (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 611acc32fdf5..bf56b20652b0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -217,14 +217,6 @@ Who: Nick Piggin --------------------------- -What: Support for the MIPS EV96100 evaluation board -When: September 2006 -Why: Does no longer build since at least November 15, 2003, apparently - no userbase left. -Who: Ralf Baechle - ---------------------------- - What: Support for the Momentum / PMC-Sierra Jaguar ATX evaluation board When: September 2006 Why: Does no longer build since quite some time, and was never popular, diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c918cc3f65fb..255ec535bba8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -573,8 +573,6 @@ running once the system is up. gscd= [HW,CD] Format: - gt96100eth= [NET] MIPS GT96100 Advanced Communication Controller - gus= [HW,OSS] Format: ,,, diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 235208d658ff..30750c54bdf5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -203,26 +203,6 @@ config MIPS_EV64120 . Say Y here if you wish to build a kernel for this platform. -config MIPS_EV96100 - bool "Galileo EV96100 Evaluation board (EXPERIMENTAL)" - depends on EXPERIMENTAL - select DMA_NONCOHERENT - select HW_HAS_PCI - select IRQ_CPU - select MIPS_GT96100 - select RM7000_CPU_SCACHE - select SWAP_IO_SPACE - select SYS_HAS_CPU_R5000 - select SYS_HAS_CPU_RM7000 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL if EXPERIMENTAL - select SYS_SUPPORTS_BIG_ENDIAN - help - This is an evaluation board based on the Galileo GT-96100 LAN/WAN - communications controllers containing a MIPS R5000 compatible core - running at 83MHz. Their website is . Say Y - here if you wish to build a kernel for this platform. - config MIPS_IVR bool "Globespan IVR board" select DMA_NONCOHERENT @@ -1069,10 +1049,6 @@ config AU1X00_USB_DEVICE depends on MIPS_PB1500 || MIPS_PB1100 || MIPS_PB1000 default n -config MIPS_GT96100 - bool - select MIPS_GT64120 - config IT8172_CIR bool depends on MIPS_ITE8172 || MIPS_IVR diff --git a/arch/mips/Makefile b/arch/mips/Makefile index f4227d24227d..e521826b4234 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -279,13 +279,6 @@ core-$(CONFIG_MIPS_EV64120) += arch/mips/gt64120/common/ cflags-$(CONFIG_MIPS_EV64120) += -Iinclude/asm-mips/mach-ev64120 load-$(CONFIG_MIPS_EV64120) += 0xffffffff80100000 -# -# Galileo EV96100 Board -# -core-$(CONFIG_MIPS_EV96100) += arch/mips/galileo-boards/ev96100/ -cflags-$(CONFIG_MIPS_EV96100) += -Iinclude/asm-mips/mach-ev96100 -load-$(CONFIG_MIPS_EV96100) += 0xffffffff80100000 - # # Wind River PPMC Board (4KC + GT64120) # diff --git a/arch/mips/configs/atlas_defconfig b/arch/mips/configs/atlas_defconfig index 54274065e9a5..2774ba9606b7 100644 --- a/arch/mips/configs/atlas_defconfig +++ b/arch/mips/configs/atlas_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 887fd959482a..e12a475dcbf4 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/capcella_defconfig b/arch/mips/configs/capcella_defconfig index a01344f3a4c2..bfade9abb767 100644 --- a/arch/mips/configs/capcella_defconfig +++ b/arch/mips/configs/capcella_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index c95682445a28..78db2a8a711b 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y CONFIG_MIPS_COBALT=y # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/db1000_defconfig b/arch/mips/configs/db1000_defconfig index c2f33d3af62c..93cca1585bc3 100644 --- a/arch/mips/configs/db1000_defconfig +++ b/arch/mips/configs/db1000_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_DB1000=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/db1100_defconfig b/arch/mips/configs/db1100_defconfig index 8c44d16ae9a2..ffd99252a837 100644 --- a/arch/mips/configs/db1100_defconfig +++ b/arch/mips/configs/db1100_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_DB1100=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/db1200_defconfig b/arch/mips/configs/db1200_defconfig index c13768e75ac5..63eac5e89b9c 100644 --- a/arch/mips/configs/db1200_defconfig +++ b/arch/mips/configs/db1200_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_DB1200=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/db1500_defconfig b/arch/mips/configs/db1500_defconfig index 8aea73fae7fb..25a095f7dc4e 100644 --- a/arch/mips/configs/db1500_defconfig +++ b/arch/mips/configs/db1500_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_DB1500=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/db1550_defconfig b/arch/mips/configs/db1550_defconfig index 90ccb7359630..dda469c842b3 100644 --- a/arch/mips/configs/db1550_defconfig +++ b/arch/mips/configs/db1550_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_DB1550=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ddb5477_defconfig b/arch/mips/configs/ddb5477_defconfig index b598cf08f156..fcd3dd19bc74 100644 --- a/arch/mips/configs/ddb5477_defconfig +++ b/arch/mips/configs/ddb5477_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 597150b14077..8683e0df12e0 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set CONFIG_MACH_DECSTATION=y # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/e55_defconfig b/arch/mips/configs/e55_defconfig index d9a0d0eb0683..4ace61c95778 100644 --- a/arch/mips/configs/e55_defconfig +++ b/arch/mips/configs/e55_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/emma2rh_defconfig b/arch/mips/configs/emma2rh_defconfig index 375b2ac24a49..5847c916c130 100644 --- a/arch/mips/configs/emma2rh_defconfig +++ b/arch/mips/configs/emma2rh_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ev64120_defconfig b/arch/mips/configs/ev64120_defconfig index b0afc118bd5c..bc4c4f125c48 100644 --- a/arch/mips/configs/ev64120_defconfig +++ b/arch/mips/configs/ev64120_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set CONFIG_MIPS_EV64120=y -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ev96100_defconfig b/arch/mips/configs/ev96100_defconfig deleted file mode 100644 index 0bdc10f11610..000000000000 --- a/arch/mips/configs/ev96100_defconfig +++ /dev/null @@ -1,850 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-rc1 -# Thu Jul 6 10:04:05 2006 -# -CONFIG_MIPS=y - -# -# Machine selection -# -# CONFIG_MIPS_MTX1 is not set -# CONFIG_MIPS_BOSPORUS is not set -# CONFIG_MIPS_PB1000 is not set -# CONFIG_MIPS_PB1100 is not set -# CONFIG_MIPS_PB1500 is not set -# CONFIG_MIPS_PB1550 is not set -# CONFIG_MIPS_PB1200 is not set -# CONFIG_MIPS_DB1000 is not set -# CONFIG_MIPS_DB1100 is not set -# CONFIG_MIPS_DB1500 is not set -# CONFIG_MIPS_DB1550 is not set -# CONFIG_MIPS_DB1200 is not set -# CONFIG_MIPS_MIRAGE is not set -# CONFIG_BASLER_EXCITE is not set -# CONFIG_MIPS_COBALT is not set -# CONFIG_MACH_DECSTATION is not set -# CONFIG_MIPS_EV64120 is not set -CONFIG_MIPS_EV96100=y -# CONFIG_MIPS_IVR is not set -# CONFIG_MIPS_ITE8172 is not set -# CONFIG_MACH_JAZZ is not set -# CONFIG_LASAT is not set -# CONFIG_MIPS_ATLAS is not set -# CONFIG_MIPS_MALTA is not set -# CONFIG_MIPS_SEAD is not set -# CONFIG_WR_PPMC is not set -# CONFIG_MIPS_SIM is not set -# CONFIG_MOMENCO_JAGUAR_ATX is not set -# CONFIG_MOMENCO_OCELOT is not set -# CONFIG_MOMENCO_OCELOT_3 is not set -# CONFIG_MOMENCO_OCELOT_C is not set -# CONFIG_MOMENCO_OCELOT_G is not set -# CONFIG_MIPS_XXS1500 is not set -# CONFIG_PNX8550_V2PCI is not set -# CONFIG_PNX8550_JBS is not set -# CONFIG_DDB5477 is not set -# CONFIG_MACH_VR41XX is not set -# CONFIG_PMC_YOSEMITE is not set -# CONFIG_QEMU is not set -# CONFIG_MARKEINS is not set -# CONFIG_SGI_IP22 is not set -# CONFIG_SGI_IP27 is not set -# CONFIG_SGI_IP32 is not set -# CONFIG_SIBYTE_BIGSUR is not set -# CONFIG_SIBYTE_SWARM is not set -# CONFIG_SIBYTE_SENTOSA is not set -# CONFIG_SIBYTE_RHONE is not set -# CONFIG_SIBYTE_CARMEL is not set -# CONFIG_SIBYTE_PTSWARM is not set -# CONFIG_SIBYTE_LITTLESUR is not set -# CONFIG_SIBYTE_CRHINE is not set -# CONFIG_SIBYTE_CRHONE is not set -# CONFIG_SNI_RM200_PCI is not set -# CONFIG_TOSHIBA_JMR3927 is not set -# CONFIG_TOSHIBA_RBTX4927 is not set -# CONFIG_TOSHIBA_RBTX4938 is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y -CONFIG_DMA_NONCOHERENT=y -CONFIG_DMA_NEED_PCI_MAP_STATE=y -CONFIG_CPU_BIG_ENDIAN=y -# CONFIG_CPU_LITTLE_ENDIAN is not set -CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y -CONFIG_IRQ_CPU=y -CONFIG_MIPS_GT64120=y -CONFIG_SWAP_IO_SPACE=y -CONFIG_MIPS_GT96100=y -CONFIG_MIPS_L1_CACHE_SHIFT=5 - -# -# CPU selection -# -# CONFIG_CPU_MIPS32_R1 is not set -# CONFIG_CPU_MIPS32_R2 is not set -# CONFIG_CPU_MIPS64_R1 is not set -# CONFIG_CPU_MIPS64_R2 is not set -# CONFIG_CPU_R3000 is not set -# CONFIG_CPU_TX39XX is not set -# CONFIG_CPU_VR41XX is not set -# CONFIG_CPU_R4300 is not set -# CONFIG_CPU_R4X00 is not set -# CONFIG_CPU_TX49XX is not set -# CONFIG_CPU_R5000 is not set -# CONFIG_CPU_R5432 is not set -# CONFIG_CPU_R6000 is not set -# CONFIG_CPU_NEVADA is not set -# CONFIG_CPU_R8000 is not set -# CONFIG_CPU_R10000 is not set -CONFIG_CPU_RM7000=y -# CONFIG_CPU_RM9000 is not set -# CONFIG_CPU_SB1 is not set -CONFIG_SYS_HAS_CPU_R5000=y -CONFIG_SYS_HAS_CPU_RM7000=y -CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y -CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y -CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y -CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y - -# -# Kernel type -# -CONFIG_32BIT=y -# CONFIG_64BIT is not set -CONFIG_PAGE_SIZE_4KB=y -# CONFIG_PAGE_SIZE_8KB is not set -# CONFIG_PAGE_SIZE_16KB is not set -# CONFIG_PAGE_SIZE_64KB is not set -CONFIG_BOARD_SCACHE=y -CONFIG_RM7000_CPU_SCACHE=y -CONFIG_CPU_HAS_PREFETCH=y -CONFIG_MIPS_MT_DISABLED=y -# CONFIG_MIPS_MT_SMTC is not set -# CONFIG_MIPS_MT_SMP is not set -# CONFIG_MIPS_VPE_LOADER is not set -# CONFIG_64BIT_PHYS_ADDR is not set -CONFIG_CPU_HAS_LLSC=y -CONFIG_CPU_HAS_SYNC=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_CPU_SUPPORTS_HIGHMEM=y -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -CONFIG_SPLIT_PTLOCK_CPUS=4 -# CONFIG_RESOURCES_64BIT is not set -# CONFIG_HZ_48 is not set -# CONFIG_HZ_100 is not set -# CONFIG_HZ_128 is not set -# CONFIG_HZ_250 is not set -# CONFIG_HZ_256 is not set -CONFIG_HZ_1000=y -# CONFIG_HZ_1024 is not set -CONFIG_SYS_SUPPORTS_ARBIT_HZ=y -CONFIG_HZ=1000 -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set -# CONFIG_IKCONFIG is not set -CONFIG_RELAY=y -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set -# CONFIG_HOTPLUG is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_BASE_FULL=y -CONFIG_RT_MUTEXES=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_SHMEM=y -CONFIG_SLAB=y -CONFIG_VM_EVENT_COUNTERS=y -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 -# CONFIG_SLOB is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_KMOD is not set - -# -# Block layer -# -# CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -CONFIG_DEFAULT_AS=y -# CONFIG_DEFAULT_DEADLINE is not set -# CONFIG_DEFAULT_CFQ is not set -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" - -# -# Bus options (PCI, PCMCIA, EISA, ISA, TC) -# -CONFIG_HW_HAS_PCI=y -# CONFIG_PCI is not set -CONFIG_MMU=y - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set - -# -# PCI Hotplug Support -# - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -CONFIG_TRAD_SIGNALS=y - -# -# Networking -# -CONFIG_NET=y - -# -# Networking options -# -# CONFIG_NETDEBUG is not set -# CONFIG_PACKET is not set -CONFIG_UNIX=y -CONFIG_XFRM=y -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -# CONFIG_IP_PNP_DHCP is not set -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y -# CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set -CONFIG_NETWORK_SECMARK=y -# CONFIG_NETFILTER is not set - -# -# DCCP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_DCCP is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set - -# -# TIPC Configuration (EXPERIMENTAL) -# -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -CONFIG_IEEE80211=m -# CONFIG_IEEE80211_DEBUG is not set -CONFIG_IEEE80211_CRYPT_WEP=m -CONFIG_IEEE80211_CRYPT_CCMP=m -CONFIG_IEEE80211_SOFTMAC=m -# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set -CONFIG_WIRELESS_EXT=y - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set -# CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# -CONFIG_CONNECTOR=m - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# - -# -# Block devices -# -# CONFIG_BLK_DEV_COW_COMMON is not set -# CONFIG_BLK_DEV_LOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -# CONFIG_BLK_DEV_INITRD is not set -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_RAID_ATTRS=m -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# -# CONFIG_FUSION is not set - -# -# IEEE 1394 (FireWire) support -# - -# -# I2O device support -# - -# -# Network device support -# -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# PHY device support -# -CONFIG_PHYLIB=m - -# -# MII PHY device drivers -# -CONFIG_MARVELL_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_LXT_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_SMSC_PHY=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set -CONFIG_MIPS_GT96100ETH=y -# CONFIG_DM9000 is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -CONFIG_INPUT=y - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -# CONFIG_SERIO_I8042 is not set -CONFIG_SERIO_SERPORT=y -# CONFIG_SERIO_LIBPS2 is not set -CONFIG_SERIO_RAW=m -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=4 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_RTC is not set -# CONFIG_GEN_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# -# CONFIG_TCG_TPM is not set -# CONFIG_TELCLOCK is not set - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# SPI support -# -# CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Hardware Monitoring support -# -# CONFIG_HWMON is not set -# CONFIG_HWMON_VID is not set - -# -# Misc devices -# - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set -CONFIG_VIDEO_V4L2=y - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_FIRMWARE_EDID is not set -# CONFIG_FB is not set - -# -# Console display driver support -# -# CONFIG_VGA_CONSOLE is not set -CONFIG_DUMMY_CONSOLE=y - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB_ARCH_HAS_EHCI is not set - -# -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' -# - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# LED devices -# -# CONFIG_NEW_LEDS is not set - -# -# LED drivers -# - -# -# LED Triggers -# - -# -# InfiniBand support -# - -# -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# - -# -# Real Time Clock -# -# CONFIG_RTC_CLASS is not set - -# -# DMA Engine support -# -# CONFIG_DMA_ENGINE is not set - -# -# DMA Clients -# - -# -# DMA Devices -# - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XIP is not set -# CONFIG_EXT3_FS is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -# CONFIG_FS_POSIX_ACL is not set -# CONFIG_XFS_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_INOTIFY=y -CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -CONFIG_FUSE_FS=m - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_SYSFS=y -# CONFIG_TMPFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y -# CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_DEBUG_FS is not set -CONFIG_CROSSCOMPILE=y -CONFIG_CMDLINE="" - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=m -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_CRC32C=m -# CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -CONFIG_CRC16=m -CONFIG_CRC32=m -CONFIG_LIBCRC32C=m -CONFIG_ZLIB_INFLATE=m -CONFIG_ZLIB_DEFLATE=m -CONFIG_PLIST=y diff --git a/arch/mips/configs/excite_defconfig b/arch/mips/configs/excite_defconfig index 045ebd089893..eb87cbbfd037 100644 --- a/arch/mips/configs/excite_defconfig +++ b/arch/mips/configs/excite_defconfig @@ -26,7 +26,6 @@ CONFIG_BASLER_EXCITE=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index ef16d1fb5071..9f22d13e729d 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 4bf1ee7f5f00..414a649dff8a 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index f83dc09c3ca9..dec2ba6ba03f 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/it8172_defconfig b/arch/mips/configs/it8172_defconfig index a91d72a9ca86..37f9dd7187b1 100644 --- a/arch/mips/configs/it8172_defconfig +++ b/arch/mips/configs/it8172_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set CONFIG_MIPS_ITE8172=y # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ivr_defconfig b/arch/mips/configs/ivr_defconfig index cebc67212d06..18874a4c24fe 100644 --- a/arch/mips/configs/ivr_defconfig +++ b/arch/mips/configs/ivr_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set CONFIG_MIPS_IVR=y # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/jaguar-atx_defconfig b/arch/mips/configs/jaguar-atx_defconfig index 5d9eb11aba3d..9b529857f802 100644 --- a/arch/mips/configs/jaguar-atx_defconfig +++ b/arch/mips/configs/jaguar-atx_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig index be45a9044d06..fded3f73815f 100644 --- a/arch/mips/configs/jmr3927_defconfig +++ b/arch/mips/configs/jmr3927_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/lasat200_defconfig b/arch/mips/configs/lasat200_defconfig index 64dc9f45a19c..0354bfa18b39 100644 --- a/arch/mips/configs/lasat200_defconfig +++ b/arch/mips/configs/lasat200_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 2690baf15a85..b21ca470273a 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig index c298979c18ae..adbeeadddb8f 100644 --- a/arch/mips/configs/mipssim_defconfig +++ b/arch/mips/configs/mipssim_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/mpc30x_defconfig b/arch/mips/configs/mpc30x_defconfig index 4405d127a941..79fd544fcb2a 100644 --- a/arch/mips/configs/mpc30x_defconfig +++ b/arch/mips/configs/mpc30x_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ocelot_3_defconfig b/arch/mips/configs/ocelot_3_defconfig index ec5758f22676..4d87da2b99fd 100644 --- a/arch/mips/configs/ocelot_3_defconfig +++ b/arch/mips/configs/ocelot_3_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ocelot_c_defconfig b/arch/mips/configs/ocelot_c_defconfig index 0d33d87de1a1..ce7c26475328 100644 --- a/arch/mips/configs/ocelot_c_defconfig +++ b/arch/mips/configs/ocelot_c_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ocelot_defconfig b/arch/mips/configs/ocelot_defconfig index 4b999102715e..3da2a43d1608 100644 --- a/arch/mips/configs/ocelot_defconfig +++ b/arch/mips/configs/ocelot_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/ocelot_g_defconfig b/arch/mips/configs/ocelot_g_defconfig index 827b344f6010..3a3e5ff4a3ef 100644 --- a/arch/mips/configs/ocelot_g_defconfig +++ b/arch/mips/configs/ocelot_g_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/pb1100_defconfig b/arch/mips/configs/pb1100_defconfig index 9ed60fef69e0..1a16e92900cb 100644 --- a/arch/mips/configs/pb1100_defconfig +++ b/arch/mips/configs/pb1100_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_PB1100=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/pb1500_defconfig b/arch/mips/configs/pb1500_defconfig index 6774254b1be6..9ea8edea6f29 100644 --- a/arch/mips/configs/pb1500_defconfig +++ b/arch/mips/configs/pb1500_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_PB1500=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/pb1550_defconfig b/arch/mips/configs/pb1550_defconfig index 1afe5bf6e765..c4a158976f8f 100644 --- a/arch/mips/configs/pb1550_defconfig +++ b/arch/mips/configs/pb1550_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS_PB1550=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig index ac616c82d348..1cbf270c301c 100644 --- a/arch/mips/configs/pnx8550-jbs_defconfig +++ b/arch/mips/configs/pnx8550-jbs_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/pnx8550-v2pci_defconfig b/arch/mips/configs/pnx8550-v2pci_defconfig index a8eb51bae3f3..bec30b15b9bd 100644 --- a/arch/mips/configs/pnx8550-v2pci_defconfig +++ b/arch/mips/configs/pnx8550-v2pci_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/qemu_defconfig b/arch/mips/configs/qemu_defconfig index 6a63a113b7ea..daa40c8ff694 100644 --- a/arch/mips/configs/qemu_defconfig +++ b/arch/mips/configs/qemu_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/rbhma4500_defconfig b/arch/mips/configs/rbhma4500_defconfig index 6779f449bd2d..2f5650227ba3 100644 --- a/arch/mips/configs/rbhma4500_defconfig +++ b/arch/mips/configs/rbhma4500_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index b7826d3a2b77..f26b338333ac 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig index 625c1c619b6b..9041f095f96f 100644 --- a/arch/mips/configs/sb1250-swarm_defconfig +++ b/arch/mips/configs/sb1250-swarm_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/sead_defconfig b/arch/mips/configs/sead_defconfig index 4401b602118f..02abb2f1bfaf 100644 --- a/arch/mips/configs/sead_defconfig +++ b/arch/mips/configs/sead_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig index 2ba4e25e8c34..ca3d0c4ba15b 100644 --- a/arch/mips/configs/tb0226_defconfig +++ b/arch/mips/configs/tb0226_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/tb0229_defconfig b/arch/mips/configs/tb0229_defconfig index fc8a407c1add..4e2009ace278 100644 --- a/arch/mips/configs/tb0229_defconfig +++ b/arch/mips/configs/tb0229_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig index effcb63b81a3..535a813d01a9 100644 --- a/arch/mips/configs/tb0287_defconfig +++ b/arch/mips/configs/tb0287_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig index 8f66ad71cb7e..3a3ef20b21cc 100644 --- a/arch/mips/configs/workpad_defconfig +++ b/arch/mips/configs/workpad_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/wrppmc_defconfig b/arch/mips/configs/wrppmc_defconfig index 3e4b16b39827..e6b1dea55842 100644 --- a/arch/mips/configs/wrppmc_defconfig +++ b/arch/mips/configs/wrppmc_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/configs/yosemite_defconfig b/arch/mips/configs/yosemite_defconfig index 3a68d8a25b66..06a072b77b1c 100644 --- a/arch/mips/configs/yosemite_defconfig +++ b/arch/mips/configs/yosemite_defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/defconfig b/arch/mips/defconfig index fff6fcc96212..d620c463a78b 100644 --- a/arch/mips/defconfig +++ b/arch/mips/defconfig @@ -25,7 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_EV96100 is not set # CONFIG_MIPS_IVR is not set # CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set diff --git a/arch/mips/galileo-boards/ev96100/Makefile b/arch/mips/galileo-boards/ev96100/Makefile deleted file mode 100644 index cd868ec78cbc..000000000000 --- a/arch/mips/galileo-boards/ev96100/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Copyright 2000 MontaVista Software Inc. -# Author: MontaVista Software, Inc. -# ppopov@mvista.com or source@mvista.com -# -# Makefile for the Galileo EV96100 board. -# - -obj-y += init.o irq.o puts.o reset.o time.o setup.o diff --git a/arch/mips/galileo-boards/ev96100/init.c b/arch/mips/galileo-boards/ev96100/init.c deleted file mode 100644 index a01fe9b36f2c..000000000000 --- a/arch/mips/galileo-boards/ev96100/init.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/generic/generic.c - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -/* Environment variable */ - -typedef struct { - char *name; - char *val; -} t_env_var; - -int prom_argc; -char **prom_argv, **prom_envp; - -int init_debug = 0; - -char * __init prom_getcmdline(void) -{ - return &(arcs_cmdline[0]); -} - -unsigned long __init prom_free_prom_memory(void) -{ - return 0; -} - -void __init prom_init_cmdline(void) -{ - char *cp; - int actr; - - actr = 1; /* Always ignore argv[0] */ - - cp = &(arcs_cmdline[0]); - while(actr < prom_argc) { - strcpy(cp, prom_argv[actr]); - cp += strlen(prom_argv[actr]); - *cp++ = ' '; - actr++; - } - if (cp != &(arcs_cmdline[0])) /* get rid of trailing space */ - --cp; - *cp = '\0'; -} - -char *prom_getenv(char *envname) -{ - /* - * Return a pointer to the given environment variable. - */ - - t_env_var *env = (t_env_var *) prom_envp; - int i; - - i = strlen(envname); - - while (env->name) { - if (strncmp(envname, env->name, i) == 0) { - return (env->val); - } - env++; - } - return (NULL); -} - -static inline unsigned char str2hexnum(unsigned char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return 0; /* foo */ -} - -static inline void str2eaddr(unsigned char *ea, unsigned char *str) -{ - int i; - - for (i = 0; i < 6; i++) { - unsigned char num; - - if ((*str == '.') || (*str == ':')) - str++; - num = str2hexnum(*str++) << 4; - num |= (str2hexnum(*str++)); - ea[i] = num; - } -} - -int get_ethernet_addr(char *ethernet_addr) -{ - char *ethaddr_str; - - ethaddr_str = prom_getenv("ethaddr"); - if (!ethaddr_str) { - printk("ethaddr not set in boot prom\n"); - return -1; - } - str2eaddr(ethernet_addr, ethaddr_str); - - if (init_debug > 1) { - int i; - printk("get_ethernet_addr: "); - for (i = 0; i < 5; i++) - printk("%02x:", - (unsigned char) *(ethernet_addr + i)); - printk("%02x\n", *(ethernet_addr + i)); - } - - return 0; -} - -const char *get_system_type(void) -{ - return "Galileo EV96100"; -} - -void __init prom_init(void) -{ - volatile unsigned char *uart; - char ppbuf[8]; - - prom_argc = fw_arg0; - prom_argv = (char **) fw_arg1; - prom_envp = (char **) fw_arg2; - - mips_machgroup = MACH_GROUP_GALILEO; - mips_machtype = MACH_EV96100; - - prom_init_cmdline(); - - /* 32 MB upgradable */ - add_memory_region(0, 32 << 20, BOOT_MEM_RAM); -} diff --git a/arch/mips/galileo-boards/ev96100/irq.c b/arch/mips/galileo-boards/ev96100/irq.c deleted file mode 100644 index ee5d6720f23b..000000000000 --- a/arch/mips/galileo-boards/ev96100/irq.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/atlas/atlas_int.c. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static inline unsigned int ffz8(unsigned int word) -{ - unsigned long k; - - k = 7; - if (word & 0x0fUL) { k -= 4; word <<= 4; } - if (word & 0x30UL) { k -= 2; word <<= 2; } - if (word & 0x40UL) { k -= 1; } - - return k; -} - -extern void mips_timer_interrupt(struct pt_regs *regs); - -asmlinkage void ev96100_cpu_irq(unsigned int pending, struct pt_regs *regs) -{ - do_IRQ(ffz8(pending >> 8), regs); -} - -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) -{ - unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; - - if (pending & CAUSEF_IP7) - mips_timer_interrupt(regs); - else if (pending) - ev96100_cpu_irq(pending, regs); - else - spurious_interrupt(regs); -} - -void __init arch_init_irq(void) -{ - mips_cpu_irq_init(0); -} diff --git a/arch/mips/galileo-boards/ev96100/puts.c b/arch/mips/galileo-boards/ev96100/puts.c deleted file mode 100644 index 49dc6d137b9c..000000000000 --- a/arch/mips/galileo-boards/ev96100/puts.c +++ /dev/null @@ -1,138 +0,0 @@ - -/* - * Debug routines which directly access the uart. - */ - -#include -#include - - -//#define SERIAL_BASE EV96100_UART0_REGS_BASE -#define SERIAL_BASE 0xBD000020 -#define NS16550_BASE SERIAL_BASE - -#define SERA_CMD 0x0D -#define SERA_DATA 0x08 -//#define SERB_CMD 0x05 -#define SERB_CMD 20 -#define SERB_DATA 0x00 -#define TX_BUSY 0x20 - -#define TIMEOUT 0xffff -#undef SLOW_DOWN - -static const char digits[16] = "0123456789abcdef"; -static volatile unsigned char *const com1 = (unsigned char *) SERIAL_BASE; - - -#ifdef SLOW_DOWN -static inline void slow_down() -{ - int k; - for (k = 0; k < 10000; k++); -} -#else -#define slow_down() -#endif - -void putch(const unsigned char c) -{ - unsigned char ch; - int i = 0; - - do { - ch = com1[SERB_CMD]; - slow_down(); - i++; - if (i > TIMEOUT) { - break; - } - } while (0 == (ch & TX_BUSY)); - com1[SERB_DATA] = c; -} - -void putchar(const unsigned char c) -{ - unsigned char ch; - int i = 0; - - do { - ch = com1[SERB_CMD]; - slow_down(); - i++; - if (i > TIMEOUT) { - break; - } - } while (0 == (ch & TX_BUSY)); - com1[SERB_DATA] = c; -} - -void puts(unsigned char *cp) -{ - unsigned char ch; - int i = 0; - - while (*cp) { - do { - ch = com1[SERB_CMD]; - slow_down(); - i++; - if (i > TIMEOUT) { - break; - } - } while (0 == (ch & TX_BUSY)); - com1[SERB_DATA] = *cp++; - } - putch('\r'); - putch('\n'); -} - -void fputs(unsigned char *cp) -{ - unsigned char ch; - int i = 0; - - while (*cp) { - - do { - ch = com1[SERB_CMD]; - slow_down(); - i++; - if (i > TIMEOUT) { - break; - } - } while (0 == (ch & TX_BUSY)); - com1[SERB_DATA] = *cp++; - } -} - - -void put64(uint64_t ul) -{ - int cnt; - unsigned ch; - - cnt = 16; /* 16 nibbles in a 64 bit long */ - putch('0'); - putch('x'); - do { - cnt--; - ch = (unsigned char) (ul >> cnt * 4) & 0x0F; - putch(digits[ch]); - } while (cnt > 0); -} - -void put32(unsigned u) -{ - int cnt; - unsigned ch; - - cnt = 8; /* 8 nibbles in a 32 bit long */ - putch('0'); - putch('x'); - do { - cnt--; - ch = (unsigned char) (u >> cnt * 4) & 0x0F; - putch(digits[ch]); - } while (cnt > 0); -} diff --git a/arch/mips/galileo-boards/ev96100/reset.c b/arch/mips/galileo-boards/ev96100/reset.c deleted file mode 100644 index 5ef9b7f896e6..000000000000 --- a/arch/mips/galileo-boards/ev96100/reset.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Galileo EV96100 reset routines. - * - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/generic/reset.c - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -static void mips_machine_restart(char *command); -static void mips_machine_halt(void); - -static void mips_machine_restart(char *command) -{ - set_c0_status(ST0_BEV | ST0_ERL); - change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED); - flush_cache_all(); - write_c0_wired(0); - __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); - while (1); -} - -static void mips_machine_halt(void) -{ - printk(KERN_NOTICE "You can safely turn off the power\n"); - while (1) - __asm__(".set\tmips3\n\t" - "wait\n\t" - ".set\tmips0"); -} - -void mips_reboot_setup(void) -{ - _machine_restart = mips_machine_restart; - _machine_halt = mips_machine_halt; -} diff --git a/arch/mips/galileo-boards/ev96100/setup.c b/arch/mips/galileo-boards/ev96100/setup.c deleted file mode 100644 index 639ad5562c63..000000000000 --- a/arch/mips/galileo-boards/ev96100/setup.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Galileo EV96100 setup. - * - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/atlas/atlas_setup.c. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -extern char *__init prom_getcmdline(void); - -extern void mips_reboot_setup(void); - -unsigned char mac_0_1[12]; - -void __init plat_mem_setup(void) -{ - unsigned int config = read_c0_config(); - unsigned int status = read_c0_status(); - unsigned int info = read_c0_info(); - u32 tmp; - - char *argptr; - - clear_c0_status(ST0_FR); - - if (config & 0x8) - printk("Secondary cache is enabled\n"); - else - printk("Secondary cache is disabled\n"); - - if (status & (1 << 27)) - printk("User-mode cache ops enabled\n"); - else - printk("User-mode cache ops disabled\n"); - - printk("CP0 info reg: %x\n", (unsigned) info); - if (info & (1 << 28)) - printk("burst mode Scache RAMS\n"); - else - printk("pipelined Scache RAMS\n"); - - if (info & 0x1) - printk("Atomic Enable is set\n"); - - argptr = prom_getcmdline(); -#ifdef CONFIG_SERIAL_CONSOLE - if (strstr(argptr, "console=") == NULL) { - argptr = prom_getcmdline(); - strcat(argptr, " console=ttyS0,115200"); - } -#endif - - mips_reboot_setup(); - - set_io_port_base(KSEG1); - ioport_resource.start = GT_PCI_IO_BASE; - ioport_resource.end = GT_PCI_IO_BASE + 0x01ffffff; - -#ifdef CONFIG_BLK_DEV_INITRD - ROOT_DEV = MKDEV(RAMDISK_MAJOR, 0); -#endif - - - /* - * Setup GT controller master bit so we can do config cycles - */ - - /* Clear cause register bits */ - GT_WRITE(GT_INTRCAUSE_OFS, ~(GT_INTRCAUSE_MASABORT0_BIT | - GT_INTRCAUSE_TARABORT0_BIT)); - /* Setup address */ - GT_WRITE(GT_PCI0_CFGADDR_OFS, - (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) | - (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) | - ((PCI_COMMAND / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) | - GT_PCI0_CFGADDR_CONFIGEN_BIT); - - udelay(2); - tmp = GT_READ(GT_PCI0_CFGDATA_OFS); - - tmp |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_MASTER | PCI_COMMAND_SERR); - GT_WRITE(GT_PCI0_CFGADDR_OFS, - (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) | - (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) | - ((PCI_COMMAND / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) | - GT_PCI0_CFGADDR_CONFIGEN_BIT); - udelay(2); - GT_WRITE(GT_PCI0_CFGDATA_OFS, tmp); - - /* Setup address */ - GT_WRITE(GT_PCI0_CFGADDR_OFS, - (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) | - (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) | - ((PCI_COMMAND / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) | - GT_PCI0_CFGADDR_CONFIGEN_BIT); - - udelay(2); - tmp = GT_READ(GT_PCI0_CFGDATA_OFS); -} - -unsigned short get_gt_devid(void) -{ - u32 gt_devid; - - /* Figure out if this is a gt96100 or gt96100A */ - GT_WRITE(GT_PCI0_CFGADDR_OFS, - (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) | - (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) | - ((PCI_VENDOR_ID / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) | - GT_PCI0_CFGADDR_CONFIGEN_BIT); - - udelay(4); - gt_devid = GT_READ(GT_PCI0_CFGDATA_OFS); - - return gt_devid >> 16; -} diff --git a/arch/mips/galileo-boards/ev96100/time.c b/arch/mips/galileo-boards/ev96100/time.c deleted file mode 100644 index 8cbe8426491a..000000000000 --- a/arch/mips/galileo-boards/ev96100/time.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Galileo EV96100 rtc routines. - * - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/atlas/atlas_rtc.c. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -#define ALLINTS (IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5) - -extern volatile unsigned long wall_jiffies; -unsigned long missed_heart_beats = 0; - -static unsigned long r4k_offset; /* Amount to increment compare reg each time */ -static unsigned long r4k_cur; /* What counter should be at next timer irq */ - -static inline void ack_r4ktimer(unsigned long newval) -{ - write_c0_compare(newval); -} - -/* - * There are a lot of conceptually broken versions of the MIPS timer interrupt - * handler floating around. This one is rather different, but the algorithm - * is probably more robust. - */ -void mips_timer_interrupt(struct pt_regs *regs) -{ - int irq = 7; /* FIX ME */ - - if (r4k_offset == 0) { - goto null; - } - - do { - kstat_this_cpu.irqs[irq]++; - do_timer(regs); -#ifndef CONFIG_SMP - update_process_times(user_mode(regs)); -#endif - r4k_cur += r4k_offset; - ack_r4ktimer(r4k_cur); - - } while (((unsigned long)read_c0_count() - - r4k_cur) < 0x7fffffff); - return; - -null: - ack_r4ktimer(0); -} diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 368d3d896165..edefa97b2330 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_ITE_BOARD_GEN) += ops-it8172.o obj-$(CONFIG_MIPS_BONITO64) += ops-bonito64.o obj-$(CONFIG_MIPS_GT64111) += ops-gt64111.o obj-$(CONFIG_MIPS_GT64120) += ops-gt64120.o -obj-$(CONFIG_MIPS_GT96100) += ops-gt96100.o obj-$(CONFIG_PCI_MARVELL) += ops-marvell.o obj-$(CONFIG_MIPS_MSC) += ops-msc.o obj-$(CONFIG_MIPS_NILE4) += ops-nile4.o @@ -29,7 +28,6 @@ obj-$(CONFIG_LASAT) += pci-lasat.o obj-$(CONFIG_MIPS_ATLAS) += fixup-atlas.o obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o obj-$(CONFIG_MIPS_EV64120) += fixup-ev64120.o -obj-$(CONFIG_MIPS_EV96100) += fixup-ev96100.o pci-ev96100.o obj-$(CONFIG_MIPS_ITE8172) += fixup-ite8172g.o obj-$(CONFIG_MIPS_IVR) += fixup-ivr.o obj-$(CONFIG_SOC_AU1500) += fixup-au1000.o ops-au1000.o diff --git a/arch/mips/pci/fixup-ev96100.c b/arch/mips/pci/fixup-ev96100.c deleted file mode 100644 index e2bc977b6d58..000000000000 --- a/arch/mips/pci/fixup-ev96100.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * - * BRIEF MODULE DESCRIPTION - * EV96100 Board specific pci fixups. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include - -static char irq_tab_ev96100[][5] __initdata = { - [8] = { 0, 5, 5, 5, 5 }, - [9] = { 0, 2, 2, 2, 2 } -}; - -int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) -{ - return irq_tab_ev96100[slot][pin]; -} - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pci/ops-gt96100.c b/arch/mips/pci/ops-gt96100.c deleted file mode 100644 index 9e4ea6627e21..000000000000 --- a/arch/mips/pci/ops-gt96100.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * - * BRIEF MODULE DESCRIPTION - * Galileo EV96100 board specific pci support. - * - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/generic/pci.c - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include - -#include -#include -#include - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -static int static gt96100_config_access(unsigned char access_type, - struct pci_bus *bus, unsigned int devfn, int where, u32 * data) -{ - unsigned char bus = bus->number; - u32 intr; - - /* - * Because of a bug in the galileo (for slot 31). - */ - if (bus == 0 && devfn >= PCI_DEVFN(31, 0)) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* Clear cause register bits */ - GT_WRITE(GT_INTRCAUSE_OFS, ~(GT_INTRCAUSE_MASABORT0_BIT | - GT_INTRCAUSE_TARABORT0_BIT)); - - /* Setup address */ - GT_WRITE(GT_PCI0_CFGADDR_OFS, - (bus << GT_PCI0_CFGADDR_BUSNUM_SHF) | - (devfn << GT_PCI0_CFGADDR_FUNCTNUM_SHF) | - ((where / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) | - GT_PCI0_CFGADDR_CONFIGEN_BIT); - udelay(2); - - - if (access_type == PCI_ACCESS_WRITE) { - if (devfn != 0) - *data = le32_to_cpu(*data); - GT_WRITE(GT_PCI0_CFGDATA_OFS, *data); - } else { - *data = GT_READ(GT_PCI0_CFGDATA_OFS); - if (devfn != 0) - *data = le32_to_cpu(*data); - } - - udelay(2); - - /* Check for master or target abort */ - intr = GT_READ(GT_INTRCAUSE_OFS); - - if (intr & (GT_INTRCAUSE_MASABORT0_BIT | GT_INTRCAUSE_TARABORT0_BIT)) { - /* Error occured */ - - /* Clear bits */ - GT_WRITE(GT_INTRCAUSE_OFS, ~(GT_INTRCAUSE_MASABORT0_BIT | - GT_INTRCAUSE_TARABORT0_BIT)); - return -1; - } - return 0; -} - -/* - * We can't address 8 and 16 bit words directly. Instead we have to - * read/write a 32bit word and mask/modify the data we actually want. - */ -static int gt96100_pcibios_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 * val) -{ - u32 data = 0; - - if (gt96100_config_access(PCI_ACCESS_READ, bus, devfn, where, &data)) - return PCIBIOS_DEVICE_NOT_FOUND; - - switch (size) { - case 1: - *val = (data >> ((where & 3) << 3)) & 0xff; - break; - - case 2: - *val = (data >> ((where & 3) << 3)) & 0xffff; - break; - - case 4: - *val = data; - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int gt96100_pcibios_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - u32 data = 0; - - switch (size) { - case 1: - if (gt96100_config_access(PCI_ACCESS_READ, bus, devfn, where, &data)) - return -1; - - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - - if (gt96100_config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; - - case 2: - if (gt96100_config_access(PCI_ACCESS_READ, bus, devfn, where, &data)) - return -1; - - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - - if (gt96100_config_access(PCI_ACCESS_WRITE, dev, where, &data)) - return -1; - - - return PCIBIOS_SUCCESSFUL; - - case 4: - if (gt96100_config_access(PCI_ACCESS_WRITE, dev, where, &val)) - return -1; - - return PCIBIOS_SUCCESSFUL; - } -} - -struct pci_ops gt96100_pci_ops = { - .read = gt96100_pcibios_read, - .write = gt96100_pcibios_write -}; diff --git a/arch/mips/pci/pci-ev96100.c b/arch/mips/pci/pci-ev96100.c deleted file mode 100644 index f9457ea00def..000000000000 --- a/arch/mips/pci/pci-ev96100.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Copyright (C) 2004 by Ralf Baechle (ralf@linux-mips.org) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include - -static struct resource pci_io_resource = { - .name = "io pci IO space", - .start = 0x10000000, - .end = 0x11ffffff, - .flags = IORESOURCE_IO -}; - -static struct resource pci_mem_resource = { - .name = "ext pci memory space", - .start = 0x12000000, - .end = 0x13ffffff, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops gt96100_pci_ops; - -struct pci_controller ev96100_controller = { - .pci_ops = >96100_pci_ops, - .io_resource = &pci_io_resource, - .mem_resource = &pci_mem_resource, -}; - -static void ev96100_pci_init(void) -{ - register_pci_controller(&ev96100_controller); -} - -arch_initcall(ev96100_pci_init); diff --git a/include/asm-mips/bootinfo.h b/include/asm-mips/bootinfo.h index 3b745e76f429..78c35ec46362 100644 --- a/include/asm-mips/bootinfo.h +++ b/include/asm-mips/bootinfo.h @@ -112,8 +112,7 @@ * Valid machtype for group GALILEO */ #define MACH_GROUP_GALILEO 11 /* Galileo Eval Boards */ -#define MACH_EV96100 0 /* EV96100 */ -#define MACH_EV64120A 1 /* EV64120A */ +#define MACH_EV64120A 0 /* EV64120A */ /* * Valid machtype for group MOMENCO diff --git a/include/asm-mips/galileo-boards/gt96100.h b/include/asm-mips/galileo-boards/gt96100.h deleted file mode 100644 index aabd1b629c19..000000000000 --- a/include/asm-mips/galileo-boards/gt96100.h +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright 2000 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * stevel@mvista.com or source@mvista.com - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License (Version 2) as - * published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Register offsets of the MIPS GT96100 Advanced Communication Controller. - */ -#ifndef _GT96100_H -#define _GT96100_H - -/* - * Galileo GT96100 internal register base. - */ -#define MIPS_GT96100_BASE (KSEG1ADDR(0x14000000)) - -#define GT96100_WRITE(ofs, data) \ - *(volatile u32 *)(MIPS_GT96100_BASE+ofs) = cpu_to_le32(data) -#define GT96100_READ(ofs) \ - le32_to_cpu(*(volatile u32 *)(MIPS_GT96100_BASE+ofs)) - -#define GT96100_ETH_IO_SIZE 0x4000 - -/************************************************************************ - * Register offset addresses follow - ************************************************************************/ - -/* CPU Interface Control Registers */ -#define GT96100_CPU_INTERF_CONFIG 0x000000 - -/* Ethernet Ports */ -#define GT96100_ETH_PHY_ADDR_REG 0x080800 -#define GT96100_ETH_SMI_REG 0x080810 -/* - These are offsets to port 0 registers. Add GT96100_ETH_IO_SIZE to - get offsets to port 1 registers. -*/ -#define GT96100_ETH_PORT_CONFIG 0x084800 -#define GT96100_ETH_PORT_CONFIG_EXT 0x084808 -#define GT96100_ETH_PORT_COMM 0x084810 -#define GT96100_ETH_PORT_STATUS 0x084818 -#define GT96100_ETH_SER_PARAM 0x084820 -#define GT96100_ETH_HASH_TBL_PTR 0x084828 -#define GT96100_ETH_FLOW_CNTRL_SRC_ADDR_L 0x084830 -#define GT96100_ETH_FLOW_CNTRL_SRC_ADDR_H 0x084838 -#define GT96100_ETH_SDMA_CONFIG 0x084840 -#define GT96100_ETH_SDMA_COMM 0x084848 -#define GT96100_ETH_INT_CAUSE 0x084850 -#define GT96100_ETH_INT_MASK 0x084858 -#define GT96100_ETH_1ST_RX_DESC_PTR0 0x084880 -#define GT96100_ETH_1ST_RX_DESC_PTR1 0x084884 -#define GT96100_ETH_1ST_RX_DESC_PTR2 0x084888 -#define GT96100_ETH_1ST_RX_DESC_PTR3 0x08488C -#define GT96100_ETH_CURR_RX_DESC_PTR0 0x0848A0 -#define GT96100_ETH_CURR_RX_DESC_PTR1 0x0848A4 -#define GT96100_ETH_CURR_RX_DESC_PTR2 0x0848A8 -#define GT96100_ETH_CURR_RX_DESC_PTR3 0x0848AC -#define GT96100_ETH_CURR_TX_DESC_PTR0 0x0848E0 -#define GT96100_ETH_CURR_TX_DESC_PTR1 0x0848E4 -#define GT96100_ETH_MIB_COUNT_BASE 0x085800 - -/* SDMAs */ -#define GT96100_SDMA_GROUP_CONFIG 0x101AF0 -/* SDMA Group 0 */ -#define GT96100_SDMA_G0_CHAN0_CONFIG 0x000900 -#define GT96100_SDMA_G0_CHAN0_COMM 0x000908 -#define GT96100_SDMA_G0_CHAN0_RX_DESC_BASE 0x008900 -#define GT96100_SDMA_G0_CHAN0_CURR_RX_DESC_PTR 0x008910 -#define GT96100_SDMA_G0_CHAN0_TX_DESC_BASE 0x00C900 -#define GT96100_SDMA_G0_CHAN0_CURR_TX_DESC_PTR 0x00C910 -#define GT96100_SDMA_G0_CHAN0_1ST_TX_DESC_PTR 0x00C914 -#define GT96100_SDMA_G0_CHAN1_CONFIG 0x010900 -#define GT96100_SDMA_G0_CHAN1_COMM 0x010908 -#define GT96100_SDMA_G0_CHAN1_RX_DESC_BASE 0x018900 -#define GT96100_SDMA_G0_CHAN1_CURR_RX_DESC_PTR 0x018910 -#define GT96100_SDMA_G0_CHAN1_TX_DESC_BASE 0x01C900 -#define GT96100_SDMA_G0_CHAN1_CURR_TX_DESC_PTR 0x01C910 -#define GT96100_SDMA_G0_CHAN1_1ST_TX_DESC_PTR 0x01C914 -#define GT96100_SDMA_G0_CHAN2_CONFIG 0x020900 -#define GT96100_SDMA_G0_CHAN2_COMM 0x020908 -#define GT96100_SDMA_G0_CHAN2_RX_DESC_BASE 0x028900 -#define GT96100_SDMA_G0_CHAN2_CURR_RX_DESC_PTR 0x028910 -#define GT96100_SDMA_G0_CHAN2_TX_DESC_BASE 0x02C900 -#define GT96100_SDMA_G0_CHAN2_CURR_TX_DESC_PTR 0x02C910 -#define GT96100_SDMA_G0_CHAN2_1ST_TX_DESC_PTR 0x02C914 -#define GT96100_SDMA_G0_CHAN3_CONFIG 0x030900 -#define GT96100_SDMA_G0_CHAN3_COMM 0x030908 -#define GT96100_SDMA_G0_CHAN3_RX_DESC_BASE 0x038900 -#define GT96100_SDMA_G0_CHAN3_CURR_RX_DESC_PTR 0x038910 -#define GT96100_SDMA_G0_CHAN3_TX_DESC_BASE 0x03C900 -#define GT96100_SDMA_G0_CHAN3_CURR_TX_DESC_PTR 0x03C910 -#define GT96100_SDMA_G0_CHAN3_1ST_TX_DESC_PTR 0x03C914 -#define GT96100_SDMA_G0_CHAN4_CONFIG 0x040900 -#define GT96100_SDMA_G0_CHAN4_COMM 0x040908 -#define GT96100_SDMA_G0_CHAN4_RX_DESC_BASE 0x048900 -#define GT96100_SDMA_G0_CHAN4_CURR_RX_DESC_PTR 0x048910 -#define GT96100_SDMA_G0_CHAN4_TX_DESC_BASE 0x04C900 -#define GT96100_SDMA_G0_CHAN4_CURR_TX_DESC_PTR 0x04C910 -#define GT96100_SDMA_G0_CHAN4_1ST_TX_DESC_PTR 0x04C914 -#define GT96100_SDMA_G0_CHAN5_CONFIG 0x050900 -#define GT96100_SDMA_G0_CHAN5_COMM 0x050908 -#define GT96100_SDMA_G0_CHAN5_RX_DESC_BASE 0x058900 -#define GT96100_SDMA_G0_CHAN5_CURR_RX_DESC_PTR 0x058910 -#define GT96100_SDMA_G0_CHAN5_TX_DESC_BASE 0x05C900 -#define GT96100_SDMA_G0_CHAN5_CURR_TX_DESC_PTR 0x05C910 -#define GT96100_SDMA_G0_CHAN5_1ST_TX_DESC_PTR 0x05C914 -#define GT96100_SDMA_G0_CHAN6_CONFIG 0x060900 -#define GT96100_SDMA_G0_CHAN6_COMM 0x060908 -#define GT96100_SDMA_G0_CHAN6_RX_DESC_BASE 0x068900 -#define GT96100_SDMA_G0_CHAN6_CURR_RX_DESC_PTR 0x068910 -#define GT96100_SDMA_G0_CHAN6_TX_DESC_BASE 0x06C900 -#define GT96100_SDMA_G0_CHAN6_CURR_TX_DESC_PTR 0x06C910 -#define GT96100_SDMA_G0_CHAN6_1ST_TX_DESC_PTR 0x06C914 -#define GT96100_SDMA_G0_CHAN7_CONFIG 0x070900 -#define GT96100_SDMA_G0_CHAN7_COMM 0x070908 -#define GT96100_SDMA_G0_CHAN7_RX_DESC_BASE 0x078900 -#define GT96100_SDMA_G0_CHAN7_CURR_RX_DESC_PTR 0x078910 -#define GT96100_SDMA_G0_CHAN7_TX_DESC_BASE 0x07C900 -#define GT96100_SDMA_G0_CHAN7_CURR_TX_DESC_PTR 0x07C910 -#define GT96100_SDMA_G0_CHAN7_1ST_TX_DESC_PTR 0x07C914 -/* SDMA Group 1 */ -#define GT96100_SDMA_G1_CHAN0_CONFIG 0x100900 -#define GT96100_SDMA_G1_CHAN0_COMM 0x100908 -#define GT96100_SDMA_G1_CHAN0_RX_DESC_BASE 0x108900 -#define GT96100_SDMA_G1_CHAN0_CURR_RX_DESC_PTR 0x108910 -#define GT96100_SDMA_G1_CHAN0_TX_DESC_BASE 0x10C900 -#define GT96100_SDMA_G1_CHAN0_CURR_TX_DESC_PTR 0x10C910 -#define GT96100_SDMA_G1_CHAN0_1ST_TX_DESC_PTR 0x10C914 -#define GT96100_SDMA_G1_CHAN1_CONFIG 0x110900 -#define GT96100_SDMA_G1_CHAN1_COMM 0x110908 -#define GT96100_SDMA_G1_CHAN1_RX_DESC_BASE 0x118900 -#define GT96100_SDMA_G1_CHAN1_CURR_RX_DESC_PTR 0x118910 -#define GT96100_SDMA_G1_CHAN1_TX_DESC_BASE 0x11C900 -#define GT96100_SDMA_G1_CHAN1_CURR_TX_DESC_PTR 0x11C910 -#define GT96100_SDMA_G1_CHAN1_1ST_TX_DESC_PTR 0x11C914 -#define GT96100_SDMA_G1_CHAN2_CONFIG 0x120900 -#define GT96100_SDMA_G1_CHAN2_COMM 0x120908 -#define GT96100_SDMA_G1_CHAN2_RX_DESC_BASE 0x128900 -#define GT96100_SDMA_G1_CHAN2_CURR_RX_DESC_PTR 0x128910 -#define GT96100_SDMA_G1_CHAN2_TX_DESC_BASE 0x12C900 -#define GT96100_SDMA_G1_CHAN2_CURR_TX_DESC_PTR 0x12C910 -#define GT96100_SDMA_G1_CHAN2_1ST_TX_DESC_PTR 0x12C914 -#define GT96100_SDMA_G1_CHAN3_CONFIG 0x130900 -#define GT96100_SDMA_G1_CHAN3_COMM 0x130908 -#define GT96100_SDMA_G1_CHAN3_RX_DESC_BASE 0x138900 -#define GT96100_SDMA_G1_CHAN3_CURR_RX_DESC_PTR 0x138910 -#define GT96100_SDMA_G1_CHAN3_TX_DESC_BASE 0x13C900 -#define GT96100_SDMA_G1_CHAN3_CURR_TX_DESC_PTR 0x13C910 -#define GT96100_SDMA_G1_CHAN3_1ST_TX_DESC_PTR 0x13C914 -#define GT96100_SDMA_G1_CHAN4_CONFIG 0x140900 -#define GT96100_SDMA_G1_CHAN4_COMM 0x140908 -#define GT96100_SDMA_G1_CHAN4_RX_DESC_BASE 0x148900 -#define GT96100_SDMA_G1_CHAN4_CURR_RX_DESC_PTR 0x148910 -#define GT96100_SDMA_G1_CHAN4_TX_DESC_BASE 0x14C900 -#define GT96100_SDMA_G1_CHAN4_CURR_TX_DESC_PTR 0x14C910 -#define GT96100_SDMA_G1_CHAN4_1ST_TX_DESC_PTR 0x14C914 -#define GT96100_SDMA_G1_CHAN5_CONFIG 0x150900 -#define GT96100_SDMA_G1_CHAN5_COMM 0x150908 -#define GT96100_SDMA_G1_CHAN5_RX_DESC_BASE 0x158900 -#define GT96100_SDMA_G1_CHAN5_CURR_RX_DESC_PTR 0x158910 -#define GT96100_SDMA_G1_CHAN5_TX_DESC_BASE 0x15C900 -#define GT96100_SDMA_G1_CHAN5_CURR_TX_DESC_PTR 0x15C910 -#define GT96100_SDMA_G1_CHAN5_1ST_TX_DESC_PTR 0x15C914 -#define GT96100_SDMA_G1_CHAN6_CONFIG 0x160900 -#define GT96100_SDMA_G1_CHAN6_COMM 0x160908 -#define GT96100_SDMA_G1_CHAN6_RX_DESC_BASE 0x168900 -#define GT96100_SDMA_G1_CHAN6_CURR_RX_DESC_PTR 0x168910 -#define GT96100_SDMA_G1_CHAN6_TX_DESC_BASE 0x16C900 -#define GT96100_SDMA_G1_CHAN6_CURR_TX_DESC_PTR 0x16C910 -#define GT96100_SDMA_G1_CHAN6_1ST_TX_DESC_PTR 0x16C914 -#define GT96100_SDMA_G1_CHAN7_CONFIG 0x170900 -#define GT96100_SDMA_G1_CHAN7_COMM 0x170908 -#define GT96100_SDMA_G1_CHAN7_RX_DESC_BASE 0x178900 -#define GT96100_SDMA_G1_CHAN7_CURR_RX_DESC_PTR 0x178910 -#define GT96100_SDMA_G1_CHAN7_TX_DESC_BASE 0x17C900 -#define GT96100_SDMA_G1_CHAN7_CURR_TX_DESC_PTR 0x17C910 -#define GT96100_SDMA_G1_CHAN7_1ST_TX_DESC_PTR 0x17C914 -/* MPSCs */ -#define GT96100_MPSC0_MAIN_CONFIG_LOW 0x000A00 -#define GT96100_MPSC0_MAIN_CONFIG_HIGH 0x000A04 -#define GT96100_MPSC0_PROTOCOL_CONFIG 0x000A08 -#define GT96100_MPSC_CHAN0_REG1 0x000A0C -#define GT96100_MPSC_CHAN0_REG2 0x000A10 -#define GT96100_MPSC_CHAN0_REG3 0x000A14 -#define GT96100_MPSC_CHAN0_REG4 0x000A18 -#define GT96100_MPSC_CHAN0_REG5 0x000A1C -#define GT96100_MPSC_CHAN0_REG6 0x000A20 -#define GT96100_MPSC_CHAN0_REG7 0x000A24 -#define GT96100_MPSC_CHAN0_REG8 0x000A28 -#define GT96100_MPSC_CHAN0_REG9 0x000A2C -#define GT96100_MPSC_CHAN0_REG10 0x000A30 -#define GT96100_MPSC_CHAN0_REG11 0x000A34 -#define GT96100_MPSC1_MAIN_CONFIG_LOW 0x008A00 -#define GT96100_MPSC1_MAIN_CONFIG_HIGH 0x008A04 -#define GT96100_MPSC1_PROTOCOL_CONFIG 0x008A08 -#define GT96100_MPSC_CHAN1_REG1 0x008A0C -#define GT96100_MPSC_CHAN1_REG2 0x008A10 -#define GT96100_MPSC_CHAN1_REG3 0x008A14 -#define GT96100_MPSC_CHAN1_REG4 0x008A18 -#define GT96100_MPSC_CHAN1_REG5 0x008A1C -#define GT96100_MPSC_CHAN1_REG6 0x008A20 -#define GT96100_MPSC_CHAN1_REG7 0x008A24 -#define GT96100_MPSC_CHAN1_REG8 0x008A28 -#define GT96100_MPSC_CHAN1_REG9 0x008A2C -#define GT96100_MPSC_CHAN1_REG10 0x008A30 -#define GT96100_MPSC_CHAN1_REG11 0x008A34 -#define GT96100_MPSC2_MAIN_CONFIG_LOW 0x010A00 -#define GT96100_MPSC2_MAIN_CONFIG_HIGH 0x010A04 -#define GT96100_MPSC2_PROTOCOL_CONFIG 0x010A08 -#define GT96100_MPSC_CHAN2_REG1 0x010A0C -#define GT96100_MPSC_CHAN2_REG2 0x010A10 -#define GT96100_MPSC_CHAN2_REG3 0x010A14 -#define GT96100_MPSC_CHAN2_REG4 0x010A18 -#define GT96100_MPSC_CHAN2_REG5 0x010A1C -#define GT96100_MPSC_CHAN2_REG6 0x010A20 -#define GT96100_MPSC_CHAN2_REG7 0x010A24 -#define GT96100_MPSC_CHAN2_REG8 0x010A28 -#define GT96100_MPSC_CHAN2_REG9 0x010A2C -#define GT96100_MPSC_CHAN2_REG10 0x010A30 -#define GT96100_MPSC_CHAN2_REG11 0x010A34 -#define GT96100_MPSC3_MAIN_CONFIG_LOW 0x018A00 -#define GT96100_MPSC3_MAIN_CONFIG_HIGH 0x018A04 -#define GT96100_MPSC3_PROTOCOL_CONFIG 0x018A08 -#define GT96100_MPSC_CHAN3_REG1 0x018A0C -#define GT96100_MPSC_CHAN3_REG2 0x018A10 -#define GT96100_MPSC_CHAN3_REG3 0x018A14 -#define GT96100_MPSC_CHAN3_REG4 0x018A18 -#define GT96100_MPSC_CHAN3_REG5 0x018A1C -#define GT96100_MPSC_CHAN3_REG6 0x018A20 -#define GT96100_MPSC_CHAN3_REG7 0x018A24 -#define GT96100_MPSC_CHAN3_REG8 0x018A28 -#define GT96100_MPSC_CHAN3_REG9 0x018A2C -#define GT96100_MPSC_CHAN3_REG10 0x018A30 -#define GT96100_MPSC_CHAN3_REG11 0x018A34 -#define GT96100_MPSC4_MAIN_CONFIG_LOW 0x020A00 -#define GT96100_MPSC4_MAIN_CONFIG_HIGH 0x020A04 -#define GT96100_MPSC4_PROTOCOL_CONFIG 0x020A08 -#define GT96100_MPSC_CHAN4_REG1 0x020A0C -#define GT96100_MPSC_CHAN4_REG2 0x020A10 -#define GT96100_MPSC_CHAN4_REG3 0x020A14 -#define GT96100_MPSC_CHAN4_REG4 0x020A18 -#define GT96100_MPSC_CHAN4_REG5 0x020A1C -#define GT96100_MPSC_CHAN4_REG6 0x020A20 -#define GT96100_MPSC_CHAN4_REG7 0x020A24 -#define GT96100_MPSC_CHAN4_REG8 0x020A28 -#define GT96100_MPSC_CHAN4_REG9 0x020A2C -#define GT96100_MPSC_CHAN4_REG10 0x020A30 -#define GT96100_MPSC_CHAN4_REG11 0x020A34 -#define GT96100_MPSC5_MAIN_CONFIG_LOW 0x028A00 -#define GT96100_MPSC5_MAIN_CONFIG_HIGH 0x028A04 -#define GT96100_MPSC5_PROTOCOL_CONFIG 0x028A08 -#define GT96100_MPSC_CHAN5_REG1 0x028A0C -#define GT96100_MPSC_CHAN5_REG2 0x028A10 -#define GT96100_MPSC_CHAN5_REG3 0x028A14 -#define GT96100_MPSC_CHAN5_REG4 0x028A18 -#define GT96100_MPSC_CHAN5_REG5 0x028A1C -#define GT96100_MPSC_CHAN5_REG6 0x028A20 -#define GT96100_MPSC_CHAN5_REG7 0x028A24 -#define GT96100_MPSC_CHAN5_REG8 0x028A28 -#define GT96100_MPSC_CHAN5_REG9 0x028A2C -#define GT96100_MPSC_CHAN5_REG10 0x028A30 -#define GT96100_MPSC_CHAN5_REG11 0x028A34 -#define GT96100_MPSC6_MAIN_CONFIG_LOW 0x030A00 -#define GT96100_MPSC6_MAIN_CONFIG_HIGH 0x030A04 -#define GT96100_MPSC6_PROTOCOL_CONFIG 0x030A08 -#define GT96100_MPSC_CHAN6_REG1 0x030A0C -#define GT96100_MPSC_CHAN6_REG2 0x030A10 -#define GT96100_MPSC_CHAN6_REG3 0x030A14 -#define GT96100_MPSC_CHAN6_REG4 0x030A18 -#define GT96100_MPSC_CHAN6_REG5 0x030A1C -#define GT96100_MPSC_CHAN6_REG6 0x030A20 -#define GT96100_MPSC_CHAN6_REG7 0x030A24 -#define GT96100_MPSC_CHAN6_REG8 0x030A28 -#define GT96100_MPSC_CHAN6_REG9 0x030A2C -#define GT96100_MPSC_CHAN6_REG10 0x030A30 -#define GT96100_MPSC_CHAN6_REG11 0x030A34 -#define GT96100_MPSC7_MAIN_CONFIG_LOW 0x038A00 -#define GT96100_MPSC7_MAIN_CONFIG_HIGH 0x038A04 -#define GT96100_MPSC7_PROTOCOL_CONFIG 0x038A08 -#define GT96100_MPSC_CHAN7_REG1 0x038A0C -#define GT96100_MPSC_CHAN7_REG2 0x038A10 -#define GT96100_MPSC_CHAN7_REG3 0x038A14 -#define GT96100_MPSC_CHAN7_REG4 0x038A18 -#define GT96100_MPSC_CHAN7_REG5 0x038A1C -#define GT96100_MPSC_CHAN7_REG6 0x038A20 -#define GT96100_MPSC_CHAN7_REG7 0x038A24 -#define GT96100_MPSC_CHAN7_REG8 0x038A28 -#define GT96100_MPSC_CHAN7_REG9 0x038A2C -#define GT96100_MPSC_CHAN7_REG10 0x038A30 -#define GT96100_MPSC_CHAN7_REG11 0x038A34 -/* FlexTDMs */ -/* TDPR0 - Transmit Dual Port RAM. block size 0xff */ -#define GT96100_FXTDM0_TDPR0_BLK0_BASE 0x000B00 -#define GT96100_FXTDM0_TDPR0_BLK1_BASE 0x001B00 -#define GT96100_FXTDM0_TDPR0_BLK2_BASE 0x002B00 -#define GT96100_FXTDM0_TDPR0_BLK3_BASE 0x003B00 -/* RDPR0 - Receive Dual Port RAM. block size 0xff */ -#define GT96100_FXTDM0_RDPR0_BLK0_BASE 0x004B00 -#define GT96100_FXTDM0_RDPR0_BLK1_BASE 0x005B00 -#define GT96100_FXTDM0_RDPR0_BLK2_BASE 0x006B00 -#define GT96100_FXTDM0_RDPR0_BLK3_BASE 0x007B00 -#define GT96100_FXTDM0_TX_READ_PTR 0x008B00 -#define GT96100_FXTDM0_RX_READ_PTR 0x008B04 -#define GT96100_FXTDM0_CONFIG 0x008B08 -#define GT96100_FXTDM0_AUX_CHANA_TX 0x008B0C -#define GT96100_FXTDM0_AUX_CHANA_RX 0x008B10 -#define GT96100_FXTDM0_AUX_CHANB_TX 0x008B14 -#define GT96100_FXTDM0_AUX_CHANB_RX 0x008B18 -#define GT96100_FXTDM1_TDPR1_BLK0_BASE 0x010B00 -#define GT96100_FXTDM1_TDPR1_BLK1_BASE 0x011B00 -#define GT96100_FXTDM1_TDPR1_BLK2_BASE 0x012B00 -#define GT96100_FXTDM1_TDPR1_BLK3_BASE 0x013B00 -#define GT96100_FXTDM1_RDPR1_BLK0_BASE 0x014B00 -#define GT96100_FXTDM1_RDPR1_BLK1_BASE 0x015B00 -#define GT96100_FXTDM1_RDPR1_BLK2_BASE 0x016B00 -#define GT96100_FXTDM1_RDPR1_BLK3_BASE 0x017B00 -#define GT96100_FXTDM1_TX_READ_PTR 0x018B00 -#define GT96100_FXTDM1_RX_READ_PTR 0x018B04 -#define GT96100_FXTDM1_CONFIG 0x018B08 -#define GT96100_FXTDM1_AUX_CHANA_TX 0x018B0C -#define GT96100_FXTDM1_AUX_CHANA_RX 0x018B10 -#define GT96100_FLTDM1_AUX_CHANB_TX 0x018B14 -#define GT96100_FLTDM1_AUX_CHANB_RX 0x018B18 -#define GT96100_FLTDM2_TDPR2_BLK0_BASE 0x020B00 -#define GT96100_FLTDM2_TDPR2_BLK1_BASE 0x021B00 -#define GT96100_FLTDM2_TDPR2_BLK2_BASE 0x022B00 -#define GT96100_FLTDM2_TDPR2_BLK3_BASE 0x023B00 -#define GT96100_FLTDM2_RDPR2_BLK0_BASE 0x024B00 -#define GT96100_FLTDM2_RDPR2_BLK1_BASE 0x025B00 -#define GT96100_FLTDM2_RDPR2_BLK2_BASE 0x026B00 -#define GT96100_FLTDM2_RDPR2_BLK3_BASE 0x027B00 -#define GT96100_FLTDM2_TX_READ_PTR 0x028B00 -#define GT96100_FLTDM2_RX_READ_PTR 0x028B04 -#define GT96100_FLTDM2_CONFIG 0x028B08 -#define GT96100_FLTDM2_AUX_CHANA_TX 0x028B0C -#define GT96100_FLTDM2_AUX_CHANA_RX 0x028B10 -#define GT96100_FLTDM2_AUX_CHANB_TX 0x028B14 -#define GT96100_FLTDM2_AUX_CHANB_RX 0x028B18 -#define GT96100_FLTDM3_TDPR3_BLK0_BASE 0x030B00 -#define GT96100_FLTDM3_TDPR3_BLK1_BASE 0x031B00 -#define GT96100_FLTDM3_TDPR3_BLK2_BASE 0x032B00 -#define GT96100_FLTDM3_TDPR3_BLK3_BASE 0x033B00 -#define GT96100_FXTDM3_RDPR3_BLK0_BASE 0x034B00 -#define GT96100_FXTDM3_RDPR3_BLK1_BASE 0x035B00 -#define GT96100_FXTDM3_RDPR3_BLK2_BASE 0x036B00 -#define GT96100_FXTDM3_RDPR3_BLK3_BASE 0x037B00 -#define GT96100_FXTDM3_TX_READ_PTR 0x038B00 -#define GT96100_FXTDM3_RX_READ_PTR 0x038B04 -#define GT96100_FXTDM3_CONFIG 0x038B08 -#define GT96100_FXTDM3_AUX_CHANA_TX 0x038B0C -#define GT96100_FXTDM3_AUX_CHANA_RX 0x038B10 -#define GT96100_FXTDM3_AUX_CHANB_TX 0x038B14 -#define GT96100_FXTDM3_AUX_CHANB_RX 0x038B18 -/* Baud Rate Generators */ -#define GT96100_BRG0_CONFIG 0x102A00 -#define GT96100_BRG0_BAUD_TUNE 0x102A04 -#define GT96100_BRG1_CONFIG 0x102A08 -#define GT96100_BRG1_BAUD_TUNE 0x102A0C -#define GT96100_BRG2_CONFIG 0x102A10 -#define GT96100_BRG2_BAUD_TUNE 0x102A14 -#define GT96100_BRG3_CONFIG 0x102A18 -#define GT96100_BRG3_BAUD_TUNE 0x102A1C -#define GT96100_BRG4_CONFIG 0x102A20 -#define GT96100_BRG4_BAUD_TUNE 0x102A24 -#define GT96100_BRG5_CONFIG 0x102A28 -#define GT96100_BRG5_BAUD_TUNE 0x102A2C -#define GT96100_BRG6_CONFIG 0x102A30 -#define GT96100_BRG6_BAUD_TUNE 0x102A34 -#define GT96100_BRG7_CONFIG 0x102A38 -#define GT96100_BRG7_BAUD_TUNE 0x102A3C -/* Routing Registers */ -#define GT96100_ROUTE_MAIN 0x101A00 -#define GT96100_ROUTE_RX_CLOCK 0x101A10 -#define GT96100_ROUTE_TX_CLOCK 0x101A20 -/* General Purpose Ports */ -#define GT96100_GPP_CONFIG0 0x100A00 -#define GT96100_GPP_CONFIG1 0x100A04 -#define GT96100_GPP_CONFIG2 0x100A08 -#define GT96100_GPP_CONFIG3 0x100A0C -#define GT96100_GPP_IO0 0x100A20 -#define GT96100_GPP_IO1 0x100A24 -#define GT96100_GPP_IO2 0x100A28 -#define GT96100_GPP_IO3 0x100A2C -#define GT96100_GPP_DATA0 0x100A40 -#define GT96100_GPP_DATA1 0x100A44 -#define GT96100_GPP_DATA2 0x100A48 -#define GT96100_GPP_DATA3 0x100A4C -#define GT96100_GPP_LEVEL0 0x100A60 -#define GT96100_GPP_LEVEL1 0x100A64 -#define GT96100_GPP_LEVEL2 0x100A68 -#define GT96100_GPP_LEVEL3 0x100A6C -/* Watchdog */ -#define GT96100_WD_CONFIG 0x101A80 -#define GT96100_WD_VALUE 0x101A84 -/* Communication Unit Arbiter */ -#define GT96100_COMM_UNIT_ARBTR_CONFIG 0x101AC0 -/* PCI Arbiters */ -#define GT96100_PCI0_ARBTR_CONFIG 0x101AE0 -#define GT96100_PCI1_ARBTR_CONFIG 0x101AE4 -/* CIU Arbiter */ -#define GT96100_CIU_ARBITER_CONFIG 0x101AC0 -/* Interrupt Controller */ -#define GT96100_MAIN_CAUSE 0x000C18 -#define GT96100_INT0_MAIN_MASK 0x000C1C -#define GT96100_INT1_MAIN_MASK 0x000C24 -#define GT96100_HIGH_CAUSE 0x000C98 -#define GT96100_INT0_HIGH_MASK 0x000C9C -#define GT96100_INT1_HIGH_MASK 0x000CA4 -#define GT96100_INT0_SELECT 0x000C70 -#define GT96100_INT1_SELECT 0x000C74 -#define GT96100_SERIAL_CAUSE 0x103A00 -#define GT96100_SERINT0_MASK 0x103A80 -#define GT96100_SERINT1_MASK 0x103A88 - -#endif /* _GT96100_H */ diff --git a/include/asm-mips/mach-ev96100/mach-gt64120.h b/include/asm-mips/mach-ev96100/mach-gt64120.h deleted file mode 100644 index 0ef1e6c25acf..000000000000 --- a/include/asm-mips/mach-ev96100/mach-gt64120.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This is a direct copy of the ev96100.h file, with a global - * search and replace. The numbers are the same. - * - * The reason I'm duplicating this is so that the 64120/96100 - * defines won't be confusing in the source code. - */ -#ifndef _ASM_GT64120_EV96100_GT64120_DEP_H -#define _ASM_GT64120_EV96100_GT64120_DEP_H - -/* - * GT96100 config space base address - */ -#define GT64120_BASE (KSEG1ADDR(0x14000000)) - -/* - * PCI Bus allocation - * - * (Guessing ...) - */ -#define GT_PCI_MEM_BASE 0x12000000UL -#define GT_PCI_MEM_SIZE 0x02000000UL -#define GT_PCI_IO_BASE 0x10000000UL -#define GT_PCI_IO_SIZE 0x02000000UL -#define GT_ISA_IO_BASE PCI_IO_BASE - -/* - * Duart I/O ports. - */ -#define EV96100_COM1_BASE_ADDR (0xBD000000 + 0x20) -#define EV96100_COM2_BASE_ADDR (0xBD000000 + 0x00) - - -/* - * EV96100 interrupt controller register base. - */ -#define EV96100_ICTRL_REGS_BASE (KSEG1ADDR(0x1f000000)) - -/* - * EV96100 UART register base. - */ -#define EV96100_UART0_REGS_BASE EV96100_COM1_BASE_ADDR -#define EV96100_UART1_REGS_BASE EV96100_COM2_BASE_ADDR -#define EV96100_BASE_BAUD ( 3686400 / 16 ) - -#endif /* _ASM_GT64120_EV96100_GT64120_DEP_H */ diff --git a/include/asm-mips/serial.h b/include/asm-mips/serial.h index 584bd9c0ab2e..035637c67e7c 100644 --- a/include/asm-mips/serial.h +++ b/include/asm-mips/serial.h @@ -52,9 +52,9 @@ #endif /* - * Both Galileo boards have the same UART mappings. + * Galileo EV64120 evaluation board */ -#if defined (CONFIG_MIPS_EV96100) || defined (CONFIG_MIPS_EV64120) +#ifdef CONFIG_MIPS_EV64120 #include #include #define EV96100_SERIAL_PORT_DEFNS \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6a1e09834559..5c1c698a92ac 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1482,9 +1482,6 @@ #define PCI_DEVICE_ID_MARVELL_GT64260 0x6430 #define PCI_DEVICE_ID_MARVELL_MV64360 0x6460 #define PCI_DEVICE_ID_MARVELL_MV64460 0x6480 -#define PCI_DEVICE_ID_MARVELL_GT96100 0x9652 -#define PCI_DEVICE_ID_MARVELL_GT96100A 0x9653 - #define PCI_VENDOR_ID_V3 0x11b0 #define PCI_DEVICE_ID_V3_V960 0x0001 -- cgit v1.2.3-71-gd317 From ae6ddcc5f24d6b06ae9231dc128904750a4155e0 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Wed, 27 Sep 2006 01:49:27 -0700 Subject: [PATCH] ext3 and jbd cleanup: remove whitespace Remove whitespace from ext3 and jbd, before we clone ext4. Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 16 +++--- fs/ext3/bitmap.c | 2 +- fs/ext3/dir.c | 14 +++--- fs/ext3/file.c | 2 +- fs/ext3/fsync.c | 6 +-- fs/ext3/hash.c | 6 +-- fs/ext3/ialloc.c | 48 +++++++++--------- fs/ext3/inode.c | 64 ++++++++++++------------ fs/ext3/namei.c | 28 +++++------ fs/ext3/super.c | 24 ++++----- fs/jbd/checkpoint.c | 30 +++++------ fs/jbd/journal.c | 56 ++++++++++----------- fs/jbd/recovery.c | 54 ++++++++++---------- fs/jbd/revoke.c | 70 +++++++++++++------------- fs/jbd/transaction.c | 128 +++++++++++++++++++++++------------------------ include/linux/ext3_jbd.h | 10 ++-- include/linux/jbd.h | 56 ++++++++++----------- 17 files changed, 307 insertions(+), 307 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 063d994bda0b..e6b983707008 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -74,7 +74,7 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, } /* - * Read the bitmap for a given block_group, reading into the specified + * Read the bitmap for a given block_group, reading into the specified * slot in the superblock's bitmap cache. * * Return buffer_head on success or NULL in case of failure. @@ -419,8 +419,8 @@ do_more: } /* @@@ This prevents newly-allocated data from being * freed and then reallocated within the same - * transaction. - * + * transaction. + * * Ideally we would want to allow that to happen, but to * do so requires making journal_forget() capable of * revoking the queued write of a data block, which @@ -433,7 +433,7 @@ do_more: * safe not to set the allocation bit in the committed * bitmap, because we know that there is no outstanding * activity on the buffer any more and so it is safe to - * reallocate it. + * reallocate it. */ BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); J_ASSERT_BH(bitmap_bh, @@ -518,7 +518,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, * data would allow the old block to be overwritten before the * transaction committed (because we force data to disk before commit). * This would lead to corruption if we crashed between overwriting the - * data and committing the delete. + * data and committing the delete. * * @@@ We may want to make this allocation behaviour conditional on * data-writes at some point, and disable it for metadata allocations or @@ -584,7 +584,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, if (start > 0) { /* - * The goal was occupied; search forward for a free + * The goal was occupied; search forward for a free * block within the next XX blocks. * * end_goal is more or less random, but it has to be @@ -1194,7 +1194,7 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries) /* * ext3_new_block uses a goal block to assist allocation. If the goal is * free, or there is a free block within 32 blocks of the goal, that block - * is allocated. Otherwise a forward search is made for a free block; within + * is allocated. Otherwise a forward search is made for a free block; within * each block group the search first looks for an entire free byte in the block * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. @@ -1303,7 +1303,7 @@ retry_alloc: smp_rmb(); /* - * Now search the rest of the groups. We assume that + * Now search the rest of the groups. We assume that * i and gdp correctly point to the last group visited. */ for (bgi = 0; bgi < ngroups; bgi++) { diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c index ce4f82b9e528..b9176eed98d1 100644 --- a/fs/ext3/bitmap.c +++ b/fs/ext3/bitmap.c @@ -20,7 +20,7 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) unsigned int i; unsigned long sum = 0; - if (!map) + if (!map) return (0); for (i = 0; i < numchars; i++) sum += nibblemap[map->b_data[i] & 0xf] + diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index fbb0d4ed07d4..6f9e5a523c87 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -59,7 +59,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) return (ext3_filetype_table[filetype]); } - + int ext3_check_dir_entry (const char * function, struct inode * dir, struct ext3_dir_entry_2 * de, @@ -162,7 +162,7 @@ revalidate: * to make sure. */ if (filp->f_version != inode->i_version) { for (i = 0; i < sb->s_blocksize && i < offset; ) { - de = (struct ext3_dir_entry_2 *) + de = (struct ext3_dir_entry_2 *) (bh->b_data + i); /* It's too expensive to do a full * dirent test each time round this @@ -181,7 +181,7 @@ revalidate: filp->f_version = inode->i_version; } - while (!error && filp->f_pos < inode->i_size + while (!error && filp->f_pos < inode->i_size && offset < sb->s_blocksize) { de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); if (!ext3_check_dir_entry ("ext3_readdir", inode, de, @@ -229,7 +229,7 @@ out: /* * These functions convert from the major/minor hash to an f_pos * value. - * + * * Currently we only use major hash numer. This is unfortunate, but * on 32-bit machines, the same VFS interface is used for lseek and * llseek, so if we use the 64 bit offset, then the 32-bit versions of @@ -250,7 +250,7 @@ out: struct fname { __u32 hash; __u32 minor_hash; - struct rb_node rb_hash; + struct rb_node rb_hash; struct fname *next; __u32 inode; __u8 name_len; @@ -410,7 +410,7 @@ static int call_filldir(struct file * filp, void * dirent, curr_pos = hash2pos(fname->hash, fname->minor_hash); while (fname) { error = filldir(dirent, fname->name, - fname->name_len, curr_pos, + fname->name_len, curr_pos, fname->inode, get_dtype(sb, fname->file_type)); if (error) { @@ -465,7 +465,7 @@ static int ext3_dx_readdir(struct file * filp, /* * Fill the rbtree if we have no more entries, * or the inode has changed since we last read in the - * cached entries. + * cached entries. */ if ((!info->curr_node) || (filp->f_version != inode->i_version)) { diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 1efefb630ea9..994efd189f4e 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -100,7 +100,7 @@ ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t force_commit: err = ext3_force_commit(inode->i_sb); - if (err) + if (err) return err; return ret; } diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 49382a208e05..dd1fd3c0fc05 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -8,14 +8,14 @@ * Universite Pierre et Marie Curie (Paris VI) * from * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds - * + * * ext3fs fsync primitive * * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 - * + * * Removed unnecessary code duplication for little endian machines - * and excessive __inline__s. + * and excessive __inline__s. * Andi Kleen, 1997 * * Major simplications and cleanup - we only need to do the metadata, because diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index 5a2d1235ead0..7fa637cd322a 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c @@ -4,7 +4,7 @@ * Copyright (C) 2002 by Theodore Ts'o * * This file is released under the GPL v2. - * + * * This file may be redistributed under the terms of the GNU Public * License. */ @@ -80,11 +80,11 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) * Returns the hash of a filename. If len is 0 and name is NULL, then * this function can be used to test whether or not a hash version is * supported. - * + * * The seed is an 4 longword (32 bits) "secret" which can be used to * uniquify a hash. If the seed is all zero's, then some default seed * may be used. - * + * * A particular hash version specifies whether or not the seed is * represented, and whether or not the returned hash is 32 bits or 64 * bits. 32 bit hashes will return 0 for the minor hash. diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 36546ed36a14..5e288368499b 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -216,7 +216,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) continue; - if (!best_desc || + if (!best_desc || (le16_to_cpu(desc->bg_free_blocks_count) > le16_to_cpu(best_desc->bg_free_blocks_count))) { best_group = group; @@ -226,30 +226,30 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) return best_group; } -/* - * Orlov's allocator for directories. - * +/* + * Orlov's allocator for directories. + * * We always try to spread first-level directories. * - * If there are blockgroups with both free inodes and free blocks counts - * not worse than average we return one with smallest directory count. - * Otherwise we simply return a random group. - * - * For the rest rules look so: - * - * It's OK to put directory into a group unless - * it has too many directories already (max_dirs) or - * it has too few free inodes left (min_inodes) or - * it has too few free blocks left (min_blocks) or - * it's already running too large debt (max_debt). - * Parent's group is prefered, if it doesn't satisfy these - * conditions we search cyclically through the rest. If none - * of the groups look good we just look for a group with more - * free inodes than average (starting at parent's group). - * - * Debt is incremented each time we allocate a directory and decremented - * when we allocate an inode, within 0--255. - */ + * If there are blockgroups with both free inodes and free blocks counts + * not worse than average we return one with smallest directory count. + * Otherwise we simply return a random group. + * + * For the rest rules look so: + * + * It's OK to put directory into a group unless + * it has too many directories already (max_dirs) or + * it has too few free inodes left (min_inodes) or + * it has too few free blocks left (min_blocks) or + * it's already running too large debt (max_debt). + * Parent's group is prefered, if it doesn't satisfy these + * conditions we search cyclically through the rest. If none + * of the groups look good we just look for a group with more + * free inodes than average (starting at parent's group). + * + * Debt is incremented each time we allocate a directory and decremented + * when we allocate an inode, within 0--255. + */ #define INODE_COST 64 #define BLOCK_COST 256 @@ -454,7 +454,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) group = find_group_dir(sb, dir); else group = find_group_orlov(sb, dir); - } else + } else group = find_group_other(sb, dir); err = -ENOSPC; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 84be02e93652..473d206b1d7e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -55,7 +55,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode) /* * The ext3 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. + * revoked in all cases. * * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record @@ -105,7 +105,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, * Work out how many blocks we need to proceed with the next chunk of a * truncate transaction. */ -static unsigned long blocks_for_truncate(struct inode *inode) +static unsigned long blocks_for_truncate(struct inode *inode) { unsigned long needed; @@ -122,13 +122,13 @@ static unsigned long blocks_for_truncate(struct inode *inode) /* But we need to bound the transaction so we don't overflow the * journal. */ - if (needed > EXT3_MAX_TRANS_DATA) + if (needed > EXT3_MAX_TRANS_DATA) needed = EXT3_MAX_TRANS_DATA; return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; } -/* +/* * Truncate transactions can be complex and absolutely huge. So we need to * be able to restart the transaction at a conventient checkpoint to make * sure we don't overflow the journal. @@ -136,9 +136,9 @@ static unsigned long blocks_for_truncate(struct inode *inode) * start_transaction gets us a new handle for a truncate transaction, * and extend_transaction tries to extend the existing one a bit. If * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. --sct + * transaction in the top-level truncate loop. --sct */ -static handle_t *start_transaction(struct inode *inode) +static handle_t *start_transaction(struct inode *inode) { handle_t *result; @@ -215,12 +215,12 @@ void ext3_delete_inode (struct inode * inode) ext3_orphan_del(handle, inode); EXT3_I(inode)->i_dtime = get_seconds(); - /* + /* * One subtle ordering requirement: if anything has gone wrong * (transaction abort, IO errors, whatever), then we can still * do these next steps (the fs will already have been marked as * having errors), but we can't free the inode if the mark_dirty - * fails. + * fails. */ if (ext3_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. */ @@ -398,7 +398,7 @@ no_block: * + if there is a block to the left of our position - allocate near it. * + if pointer will live in indirect block - allocate near that block. * + if pointer will live in inode - allocate in the same - * cylinder group. + * cylinder group. * * In the latter case we colour the starting block by the callers PID to * prevent it from clashing with concurrent allocations for a different inode @@ -744,7 +744,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, jbd_debug(5, "splicing indirect only\n"); BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); err = ext3_journal_dirty_metadata(handle, where->bh); - if (err) + if (err) goto err_out; } else { /* @@ -1137,7 +1137,7 @@ static int walk_page_buffers( handle_t *handle, * So what we do is to rely on the fact that journal_stop/journal_start * will _not_ run commit under these circumstances because handle->h_ref * is elevated. We'll still have enough credits for the tiny quotafile - * write. + * write. */ static int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh) @@ -1282,7 +1282,7 @@ static int ext3_journalled_commit_write(struct file *file, if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; ret2 = ext3_mark_inode_dirty(handle, inode); - if (!ret) + if (!ret) ret = ret2; } ret2 = ext3_journal_stop(handle); @@ -1291,7 +1291,7 @@ static int ext3_journalled_commit_write(struct file *file, return ret; } -/* +/* * bmap() is special. It gets used by applications such as lilo and by * the swapper to find the on-disk block of a specific piece of data. * @@ -1300,10 +1300,10 @@ static int ext3_journalled_commit_write(struct file *file, * filesystem and enables swap, then they may get a nasty shock when the * data getting swapped to that swapfile suddenly gets overwritten by * the original zero's written out previously to the journal and - * awaiting writeback in the kernel's buffer cache. + * awaiting writeback in the kernel's buffer cache. * * So, if we see any bmap calls here on a modified, data-journaled file, - * take extra steps to flush any blocks which might be in the cache. + * take extra steps to flush any blocks which might be in the cache. */ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) { @@ -1312,16 +1312,16 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) int err; if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { - /* + /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: * only if we run lilo or swapon on a freshly made file - * do we expect this to happen. + * do we expect this to happen. * * (bmap requires CAP_SYS_RAWIO so this does not * represent an unprivileged user DOS attack --- we'd be * in trouble if mortal users could trigger this path at - * will.) + * will.) * * NB. EXT3_STATE_JDATA is not set on files other than * regular files. If somebody wants to bmap a directory @@ -1457,7 +1457,7 @@ static int ext3_ordered_writepage(struct page *page, */ /* - * And attach them to the current transaction. But only if + * And attach them to the current transaction. But only if * block_write_full_page() succeeded. Otherwise they are unmapped, * and generally junk. */ @@ -1644,7 +1644,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } } - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext3_get_block, NULL); @@ -2025,7 +2025,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, __le32 *first, __le32 *last) { ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ - unsigned long count = 0; /* Number of blocks in the run */ + unsigned long count = 0; /* Number of blocks in the run */ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind corresponding to block_to_free */ @@ -2054,7 +2054,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, } else if (nr == block_to_free + count) { count++; } else { - ext3_clear_blocks(handle, inode, this_bh, + ext3_clear_blocks(handle, inode, this_bh, block_to_free, count, block_to_free_p, p); block_to_free = nr; @@ -2184,7 +2184,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, *p = 0; BUFFER_TRACE(parent_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, + ext3_journal_dirty_metadata(handle, parent_bh); } } @@ -2704,7 +2704,7 @@ void ext3_read_inode(struct inode * inode) if (raw_inode->i_block[0]) init_special_inode(inode, inode->i_mode, old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); - else + else init_special_inode(inode, inode->i_mode, new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } @@ -2724,8 +2724,8 @@ bad_inode: * * The caller must have write access to iloc->bh. */ -static int ext3_do_update_inode(handle_t *handle, - struct inode *inode, +static int ext3_do_update_inode(handle_t *handle, + struct inode *inode, struct ext3_iloc *iloc) { struct ext3_inode *raw_inode = ext3_raw_inode(iloc); @@ -2900,7 +2900,7 @@ int ext3_write_inode(struct inode *inode, int wait) * commit will leave the blocks being flushed in an unused state on * disk. (On recovery, the inode will get truncated and the blocks will * be freed, so we have a strong guarantee that no future commit will - * leave these blocks visible to the user.) + * leave these blocks visible to the user.) * * Called with inode->sem down. */ @@ -3043,13 +3043,13 @@ int ext3_mark_iloc_dirty(handle_t *handle, return err; } -/* +/* * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. + * iloc->bh. This _must_ be cleaned up later. */ int -ext3_reserve_inode_write(handle_t *handle, struct inode *inode, +ext3_reserve_inode_write(handle_t *handle, struct inode *inode, struct ext3_iloc *iloc) { int err = 0; @@ -3139,7 +3139,7 @@ out: } #if 0 -/* +/* * Bind an inode's backing buffer_head into this transaction, to prevent * it from being flushed to disk early. Unlike * ext3_reserve_inode_write, this leaves behind no bh reference and @@ -3157,7 +3157,7 @@ static int ext3_pin_inode(handle_t *handle, struct inode *inode) BUFFER_TRACE(iloc.bh, "get_write_access"); err = journal_get_write_access(handle, iloc.bh); if (!err) - err = ext3_journal_dirty_metadata(handle, + err = ext3_journal_dirty_metadata(handle, iloc.bh); brelse(iloc.bh); } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 2aa7101b27cd..4123f5261bcd 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -76,7 +76,7 @@ static struct buffer_head *ext3_append(handle_t *handle, #ifdef DX_DEBUG #define dxtrace(command) command #else -#define dxtrace(command) +#define dxtrace(command) #endif struct fake_dirent @@ -169,7 +169,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, - struct dx_frame *frames, + struct dx_frame *frames, __u32 *start_hash); static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, struct ext3_dir_entry_2 **res_dir, int *err); @@ -250,7 +250,7 @@ static void dx_show_index (char * label, struct dx_entry *entries) } struct stats -{ +{ unsigned names; unsigned space; unsigned bcount; @@ -464,7 +464,7 @@ static void dx_release (struct dx_frame *frames) */ static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, - struct dx_frame *frames, + struct dx_frame *frames, __u32 *start_hash) { struct dx_frame *p; @@ -632,7 +632,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, } count += ret; hashval = ~0; - ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, + ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, frame, frames, &hashval); *next_hash = hashval; if (ret < 0) { @@ -649,7 +649,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, break; } dx_release(frames); - dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", + dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", count, *next_hash)); return count; errout: @@ -1050,7 +1050,7 @@ struct dentry *ext3_get_parent(struct dentry *child) parent = ERR_PTR(-ENOMEM); } return parent; -} +} #define S_SHIFT 12 static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = { @@ -1198,7 +1198,7 @@ errout: * add_dirent_to_buf will attempt search the directory block for * space. It will return -ENOSPC if no space is available, and -EIO * and -EEXIST if directory entry already exists. - * + * * NOTE! bh is NOT released in the case where ENOSPC is returned. In * all other cases bh is released. */ @@ -1572,7 +1572,7 @@ cleanup: * ext3_delete_entry deletes a directory entry by merging it with the * previous entry */ -static int ext3_delete_entry (handle_t *handle, +static int ext3_delete_entry (handle_t *handle, struct inode * dir, struct ext3_dir_entry_2 * de_del, struct buffer_head * bh) @@ -1643,12 +1643,12 @@ static int ext3_add_nondir(handle_t *handle, * is so far negative - it has no inode. * * If the create succeeds, we fill in the inode information - * with d_instantiate(). + * with d_instantiate(). */ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { - handle_t *handle; + handle_t *handle; struct inode * inode; int err, retries = 0; @@ -1813,7 +1813,7 @@ static int empty_dir (struct inode * inode) de1 = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); if (le32_to_cpu(de->inode) != inode->i_ino || - !le32_to_cpu(de1->inode) || + !le32_to_cpu(de1->inode) || strcmp (".", de->name) || strcmp ("..", de1->name)) { ext3_warning (inode->i_sb, "empty_dir", @@ -1883,7 +1883,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode) * being truncated, or files being unlinked. */ /* @@@ FIXME: Observation from aviro: - * I think I can trigger J_ASSERT in ext3_orphan_add(). We block + * I think I can trigger J_ASSERT in ext3_orphan_add(). We block * here (on lock_super()), so race with ext3_link() which might bump * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. @@ -2393,4 +2393,4 @@ struct inode_operations ext3_special_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext3_permission, -}; +}; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3559086eee5f..4b95bfe4c8f7 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -62,13 +62,13 @@ static void ext3_unlockfs(struct super_block *sb); static void ext3_write_super (struct super_block * sb); static void ext3_write_super_lockfs(struct super_block *sb); -/* +/* * Wrappers for journal_start/end. * * The only special thing we need to do here is to make sure that all * journal_end calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if - * appropriate. + * appropriate. */ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) { @@ -90,11 +90,11 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) return journal_start(journal, nblocks); } -/* +/* * The only special thing we need to do here is to make sure that all * journal_stop calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if - * appropriate. + * appropriate. */ int __ext3_journal_stop(const char *where, handle_t *handle) { @@ -369,7 +369,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) { struct list_head *l; - printk(KERN_ERR "sb orphan head is %d\n", + printk(KERN_ERR "sb orphan head is %d\n", le32_to_cpu(sbi->s_es->s_last_orphan)); printk(KERN_ERR "sb_info orphan list:\n"); @@ -378,7 +378,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) printk(KERN_ERR " " "inode %s:%ld at %p: mode %o, nlink %d, next %d\n", inode->i_sb->s_id, inode->i_ino, inode, - inode->i_mode, inode->i_nlink, + inode->i_mode, inode->i_nlink, NEXT_ORPHAN(inode)); } } @@ -475,7 +475,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) inode_init_once(&ei->vfs_inode); } } - + static int init_inodecache(void) { ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", @@ -1483,7 +1483,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) - printk(KERN_WARNING + printk(KERN_WARNING "EXT3-fs warning: feature flags set on rev 0 fs, " "running e2fsck is recommended\n"); /* @@ -1509,7 +1509,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (blocksize < EXT3_MIN_BLOCK_SIZE || blocksize > EXT3_MAX_BLOCK_SIZE) { - printk(KERN_ERR + printk(KERN_ERR "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", blocksize, sb->s_id); goto failed_mount; @@ -1533,14 +1533,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; bh = sb_bread(sb, logic_sb_block); if (!bh) { - printk(KERN_ERR + printk(KERN_ERR "EXT3-fs: Can't read superblock on 2nd try.\n"); goto failed_mount; } es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { - printk (KERN_ERR + printk (KERN_ERR "EXT3-fs: Magic mismatch, very weird !\n"); goto failed_mount; } @@ -1820,7 +1820,7 @@ out_fail: /* * Setup any per-fs journal parameters now. We'll do this both on * initial mount, once the journal has been initialised but before we've - * done any recovery; and again on any subsequent remount. + * done any recovery; and again on any subsequent remount. */ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index d0685596e5a6..961ada28db5e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -1,6 +1,6 @@ /* * linux/fs/checkpoint.c - * + * * Written by Stephen C. Tweedie , 1999 * * Copyright 1999 Red Hat Software --- All Rights Reserved @@ -9,8 +9,8 @@ * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. * - * Checkpoint routines for the generic filesystem journaling code. - * Part of the ext2fs journaling system. + * Checkpoint routines for the generic filesystem journaling code. + * Part of the ext2fs journaling system. * * Checkpointing is the process of ensuring that a section of the log is * committed fully to disk, so that that portion of the log can be @@ -226,7 +226,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. + * scan of the checkpoint list. * * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it @@ -270,7 +270,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * possibly block, while still holding the journal lock. * We cannot afford to let the transaction logic start * messing around with this buffer before we write it to - * disk, as that would break recoverability. + * disk, as that would break recoverability. */ BUFFER_TRACE(bh, "queue"); get_bh(bh); @@ -293,7 +293,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * Perform an actual checkpoint. We take the first transaction on the * list of transactions to be checkpointed and send all its buffers * to disk. We submit larger chunks of data at once. - * + * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) @@ -304,10 +304,10 @@ int log_do_checkpoint(journal_t *journal) jbd_debug(1, "Start checkpoint\n"); - /* + /* * First thing: if there are any transactions in the log which * don't need checkpointing, just eliminate them from the - * journal straight away. + * journal straight away. */ result = cleanup_journal_tail(journal); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); @@ -385,9 +385,9 @@ out: * we have already got rid of any since the last update of the log tail * in the journal superblock. If so, we can instantly roll the * superblock forward to remove those transactions from the log. - * + * * Return <0 on error, 0 on success, 1 if there was nothing to clean up. - * + * * Called with the journal lock held. * * This is the only part of the journaling code which really needs to be @@ -404,8 +404,8 @@ int cleanup_journal_tail(journal_t *journal) unsigned long blocknr, freed; /* OK, work out the oldest transaction remaining in the log, and - * the log block it starts at. - * + * the log block it starts at. + * * If the log is now empty, we need to work out which is the * next transaction ID we will write, and where it will * start. */ @@ -558,7 +558,7 @@ out: return ret; } -/* +/* * journal_remove_checkpoint: called after a buffer has been committed * to disk (either by being write-back flushed to disk, or being * committed to the log). @@ -636,7 +636,7 @@ out: * Called with the journal locked. * Called with j_list_lock held. */ -void __journal_insert_checkpoint(struct journal_head *jh, +void __journal_insert_checkpoint(struct journal_head *jh, transaction_t *transaction) { JBUFFER_TRACE(jh, "entry"); @@ -658,7 +658,7 @@ void __journal_insert_checkpoint(struct journal_head *jh, /* * We've finished with this transaction structure: adios... - * + * * The transaction must have no links except for the checkpoint by this * point. * diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f66724ce443a..87c5a6d00805 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) * this is a no-op. If needed, we can use j_blk_offset - everything is * ready. */ -int journal_bmap(journal_t *journal, unsigned long blocknr, +int journal_bmap(journal_t *journal, unsigned long blocknr, unsigned long *retp) { int err = 0; @@ -699,10 +699,10 @@ fail: * @len: Lenght of the journal in blocks. * @blocksize: blocksize of journalling device * @returns: a newly created journal_t * - * + * * journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. - * + * */ journal_t * journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, @@ -739,11 +739,11 @@ journal_t * journal_init_dev(struct block_device *bdev, return journal; } - -/** + +/** * journal_t * journal_init_inode () - creates a journal which maps to a inode. * @inode: An inode to create the journal in - * + * * journal_init_inode creates a journal which maps an on-disk inode as * the journal. The inode must exist already, must support bmap() and * must have all data blocks preallocated. @@ -763,7 +763,7 @@ journal_t * journal_init_inode (struct inode *inode) journal->j_inode = inode; jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", - journal, inode->i_sb->s_id, inode->i_ino, + journal, inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size, inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); @@ -798,10 +798,10 @@ journal_t * journal_init_inode (struct inode *inode) return journal; } -/* +/* * If the journal init or create aborts, we need to mark the journal * superblock as being NULL to prevent the journal destroy from writing - * back a bogus superblock. + * back a bogus superblock. */ static void journal_fail_superblock (journal_t *journal) { @@ -844,13 +844,13 @@ static int journal_reset(journal_t *journal) return 0; } -/** +/** * int journal_create() - Initialise the new journal file * @journal: Journal to create. This structure must have been initialised - * + * * Given a journal_t structure which tells us which disk blocks we can * use, create a new journal superblock and initialise all of the - * journal fields from scratch. + * journal fields from scratch. **/ int journal_create(journal_t *journal) { @@ -915,7 +915,7 @@ int journal_create(journal_t *journal) return journal_reset(journal); } -/** +/** * void journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. * @wait: Set to '0' if you don't want to wait for IO completion. @@ -939,7 +939,7 @@ void journal_update_superblock(journal_t *journal, int wait) journal->j_transaction_sequence) { jbd_debug(1,"JBD: Skipping superblock update on recovered sb " "(start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, + journal->j_tail, journal->j_tail_sequence, journal->j_errno); goto out; } @@ -1062,7 +1062,7 @@ static int load_superblock(journal_t *journal) /** * int journal_load() - Read journal from disk. * @journal: Journal to act on. - * + * * Given a journal_t structure which tells us which disk blocks contain * a journal, read the journal from disk to initialise the in-memory * structures. @@ -1172,9 +1172,9 @@ void journal_destroy(journal_t *journal) * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount * @incompat: bitmask of incompatible features - * + * * Check whether the journal uses all of a given set of - * features. Return true (non-zero) if it does. + * features. Return true (non-zero) if it does. **/ int journal_check_used_features (journal_t *journal, unsigned long compat, @@ -1203,7 +1203,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount * @incompat: bitmask of incompatible features - * + * * Check whether the journaling code supports the use of * all of a given set of features on this journal. Return true * (non-zero) if it can. */ @@ -1241,7 +1241,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat, * @incompat: bitmask of incompatible features * * Mark a given journal feature as present on the - * superblock. Returns true if the requested features could be set. + * superblock. Returns true if the requested features could be set. * */ @@ -1327,7 +1327,7 @@ static int journal_convert_superblock_v1(journal_t *journal, /** * int journal_flush () - Flush journal * @journal: Journal to act on. - * + * * Flush all data for a given journal to disk and empty the journal. * Filesystems can use this when remounting readonly to ensure that * recovery does not need to happen on remount. @@ -1394,7 +1394,7 @@ int journal_flush(journal_t *journal) * int journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) - * + * * Wipe out all of the contents of a journal, safely. This will produce * a warning if the journal contains any valid recovery information. * Must be called between journal_init_*() and journal_load(). @@ -1449,7 +1449,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) /* * Journal abort has very specific semantics, which we describe - * for journal abort. + * for journal abort. * * Two internal function, which provide abort to te jbd layer * itself are here. @@ -1504,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) * Perform a complete, immediate shutdown of the ENTIRE * journal (not of a single transaction). This operation cannot be * undone without closing and reopening the journal. - * + * * The journal_abort function is intended to support higher level error * recovery mechanisms such as the ext2/ext3 remount-readonly error * mode. @@ -1538,7 +1538,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) * supply an errno; a null errno implies that absolutely no further * writes are done to the journal (unless there are any already in * progress). - * + * */ void journal_abort(journal_t *journal, int errno) @@ -1546,7 +1546,7 @@ void journal_abort(journal_t *journal, int errno) __journal_abort_soft(journal, errno); } -/** +/** * int journal_errno () - returns the journal's error state. * @journal: journal to examine. * @@ -1570,7 +1570,7 @@ int journal_errno(journal_t *journal) return err; } -/** +/** * int journal_clear_err () - clears the journal's error state * @journal: journal to act on. * @@ -1590,7 +1590,7 @@ int journal_clear_err(journal_t *journal) return err; } -/** +/** * void journal_ack_err() - Ack journal err. * @journal: journal to act on. * @@ -1612,7 +1612,7 @@ int journal_blocks_per_page(struct inode *inode) /* * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. + * debug different VM deadlock avoidance strategies. */ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) { diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index de5bafb4e853..73bb64806ed3 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -1,6 +1,6 @@ /* * linux/fs/recovery.c - * + * * Written by Stephen C. Tweedie , 1999 * * Copyright 1999-2000 Red Hat Software --- All Rights Reserved @@ -10,7 +10,7 @@ * option, any later version, incorporated herein by reference. * * Journal recovery routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. + * part of the ext2fs journaling system. */ #ifndef __KERNEL__ @@ -25,9 +25,9 @@ /* * Maintain information about the progress of the recovery job, so that - * the different passes can carry information between them. + * the different passes can carry information between them. */ -struct recovery_info +struct recovery_info { tid_t start_transaction; tid_t end_transaction; @@ -116,7 +116,7 @@ static int do_readahead(journal_t *journal, unsigned int start) err = 0; failed: - if (nbufs) + if (nbufs) journal_brelse_array(bufs, nbufs); return err; } @@ -128,7 +128,7 @@ failed: * Read a block from the journal */ -static int jread(struct buffer_head **bhp, journal_t *journal, +static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset) { int err; @@ -212,14 +212,14 @@ do { \ /** * journal_recover - recovers a on-disk journal * @journal: the journal to recover - * + * * The primary function for recovering the log contents when mounting a - * journaled device. + * journaled device. * * Recovery is done in three passes. In the first pass, we look for the * end of the log. In the second, we assemble the list of revoke * blocks. In the third and final pass, we replay any un-revoked blocks - * in the log. + * in the log. */ int journal_recover(journal_t *journal) { @@ -231,10 +231,10 @@ int journal_recover(journal_t *journal) memset(&info, 0, sizeof(info)); sb = journal->j_superblock; - /* + /* * The journal superblock's s_start field (the current log head) * is always zero if, and only if, the journal was cleanly - * unmounted. + * unmounted. */ if (!sb->s_start) { @@ -253,7 +253,7 @@ int journal_recover(journal_t *journal) jbd_debug(0, "JBD: recovery, exit status %d, " "recovered transactions %u to %u\n", err, info.start_transaction, info.end_transaction); - jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", + jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", info.nr_replays, info.nr_revoke_hits, info.nr_revokes); /* Restart the log at the next transaction ID, thus invalidating @@ -268,15 +268,15 @@ int journal_recover(journal_t *journal) /** * journal_skip_recovery - Start journal and wipe exiting records * @journal: journal to startup - * + * * Locate any valid recovery information from the journal and set up the * journal structures in memory to ignore it (presumably because the - * caller has evidence that it is out of date). + * caller has evidence that it is out of date). * This function does'nt appear to be exorted.. * * We perform one pass over the journal to allow us to tell the user how * much recovery information is being erased, and to let us initialise - * the journal transaction sequence numbers to the next unused ID. + * the journal transaction sequence numbers to the next unused ID. */ int journal_skip_recovery(journal_t *journal) { @@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal) #ifdef CONFIG_JBD_DEBUG int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); #endif - jbd_debug(0, + jbd_debug(0, "JBD: ignoring %d transaction%s from the journal.\n", dropped, (dropped == 1) ? "" : "s"); journal->j_transaction_sequence = ++info.end_transaction; @@ -324,10 +324,10 @@ static int do_one_pass(journal_t *journal, MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) / sizeof(journal_block_tag_t)); - /* + /* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log - * block offsets): query the superblock. + * block offsets): query the superblock. */ sb = journal->j_superblock; @@ -344,7 +344,7 @@ static int do_one_pass(journal_t *journal, * Now we walk through the log, transaction by transaction, * making sure that each transaction has a commit block in the * expected place. Each complete transaction gets replayed back - * into the main filesystem. + * into the main filesystem. */ while (1) { @@ -379,8 +379,8 @@ static int do_one_pass(journal_t *journal, next_log_block++; wrap(journal, next_log_block); - /* What kind of buffer is it? - * + /* What kind of buffer is it? + * * If it is a descriptor block, check that it has the * expected sequence number. Otherwise, we're all done * here. */ @@ -394,7 +394,7 @@ static int do_one_pass(journal_t *journal, blocktype = be32_to_cpu(tmp->h_blocktype); sequence = be32_to_cpu(tmp->h_sequence); - jbd_debug(3, "Found magic %d, sequence %d\n", + jbd_debug(3, "Found magic %d, sequence %d\n", blocktype, sequence); if (sequence != next_commit_ID) { @@ -438,7 +438,7 @@ static int do_one_pass(journal_t *journal, /* Recover what we can, but * report failure at the end. */ success = err; - printk (KERN_ERR + printk (KERN_ERR "JBD: IO error %d recovering " "block %ld in log\n", err, io_block); @@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal, * revoked, then we're all done * here. */ if (journal_test_revoke - (journal, blocknr, + (journal, blocknr, next_commit_ID)) { brelse(obh); ++info->nr_revoke_hits; @@ -465,7 +465,7 @@ static int do_one_pass(journal_t *journal, blocknr, journal->j_blocksize); if (nbh == NULL) { - printk(KERN_ERR + printk(KERN_ERR "JBD: Out of memory " "during recovery.\n"); err = -ENOMEM; @@ -537,7 +537,7 @@ static int do_one_pass(journal_t *journal, } done: - /* + /* * We broke out of the log scan loop: either we came to the * known end of the log or we found an unexpected block in the * log. If the latter happened, then we know that the "current" @@ -567,7 +567,7 @@ static int do_one_pass(journal_t *journal, /* Scan a revoke record, marking all blocks mentioned as revoked. */ -static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, +static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, tid_t sequence, struct recovery_info *info) { journal_revoke_header_t *header; diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index a56144183462..c532429d8d9b 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -1,6 +1,6 @@ /* * linux/fs/revoke.c - * + * * Written by Stephen C. Tweedie , 2000 * * Copyright 2000 Red Hat corp --- All Rights Reserved @@ -15,10 +15,10 @@ * Revoke is the mechanism used to prevent old log records for deleted * metadata from being replayed on top of newer data using the same * blocks. The revoke mechanism is used in two separate places: - * + * * + Commit: during commit we write the entire list of the current * transaction's revoked blocks to the journal - * + * * + Recovery: during recovery we record the transaction ID of all * revoked blocks. If there are multiple revoke records in the log * for a single block, only the last one counts, and if there is a log @@ -29,7 +29,7 @@ * single transaction: * * Block is revoked and then journaled: - * The desired end result is the journaling of the new block, so we + * The desired end result is the journaling of the new block, so we * cancel the revoke before the transaction commits. * * Block is journaled and then revoked: @@ -41,7 +41,7 @@ * transaction must have happened after the block was journaled and so * the revoke must take precedence. * - * Block is revoked and then written as data: + * Block is revoked and then written as data: * The data write is allowed to succeed, but the revoke is _not_ * cancelled. We still need to prevent old log records from * overwriting the new data. We don't even need to clear the revoke @@ -54,7 +54,7 @@ * buffer has not been revoked, and cancel_revoke * need do nothing. * RevokeValid set, Revoked set: - * buffer has been revoked. + * buffer has been revoked. */ #ifndef __KERNEL__ @@ -77,7 +77,7 @@ static kmem_cache_t *revoke_table_cache; journal replay, this involves recording the transaction ID of the last transaction to revoke this block. */ -struct jbd_revoke_record_s +struct jbd_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ @@ -90,8 +90,8 @@ struct jbd_revoke_table_s { /* It is conceivable that we might want a larger hash table * for recovery. Must be a power of two. */ - int hash_size; - int hash_shift; + int hash_size; + int hash_shift; struct list_head *hash_table; }; @@ -301,22 +301,22 @@ void journal_destroy_revoke(journal_t *journal) #ifdef __KERNEL__ -/* +/* * journal_revoke: revoke a given buffer_head from the journal. This * prevents the block from being replayed during recovery if we take a * crash after this current transaction commits. Any subsequent * metadata writes of the buffer in this transaction cancel the - * revoke. + * revoke. * * Note that this call may block --- it is up to the caller to make * sure that there are no further calls to journal_write_metadata * before the revoke is complete. In ext3, this implies calling the * revoke before clearing the block bitmap when we are deleting - * metadata. + * metadata. * * Revoke performs a journal_forget on any buffer_head passed in as a * parameter, but does _not_ forget the buffer_head if the bh was only - * found implicitly. + * found implicitly. * * bh_in may not be a journalled buffer - it may have come off * the hash tables without an attached journal_head. @@ -325,7 +325,7 @@ void journal_destroy_revoke(journal_t *journal) * by one. */ -int journal_revoke(handle_t *handle, unsigned long blocknr, +int journal_revoke(handle_t *handle, unsigned long blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; @@ -487,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal) else journal->j_revoke = journal->j_revoke_table[0]; - for (i = 0; i < journal->j_revoke->hash_size; i++) + for (i = 0; i < journal->j_revoke->hash_size; i++) INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); } @@ -498,7 +498,7 @@ void journal_switch_revoke_table(journal_t *journal) * Called with the journal lock held. */ -void journal_write_revoke_records(journal_t *journal, +void journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { struct journal_head *descriptor; @@ -507,7 +507,7 @@ void journal_write_revoke_records(journal_t *journal, struct list_head *hash_list; int i, offset, count; - descriptor = NULL; + descriptor = NULL; offset = 0; count = 0; @@ -519,10 +519,10 @@ void journal_write_revoke_records(journal_t *journal, hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { - record = (struct jbd_revoke_record_s *) + record = (struct jbd_revoke_record_s *) hash_list->next; write_one_revoke_record(journal, transaction, - &descriptor, &offset, + &descriptor, &offset, record); count++; list_del(&record->hash); @@ -534,14 +534,14 @@ void journal_write_revoke_records(journal_t *journal, jbd_debug(1, "Wrote %d revoke records\n", count); } -/* +/* * Write out one revoke record. We need to create a new descriptor - * block if the old one is full or if we have not already created one. + * block if the old one is full or if we have not already created one. */ -static void write_one_revoke_record(journal_t *journal, +static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, - struct journal_head **descriptorp, + struct journal_head **descriptorp, int *offsetp, struct jbd_revoke_record_s *record) { @@ -584,21 +584,21 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = cpu_to_be32(record->blocknr); offset += 4; *offsetp = offset; } -/* +/* * Flush a revoke descriptor out to the journal. If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to * be waited for during commit, so it has to go onto the appropriate * journal buffer list. */ -static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, +static void flush_descriptor(journal_t *journal, + struct journal_head *descriptor, int offset) { journal_revoke_header_t *header; @@ -618,7 +618,7 @@ static void flush_descriptor(journal_t *journal, } #endif -/* +/* * Revoke support for recovery. * * Recovery needs to be able to: @@ -629,7 +629,7 @@ static void flush_descriptor(journal_t *journal, * check whether a given block in a given transaction should be replayed * (ie. has not been revoked by a revoke record in that or a subsequent * transaction) - * + * * empty the revoke table after recovery. */ @@ -637,11 +637,11 @@ static void flush_descriptor(journal_t *journal, * First, setting revoke records. We create a new revoke record for * every block ever revoked in the log as we scan it for recovery, and * we update the existing records if we find multiple revokes for a - * single block. + * single block. */ -int journal_set_revoke(journal_t *journal, - unsigned long blocknr, +int journal_set_revoke(journal_t *journal, + unsigned long blocknr, tid_t sequence) { struct jbd_revoke_record_s *record; @@ -653,18 +653,18 @@ int journal_set_revoke(journal_t *journal, if (tid_gt(sequence, record->sequence)) record->sequence = sequence; return 0; - } + } return insert_revoke_hash(journal, blocknr, sequence); } -/* +/* * Test revoke records. For a given block referenced in the log, has * that block been revoked? A revoke record with a given transaction * sequence number revokes all blocks in that transaction and earlier * ones, but later transactions still need replayed. */ -int journal_test_revoke(journal_t *journal, +int journal_test_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence) { diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index f5169a96260e..bf7fd7117817 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1,6 +1,6 @@ /* * linux/fs/transaction.c - * + * * Written by Stephen C. Tweedie , 1998 * * Copyright 1998 Red Hat corp --- All Rights Reserved @@ -10,7 +10,7 @@ * option, any later version, incorporated herein by reference. * * Generic filesystem transaction handling code; part of the ext2fs - * journaling system. + * journaling system. * * This file manages transactions (compound commits managed by the * journaling code) and handles (individual atomic operations by the @@ -74,7 +74,7 @@ get_transaction(journal_t *journal, transaction_t *transaction) * start_this_handle: Given a handle, deal with any locking or stalling * needed to make sure that there is enough journal space for the handle * to begin. Attach the handle to a transaction and set up the - * transaction's buffer credits. + * transaction's buffer credits. */ static int start_this_handle(journal_t *journal, handle_t *handle) @@ -117,7 +117,7 @@ repeat_locked: if (is_journal_aborted(journal) || (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { spin_unlock(&journal->j_state_lock); - ret = -EROFS; + ret = -EROFS; goto out; } @@ -182,7 +182,7 @@ repeat_locked: goto repeat; } - /* + /* * The commit code assumes that it can get enough log space * without forcing a checkpoint. This is *critical* for * correctness: a checkpoint of a buffer which is also @@ -191,7 +191,7 @@ repeat_locked: * * We must therefore ensure the necessary space in the journal * *before* starting to dirty potentially checkpointed buffers - * in the new transaction. + * in the new transaction. * * The worst part is, any transaction currently committing can * reduce the free space arbitrarily. Be careful to account for @@ -246,13 +246,13 @@ static handle_t *new_handle(int nblocks) } /** - * handle_t *journal_start() - Obtain a new handle. + * handle_t *journal_start() - Obtain a new handle. * @journal: Journal to start transaction on. * @nblocks: number of block buffer we might modify * * We make sure that the transaction can guarantee at least nblocks of * modified buffers in the log. We block until the log can guarantee - * that much space. + * that much space. * * This function is visible to journal users (like ext3fs), so is not * called with the journal already locked. @@ -292,11 +292,11 @@ handle_t *journal_start(journal_t *journal, int nblocks) * int journal_extend() - extend buffer credits. * @handle: handle to 'extend' * @nblocks: nr blocks to try to extend by. - * + * * Some transactions, such as large extends and truncates, can be done * atomically all at once or in several stages. The operation requests * a credit for a number of buffer modications in advance, but can - * extend its credit if it needs more. + * extend its credit if it needs more. * * journal_extend tries to give the running handle more buffer credits. * It does not guarantee that allocation - this is a best-effort only. @@ -363,7 +363,7 @@ out: * int journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested - * + * * Restart a handle for a multi-transaction filesystem * operation. * @@ -462,7 +462,7 @@ void journal_lock_updates(journal_t *journal) /** * void journal_unlock_updates (journal_t* journal) - release barrier * @journal: Journal to release the barrier on. - * + * * Release a transaction barrier obtained with journal_lock_updates(). * * Should be called without the journal lock held. @@ -547,8 +547,8 @@ repeat: jbd_lock_bh_state(bh); /* We now hold the buffer lock so it is safe to query the buffer - * state. Is the buffer dirty? - * + * state. Is the buffer dirty? + * * If so, there are two possibilities. The buffer may be * non-journaled, and undergoing a quite legitimate writeback. * Otherwise, it is journaled, and we don't expect dirty buffers @@ -566,7 +566,7 @@ repeat: */ if (jh->b_transaction) { J_ASSERT_JH(jh, - jh->b_transaction == transaction || + jh->b_transaction == transaction || jh->b_transaction == journal->j_committing_transaction); if (jh->b_next_transaction) @@ -653,7 +653,7 @@ repeat: * buffer had better remain locked during the kmalloc, * but that should be true --- we hold the journal lock * still and the buffer is already on the BUF_JOURNAL - * list so won't be flushed. + * list so won't be flushed. * * Subtle point, though: if this is a get_undo_access, * then we will be relying on the frozen_data to contain @@ -765,8 +765,8 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh) * manually rather than reading off disk), then we need to keep the * buffer_head locked until it has been completely filled with new * data. In this case, we should be able to make the assertion that - * the bh is not already part of an existing transaction. - * + * the bh is not already part of an existing transaction. + * * The buffer should already be locked by the caller by this point. * There is no lock ranking violation: it was a newly created, * unlocked buffer beforehand. */ @@ -778,7 +778,7 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh) * * Call this if you create a new bh. */ -int journal_get_create_access(handle_t *handle, struct buffer_head *bh) +int journal_get_create_access(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -847,13 +847,13 @@ out: * do not reuse freed space until the deallocation has been committed, * since if we overwrote that space we would make the delete * un-rewindable in case of a crash. - * + * * To deal with that, journal_get_undo_access requests write access to a * buffer for parts of non-rewindable operations such as delete * operations on the bitmaps. The journaling code must keep a copy of * the buffer's contents prior to the undo_access call until such time * as we know that the buffer has definitely been committed to disk. - * + * * We never need to know which transaction the committed data is part * of, buffers touched here are guaranteed to be dirtied later and so * will be committed to a new transaction in due course, at which point @@ -911,13 +911,13 @@ out: return err; } -/** +/** * int journal_dirty_data() - mark a buffer as containing dirty data which * needs to be flushed before we can commit the - * current transaction. + * current transaction. * @handle: transaction * @bh: bufferhead to mark - * + * * The buffer is placed on the transaction's data list and is marked as * belonging to the transaction. * @@ -946,15 +946,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) /* * What if the buffer is already part of a running transaction? - * + * * There are two cases: * 1) It is part of the current running transaction. Refile it, * just in case we have allocated it as metadata, deallocated - * it, then reallocated it as data. + * it, then reallocated it as data. * 2) It is part of the previous, still-committing transaction. * If all we want to do is to guarantee that the buffer will be * written to disk before this new transaction commits, then - * being sure that the *previous* transaction has this same + * being sure that the *previous* transaction has this same * property is sufficient for us! Just leave it on its old * transaction. * @@ -1076,18 +1076,18 @@ no_journal: return 0; } -/** +/** * int journal_dirty_metadata() - mark a buffer as containing dirty metadata * @handle: transaction to add buffer to. - * @bh: buffer to mark - * + * @bh: buffer to mark + * * mark dirty metadata which needs to be journaled as part of the current * transaction. * * The buffer is placed on the transaction's metadata list and is marked - * as belonging to the transaction. + * as belonging to the transaction. * - * Returns error number or 0 on success. + * Returns error number or 0 on success. * * Special care needs to be taken if the buffer already belongs to the * current committing transaction (in which case we should have frozen @@ -1135,11 +1135,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) set_buffer_jbddirty(bh); - /* + /* * Metadata already on the current transaction list doesn't * need to be filed. Metadata on another transaction's list must * be committing, and will be refiled once the commit completes: - * leave it alone for now. + * leave it alone for now. */ if (jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "already on other transaction"); @@ -1165,7 +1165,7 @@ out: return 0; } -/* +/* * journal_release_buffer: undo a get_write_access without any buffer * updates, if the update decided in the end that it didn't need access. * @@ -1176,20 +1176,20 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh) BUFFER_TRACE(bh, "entry"); } -/** +/** * void journal_forget() - bforget() for potentially-journaled buffers. * @handle: transaction handle * @bh: bh to 'forget' * * We can only do the bforget if there are no commits pending against the * buffer. If the buffer is dirty in the current running transaction we - * can safely unlink it. + * can safely unlink it. * * bh may not be a journalled buffer at all - it may be a non-JBD * buffer which came off the hashtable. Check for this. * * Decrements bh->b_count by one. - * + * * Allow this call even if the handle has aborted --- it may be part of * the caller's cleanup after an abort. */ @@ -1237,7 +1237,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) drop_reserve = 1; - /* + /* * We are no longer going to journal this buffer. * However, the commit of this transaction is still * important to the buffer: the delete that we are now @@ -1246,7 +1246,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) * * So, if we have a checkpoint on the buffer, we should * now refile the buffer on our BJ_Forget list so that - * we know to remove the checkpoint after we commit. + * we know to remove the checkpoint after we commit. */ if (jh->b_cp_transaction) { @@ -1264,7 +1264,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) } } } else if (jh->b_transaction) { - J_ASSERT_JH(jh, (jh->b_transaction == + J_ASSERT_JH(jh, (jh->b_transaction == journal->j_committing_transaction)); /* However, if the buffer is still owned by a prior * (committing) transaction, we can't drop it yet... */ @@ -1294,7 +1294,7 @@ drop: /** * int journal_stop() - complete a transaction * @handle: tranaction to complete. - * + * * All done for a particular handle. * * There is not much action needed here. We just return any remaining @@ -1303,7 +1303,7 @@ drop: * filesystem is marked for synchronous update. * * journal_stop itself will not usually return an error, but it may - * do so in unusual circumstances. In particular, expect it to + * do so in unusual circumstances. In particular, expect it to * return -EIO if a journal_abort has been executed since the * transaction began. */ @@ -1388,7 +1388,7 @@ int journal_stop(handle_t *handle) /* * Special case: JFS_SYNC synchronous updates require us - * to wait for the commit to complete. + * to wait for the commit to complete. */ if (handle->h_sync && !(current->flags & PF_MEMALLOC)) err = log_wait_commit(journal, tid); @@ -1439,7 +1439,7 @@ int journal_force_commit(journal_t *journal) * jbd_lock_bh_state(jh2bh(jh)) is held. */ -static inline void +static inline void __blist_add_buffer(struct journal_head **list, struct journal_head *jh) { if (!*list) { @@ -1454,7 +1454,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh) } } -/* +/* * Remove a buffer from a transaction list, given the transaction's list * head pointer. * @@ -1475,7 +1475,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) jh->b_tnext->b_tprev = jh->b_tprev; } -/* +/* * Remove a buffer from the appropriate transaction list. * * Note that this function can *change* the value of @@ -1595,17 +1595,17 @@ out: } -/** +/** * int journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free * @unused_gfp_mask: unused * - * + * * For all the buffers on this page, * if they are fully written out ordered data, move them onto BUF_CLEAN * so try_to_free_buffers() can reap them. - * + * * This function returns non-zero if we wish try_to_free_buffers() * to be called. We do this if the page is releasable by try_to_free_buffers(). * We also do it if the page has locked or dirty buffers and the caller wants @@ -1629,7 +1629,7 @@ out: * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? */ -int journal_try_to_free_buffers(journal_t *journal, +int journal_try_to_free_buffers(journal_t *journal, struct page *page, gfp_t unused_gfp_mask) { struct buffer_head *head; @@ -1697,7 +1697,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) } /* - * journal_invalidatepage + * journal_invalidatepage * * This code is tricky. It has a number of cases to deal with. * @@ -1705,15 +1705,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * * i_size must be updated on disk before we start calling invalidatepage on the * data. - * + * * This is done in ext3 by defining an ext3_setattr method which * updates i_size before truncate gets going. By maintaining this * invariant, we can be sure that it is safe to throw away any buffers * attached to the current transaction: once the transaction commits, * we know that the data will not be needed. - * + * * Note however that we can *not* throw away data belonging to the - * previous, committing transaction! + * previous, committing transaction! * * Any disk blocks which *are* part of the previous, committing * transaction (and which therefore cannot be discarded immediately) are @@ -1732,7 +1732,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * don't make guarantees about the order in which data hits disk --- in * particular we don't guarantee that new dirty data is flushed before * transaction commit --- so it is always safe just to discard data - * immediately in that mode. --sct + * immediately in that mode. --sct */ /* @@ -1876,9 +1876,9 @@ zap_buffer_unlocked: return may_free; } -/** +/** * void journal_invalidatepage() - * @journal: journal to use for flush... + * @journal: journal to use for flush... * @page: page to flush * @offset: length of page to invalidate. * @@ -1886,7 +1886,7 @@ zap_buffer_unlocked: * */ void journal_invalidatepage(journal_t *journal, - struct page *page, + struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; @@ -1924,8 +1924,8 @@ void journal_invalidatepage(journal_t *journal, } } -/* - * File a buffer on the given transaction list. +/* + * File a buffer on the given transaction list. */ void __journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) @@ -1948,7 +1948,7 @@ void __journal_file_buffer(struct journal_head *jh, * with __jbd_unexpected_dirty_buffer()'s handling of dirty * state. */ - if (jlist == BJ_Metadata || jlist == BJ_Reserved || + if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) @@ -2008,7 +2008,7 @@ void journal_file_buffer(struct journal_head *jh, jbd_unlock_bh_state(jh2bh(jh)); } -/* +/* * Remove a buffer from its current buffer list in preparation for * dropping it from its current transaction entirely. If the buffer has * already started to be used by a subsequent transaction, refile the @@ -2060,7 +2060,7 @@ void __journal_refile_buffer(struct journal_head *jh) * to the caller to remove the journal_head if necessary. For the * unlocked journal_refile_buffer call, the caller isn't going to be * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. + * ourselves to avoid a jh leak. * * *** The journal_head may be freed by this call! *** */ diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h index c8307c02dd07..ce0e6109aff0 100644 --- a/include/linux/ext3_jbd.h +++ b/include/linux/ext3_jbd.h @@ -23,7 +23,7 @@ /* Define the number of blocks we need to account to a transaction to * modify one block of data. - * + * * We may have to touch one inode, one bitmap buffer, up to three * indirection blocks, the group and superblock summaries, and the data * block to complete the transaction. */ @@ -88,16 +88,16 @@ #endif int -ext3_mark_iloc_dirty(handle_t *handle, +ext3_mark_iloc_dirty(handle_t *handle, struct inode *inode, struct ext3_iloc *iloc); -/* +/* * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. + * iloc->bh. This _must_ be cleaned up later. */ -int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, +int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, struct ext3_iloc *iloc); int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index a04c154c5207..7d847931ee5a 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -1,6 +1,6 @@ /* * linux/include/linux/jbd.h - * + * * Written by Stephen C. Tweedie * * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved @@ -97,8 +97,8 @@ extern void jbd_slab_free(void *ptr, size_t size); * number of outstanding buffers possible at any time. When the * operation completes, any buffer credits not used are credited back to * the transaction, so that at all times we know how many buffers the - * outstanding updates on a transaction might possibly touch. - * + * outstanding updates on a transaction might possibly touch. + * * This is an opaque datatype. **/ typedef struct handle_s handle_t; /* Atomic operation type */ @@ -108,7 +108,7 @@ typedef struct handle_s handle_t; /* Atomic operation type */ * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem. * * journal_t is linked to from the fs superblock structure. - * + * * We use the journal_t to keep track of all outstanding transaction * activity on the filesystem, and to manage the state of the log * writing process. @@ -128,7 +128,7 @@ typedef struct journal_s journal_t; /* Journal control structure */ * On-disk structures */ -/* +/* * Descriptor block types: */ @@ -149,8 +149,8 @@ typedef struct journal_header_s } journal_header_t; -/* - * The block tag: used to describe a single buffer in the journal +/* + * The block tag: used to describe a single buffer in the journal */ typedef struct journal_block_tag_s { @@ -158,9 +158,9 @@ typedef struct journal_block_tag_s __be32 t_flags; /* See below */ } journal_block_tag_t; -/* +/* * The revoke descriptor: used on disk to describe a series of blocks to - * be revoked from the log + * be revoked from the log */ typedef struct journal_revoke_header_s { @@ -374,10 +374,10 @@ struct jbd_revoke_table_s; **/ /* Docbook can't yet cope with the bit fields, but will leave the documentation - * in so it can be fixed later. + * in so it can be fixed later. */ -struct handle_s +struct handle_s { /* Which compound transaction is this update a part of? */ transaction_t *h_transaction; @@ -435,7 +435,7 @@ struct handle_s * */ -struct transaction_s +struct transaction_s { /* Pointer to the journal for this transaction. [no locking] */ journal_t *t_journal; @@ -455,7 +455,7 @@ struct transaction_s T_RUNDOWN, T_FLUSH, T_COMMIT, - T_FINISHED + T_FINISHED } t_state; /* @@ -569,7 +569,7 @@ struct transaction_s * journal_t. * @j_flags: General journaling state flags * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? + * prior abort)? * @j_sb_buffer: First part of superblock buffer * @j_superblock: Second part of superblock buffer * @j_format_version: Version of the superblock format @@ -583,7 +583,7 @@ struct transaction_s * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction * to start committing, or for a barrier lock to be released * @j_wait_logspace: Wait queue for waiting for checkpointing to complete - * @j_wait_done_commit: Wait queue for waiting for commit to complete + * @j_wait_done_commit: Wait queue for waiting for commit to complete * @j_wait_checkpoint: Wait queue to trigger checkpointing * @j_wait_commit: Wait queue to trigger commit * @j_wait_updates: Wait queue to wait for updates to complete @@ -592,7 +592,7 @@ struct transaction_s * @j_tail: Journal tail - identifies the oldest still-used block in the * journal. * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block + * @j_first: The block number of the first usable block * @j_last: The block number one beyond the last usable block * @j_dev: Device where we store the journal * @j_blocksize: blocksize for the location where we store the journal. @@ -604,12 +604,12 @@ struct transaction_s * @j_list_lock: Protects the buffer lists and internal buffer state. * @j_inode: Optional inode where we store the journal. If present, all journal * block numbers are mapped into this inode via bmap(). - * @j_tail_sequence: Sequence number of the oldest transaction in the log + * @j_tail_sequence: Sequence number of the oldest transaction in the log * @j_transaction_sequence: Sequence number of the next transaction to grant * @j_commit_sequence: Sequence number of the most recently committed * transaction * @j_commit_request: Sequence number of the most recent transaction wanting - * commit + * commit * @j_uuid: Uuid of client object. * @j_task: Pointer to the current commit thread for this journal * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a @@ -823,8 +823,8 @@ struct journal_s void *j_private; }; -/* - * Journal flag definitions +/* + * Journal flag definitions */ #define JFS_UNMOUNT 0x001 /* Journal thread is being destroyed */ #define JFS_ABORT 0x002 /* Journaling has been aborted for errors. */ @@ -833,7 +833,7 @@ struct journal_s #define JFS_LOADED 0x010 /* The journal superblock has been loaded */ #define JFS_BARRIER 0x020 /* Use IDE barriers */ -/* +/* * Function declarations for the journaling transaction and buffer * management */ @@ -862,7 +862,7 @@ int __journal_remove_checkpoint(struct journal_head *); void __journal_insert_checkpoint(struct journal_head *, transaction_t *); /* Buffer IO */ -extern int +extern int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, @@ -890,7 +890,7 @@ static inline handle_t *journal_current_handle(void) /* The journaling code user interface: * * Create and destroy handles - * Register buffer modifications against the current transaction. + * Register buffer modifications against the current transaction. */ extern handle_t *journal_start(journal_t *, int nblocks); @@ -917,11 +917,11 @@ extern journal_t * journal_init_dev(struct block_device *bdev, int start, int len, int bsize); extern journal_t * journal_init_inode (struct inode *); extern int journal_update_format (journal_t *); -extern int journal_check_used_features +extern int journal_check_used_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_check_available_features +extern int journal_check_available_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_set_features +extern int journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int journal_create (journal_t *); extern int journal_load (journal_t *journal); @@ -1015,7 +1015,7 @@ do { \ * bit, when set, indicates that we have had a fatal error somewhere, * either inside the journaling layer or indicated to us by the client * (eg. ext3), and that we and should not commit any further - * transactions. + * transactions. */ static inline int is_journal_aborted(journal_t *journal) @@ -1082,7 +1082,7 @@ static inline int jbd_space_needed(journal_t *journal) #define BJ_Reserved 7 /* Buffer is reserved for access by journal */ #define BJ_Locked 8 /* Locked for I/O during commit */ #define BJ_Types 9 - + extern int jbd_blocks_per_page(struct inode *inode); #ifdef __KERNEL__ -- cgit v1.2.3-71-gd317 From 37ed322290eb6d5cf2ab33915793ed4219eae1d6 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Sep 2006 01:49:31 -0700 Subject: [PATCH] JBD: 16T fixes These are a few places I've found in jbd that look like they may not be 16T-safe, or consistent with the use of unsigned longs for block containers. Problems here would be somewhat hard to hit, would require journal blocks past the 8T boundary, which would not be terribly common. Still, should fix. (some of these have come from the ext4 work on jbd as well). I think there's one more possibility that the wrap() function may not be safe IF your last block in the journal butts right up against the 232 block boundary, but that seems like a VERY remote possibility, and I'm not worrying about it at this point. Signed-off-by: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 6 +++--- include/linux/jbd.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 2613fca92740..88d970e09ce6 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal) int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - int blocknr) + unsigned long blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -696,7 +696,7 @@ fail: * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. - * @len: Lenght of the journal in blocks. + * @len: Length of the journal in blocks. * @blocksize: blocksize of journalling device * @returns: a newly created journal_t * * @@ -820,7 +820,7 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - unsigned int first, last; + unsigned long first, last; first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 7d847931ee5a..dc262624efa5 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -732,7 +732,7 @@ struct journal_s */ struct block_device *j_dev; int j_blocksize; - unsigned int j_blk_offset; + unsigned long j_blk_offset; /* * Device which holds the client fs. For internal journal this will be @@ -866,7 +866,7 @@ extern int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - int blocknr); + unsigned long blocknr); /* Transaction locking */ extern void __wait_on_journal (journal_t *); -- cgit v1.2.3-71-gd317 From e9ad5620bfb901df8a7a2603c88689ededeecaf1 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 27 Sep 2006 01:49:35 -0700 Subject: [PATCH] ext3: More whitespace cleanups More white space cleanups in preparation of cloning ext4 from ext3. Removing spaces that precede a tab. Signed-off-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/acl.c | 2 +- fs/ext3/balloc.c | 32 ++++++++++++++++---------------- fs/ext3/dir.c | 2 +- fs/ext3/hash.c | 2 +- fs/ext3/inode.c | 8 ++++---- fs/ext3/namei.c | 18 +++++++++--------- fs/ext3/super.c | 6 +++--- fs/jbd/checkpoint.c | 2 +- fs/jbd/journal.c | 2 +- fs/jbd/recovery.c | 2 +- fs/jbd/transaction.c | 6 +++--- include/linux/ext3_fs.h | 2 +- include/linux/ext3_fs_i.h | 2 +- include/linux/jbd.h | 10 +++++----- 14 files changed, 48 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 0d21d558b87a..92bf78221429 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -258,7 +258,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, default: return -EINVAL; } - if (acl) { + if (acl) { value = ext3_acl_to_disk(acl, &size); if (IS_ERR(value)) return (int)PTR_ERR(value); diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index f1897feb18f4..c19be8fc3e51 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -40,7 +40,7 @@ /** * ext3_get_group_desc() -- load group descriptor from disk - * @sb: super block + * @sb: super block * @block_group: given block group * @bh: pointer to the buffer head to store the block * group descriptor @@ -355,7 +355,7 @@ void ext3_init_block_alloc_info(struct inode *inode) rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - /* + /* * if filesystem is mounted with NORESERVATION, the goal * reservation window size is set to zero to indicate * block reservation is off @@ -681,7 +681,7 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, jbd_lock_bh_state(bh); if (jh->b_committed_data) start = ext3_find_next_zero_bit(jh->b_committed_data, - maxblocks, next); + maxblocks, next); jbd_unlock_bh_state(bh); } return -1; @@ -790,10 +790,10 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) * and at last, allocate the blocks by claiming the found free bit as allocated. * * To set the range of this allocation: - * if there is a reservation window, only try to allocate block(s) from the + * if there is a reservation window, only try to allocate block(s) from the * file's own reservation window; * Otherwise, the allocation range starts from the give goal block, ends at - * the block group's last block. + * the block group's last block. * * If we failed to allocate the desired block then we may end up crossing to a * new bitmap. In that case we must release write access to the old one via @@ -880,12 +880,12 @@ fail_access: } /** - * find_next_reservable_window(): + * find_next_reservable_window(): * find a reservable space within the given range. * It does not allocate the reservation window for now: * alloc_new_reservation() will do the work later. * - * @search_head: the head of the searching list; + * @search_head: the head of the searching list; * This is not necessarily the list head of the whole filesystem * * We have both head and start_block to assist the search @@ -893,12 +893,12 @@ fail_access: * but we will shift to the place where start_block is, * then start from there, when looking for a reservable space. * - * @size: the target new reservation window size + * @size: the target new reservation window size * - * @group_first_block: the first block we consider to start + * @group_first_block: the first block we consider to start * the real search from * - * @last_block: + * @last_block: * the maximum block number that our goal reservable space * could start from. This is normally the last block in this * group. The search will end when we found the start of next @@ -906,10 +906,10 @@ fail_access: * This could handle the cross boundary reservation window * request. * - * basically we search from the given range, rather than the whole - * reservation double linked list, (start_block, last_block) - * to find a free region that is of my size and has not - * been reserved. + * basically we search from the given range, rather than the whole + * reservation double linked list, (start_block, last_block) + * to find a free region that is of my size and has not + * been reserved. * */ static int find_next_reservable_window( @@ -962,7 +962,7 @@ static int find_next_reservable_window( /* * Found a reserveable space big enough. We could * have a reservation across the group boundary here - */ + */ break; } } @@ -998,7 +998,7 @@ static int find_next_reservable_window( } /** - * alloc_new_reservation()--allocate a new reservation window + * alloc_new_reservation()--allocate a new reservation window * * To make a new reservation, we search part of the filesystem * reservation list (the list that inside the group). We try to diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 6f9e5a523c87..4d97f437cd48 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, unsigned long offset) { const char * error_msg = NULL; - const int rlen = le16_to_cpu(de->rec_len); + const int rlen = le16_to_cpu(de->rec_len); if (rlen < EXT3_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index 7fa637cd322a..deeb27b5ba83 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c @@ -95,7 +95,7 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) __u32 minor_hash = 0; const char *p; int i; - __u32 in[8], buf[4]; + __u32 in[8], buf[4]; /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 56665fab027d..c9fc15f8b609 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -13,11 +13,11 @@ * Copyright (C) 1991, 1992 Linus Torvalds * * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 + * (sct@redhat.com), 1993, 1998 * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) + * (jj@sunsite.ms.mff.cuni.cz) * * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 */ @@ -470,7 +470,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, * ext3_blks_to_allocate: Look up the block map and count the number * of direct blocks need to be allocated for the given branch. * - * @branch: chain of indirect blocks + * @branch: chain of indirect blocks * @k: number of blocks need for indirect blocks * @blks: number of data blocks to be mapped. * @blocks_to_boundary: the offset in the indirect block @@ -1098,7 +1098,7 @@ static int walk_page_buffers( handle_t *handle, for ( bh = head, block_start = 0; ret == 0 && (bh != head || !block_start); - block_start = block_end, bh = next) + block_start = block_end, bh = next) { next = bh->b_this_page; block_end = block_start + blocksize; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 70dc5206ebfa..85d132c37ee0 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -15,13 +15,13 @@ * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 + * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 * Hash Tree Directory indexing (c) - * Daniel Phillips, 2001 + * Daniel Phillips, 2001 * Hash Tree Directory indexing porting - * Christopher Li, 2002 + * Christopher Li, 2002 * Hash Tree Directory indexing cleanup - * Theodore Ts'o, 2002 + * Theodore Ts'o, 2002 */ #include @@ -278,7 +278,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent ((char *) de - base)); } space += EXT3_DIR_REC_LEN(de->name_len); - names++; + names++; } de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } @@ -1688,7 +1688,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1816,7 +1816,7 @@ static int empty_dir (struct inode * inode) !le32_to_cpu(de1->inode) || strcmp (".", de->name) || strcmp ("..", de1->name)) { - ext3_warning (inode->i_sb, "empty_dir", + ext3_warning (inode->i_sb, "empty_dir", "bad directory (dir #%lu) - no `.' or `..'", inode->i_ino); brelse (bh); @@ -2129,7 +2129,7 @@ static int ext3_symlink (struct inode * dir, retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2227,7 +2227,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, DQUOT_INIT(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); if (IS_ERR(handle)) return PTR_ERR(handle); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4b526b496102..0544325d9d7e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -734,8 +734,8 @@ static match_table_t tokens = { static ext3_fsblk_t get_sb_block(void **data) { - ext3_fsblk_t sb_block; - char *options = (char *) *data; + ext3_fsblk_t sb_block; + char *options = (char *) *data; if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ @@ -2740,7 +2740,7 @@ static int __init init_ext3_fs(void) out: destroy_inodecache(); out1: - exit_ext3_xattr(); + exit_ext3_xattr(); return err; } diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 961ada28db5e..0208cc7ac5d0 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -480,7 +480,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) if (!jh) return 0; - last_jh = jh->b_cpprev; + last_jh = jh->b_cpprev; do { jh = next_jh; next_jh = jh->b_cpnext; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 88d970e09ce6..b571209d6a1e 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -181,7 +181,7 @@ loop: transaction->t_expires)) should_sleep = 0; if (journal->j_flags & JFS_UNMOUNT) - should_sleep = 0; + should_sleep = 0; if (should_sleep) { spin_unlock(&journal->j_state_lock); schedule(); diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 73bb64806ed3..445eed6ce5dc 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal, unsigned long next_log_block; int err, success = 0; journal_superblock_t * sb; - journal_header_t * tmp; + journal_header_t * tmp; struct buffer_head * bh; unsigned int sequence; int blocktype; diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index bf7fd7117817..e1b3c8af4d17 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -580,7 +580,7 @@ repeat: */ JBUFFER_TRACE(jh, "Unexpected dirty buffer"); jbd_unexpected_dirty_buffer(jh); - } + } unlock_buffer(bh); @@ -1373,7 +1373,7 @@ int journal_stop(handle_t *handle) if (handle->h_sync || transaction->t_outstanding_credits > journal->j_max_transaction_buffers || - time_after_eq(jiffies, transaction->t_expires)) { + time_after_eq(jiffies, transaction->t_expires)) { /* Do this even for aborted journals: an abort still * completes the commit thread, it just doesn't write * anything to disk. */ @@ -1908,7 +1908,7 @@ void journal_invalidatepage(journal_t *journal, next = bh->b_this_page; if (offset <= curr_off) { - /* This block is wholly outside the truncation point */ + /* This block is wholly outside the truncation point */ lock_buffer(bh); may_free &= journal_unmap_buffer(journal, bh); unlock_buffer(bh); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 0eed918b3816..369c67502346 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -473,7 +473,7 @@ struct ext3_super_block { __u8 s_reserved_char_pad; __u16 s_reserved_word_pad; __le32 s_default_mount_opts; - __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_first_meta_bg; /* First metablock block group */ __u32 s_reserved[190]; /* Padding to the end of the block */ }; diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index 2f18b9511f21..4395e5206746 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -35,7 +35,7 @@ struct ext3_reserve_window { }; struct ext3_reserve_window_node { - struct rb_node rsv_node; + struct rb_node rsv_node; __u32 rsv_goal_size; __u32 rsv_alloc_hit; struct ext3_reserve_window rsv_window; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index dc262624efa5..a6d9daa38c6d 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -64,7 +64,7 @@ extern int journal_enable_debug; if ((n) <= journal_enable_debug) { \ printk (KERN_DEBUG "(%s, %d): %s: ", \ __FILE__, __LINE__, __FUNCTION__); \ - printk (f, ## a); \ + printk (f, ## a); \ } \ } while (0) #else @@ -201,9 +201,9 @@ typedef struct journal_superblock_s /* 0x0024 */ /* Remaining fields are only valid in a version-2 superblock */ - __be32 s_feature_compat; /* compatible feature set */ - __be32 s_feature_incompat; /* incompatible feature set */ - __be32 s_feature_ro_compat; /* readonly-compatible feature set */ + __be32 s_feature_compat; /* compatible feature set */ + __be32 s_feature_incompat; /* incompatible feature set */ + __be32 s_feature_ro_compat; /* readonly-compatible feature set */ /* 0x0030 */ __u8 s_uuid[16]; /* 128-bit uuid for journal */ @@ -699,7 +699,7 @@ struct journal_s wait_queue_head_t j_wait_updates; /* Semaphore for locking against concurrent checkpoints */ - struct mutex j_checkpoint_mutex; + struct mutex j_checkpoint_mutex; /* * Journal head: identifies the first unused block in the journal. -- cgit v1.2.3-71-gd317 From a4e4de36dc446b2193bdc8ebb96a96e44b69dd94 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 27 Sep 2006 01:49:36 -0700 Subject: [PATCH] ext3: Fix sparse warnings Fixing up some endian-ness warnings in preparation to clone ext4 from ext3. Signed-off-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/resize.c | 21 +++++++++++---------- fs/ext3/super.c | 4 +--- fs/jbd/journal.c | 2 +- include/linux/ext3_fs.h | 2 +- 4 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 5e1337fd878a..e186f7fb698b 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -336,7 +336,7 @@ static int verify_reserved_gdb(struct super_block *sb, unsigned five = 5; unsigned seven = 7; unsigned grp; - __u32 *p = (__u32 *)primary->b_data; + __le32 *p = (__le32 *)primary->b_data; int gdbackups = 0; while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { @@ -380,7 +380,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, struct buffer_head *dind; int gdbackups; struct ext3_iloc iloc; - __u32 *data; + __le32 *data; int err; if (test_opt(sb, DEBUG)) @@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_bh; } - data = (__u32 *)dind->b_data; + data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { ext3_warning(sb, __FUNCTION__, "new group %u GDT block "E3FSBLK" not reserved", @@ -519,7 +519,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, struct buffer_head *dind; struct ext3_iloc iloc; ext3_fsblk_t blk; - __u32 *data, *end; + __le32 *data, *end; int gdbackups = 0; int res, i; int err; @@ -536,8 +536,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; - data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; - end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); + data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; + end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { @@ -545,7 +545,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, ext3_warning(sb, __FUNCTION__, "reserved block "E3FSBLK " not at offset %ld", - blk, (long)(data - (__u32 *)dind->b_data)); + blk, + (long)(data - (__le32 *)dind->b_data)); err = -EINVAL; goto exit_bh; } @@ -560,7 +561,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, goto exit_bh; } if (++data >= end) - data = (__u32 *)dind->b_data; + data = (__le32 *)dind->b_data; } for (i = 0; i < reserved_gdb; i++) { @@ -584,7 +585,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2; - data = (__u32 *)primary[i]->b_data; + data = (__le32 *)primary[i]->b_data; /* printk("reserving backup %lu[%u] = %lu\n", primary[i]->b_blocknr, gdbackups, blk + primary[i]->b_blocknr); */ @@ -689,7 +690,7 @@ exit_err: "can't update backup for group %d (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT3_VALID_FS; - sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); + sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS); mark_buffer_dirty(sbi->s_sbh); } } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0544325d9d7e..10985017765b 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2348,10 +2348,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) */ ext3_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); - if ((ret = ext3_group_extend(sb, es, n_blocks_count))) { - err = ret; + if ((err = ext3_group_extend(sb, es, n_blocks_count))) goto restore_opts; - } if (!ext3_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b571209d6a1e..2fc66c3e6681 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1094,7 +1094,7 @@ int journal_load(journal_t *journal) /* * Create a slab for this blocksize */ - err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); + err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); if (err) return err; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 369c67502346..cc08f56750da 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -460,7 +460,7 @@ struct ext3_super_block { */ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ - __u16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ /* * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. */ -- cgit v1.2.3-71-gd317 From 133d205a18b7a4d8cb52959c5310f6664277cf61 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 27 Sep 2006 01:49:41 -0700 Subject: [PATCH] Make kmem_cache_destroy() return void un-, de-, -free, -destroy, -exit, etc functions should in general return void. Also, There is very little, say, filesystem driver code can do upon failed kmem_cache_destroy(). If it will be decided to BUG in this case, BUG should be put in generic code, instead. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 4 ++-- mm/slab.c | 6 ++---- mm/slob.c | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 66d6eb78d1c6..a96fd9310d55 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -60,7 +60,7 @@ extern void __init kmem_cache_init(void); extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *, kmem_cache_t *, unsigned long), void (*)(void *, kmem_cache_t *, unsigned long)); -extern int kmem_cache_destroy(kmem_cache_t *); +extern void kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); @@ -249,7 +249,7 @@ struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, unsigned long, void (*)(void *, struct kmem_cache *, unsigned long), void (*)(void *, struct kmem_cache *, unsigned long)); -int kmem_cache_destroy(struct kmem_cache *c); +void kmem_cache_destroy(struct kmem_cache *c); void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags); void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); void kmem_cache_free(struct kmem_cache *c, void *b); diff --git a/mm/slab.c b/mm/slab.c index 7a48eb1a60c8..c52ebf9c4462 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2442,7 +2442,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); * @cachep: the cache to destroy * * Remove a struct kmem_cache object from the slab cache. - * Returns 0 on success. * * It is expected this function will be called by a module when it is * unloaded. This will remove the cache completely, and avoid a duplicate @@ -2454,7 +2453,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); * The caller must guarantee that noone will allocate memory from the cache * during the kmem_cache_destroy(). */ -int kmem_cache_destroy(struct kmem_cache *cachep) +void kmem_cache_destroy(struct kmem_cache *cachep) { BUG_ON(!cachep || in_interrupt()); @@ -2475,7 +2474,7 @@ int kmem_cache_destroy(struct kmem_cache *cachep) list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); unlock_cpu_hotplug(); - return 1; + return; } if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) @@ -2483,7 +2482,6 @@ int kmem_cache_destroy(struct kmem_cache *cachep) __kmem_cache_destroy(cachep); unlock_cpu_hotplug(); - return 0; } EXPORT_SYMBOL(kmem_cache_destroy); diff --git a/mm/slob.c b/mm/slob.c index 20188627347c..542394184a58 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -270,10 +270,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, } EXPORT_SYMBOL(kmem_cache_create); -int kmem_cache_destroy(struct kmem_cache *c) +void kmem_cache_destroy(struct kmem_cache *c) { slob_free(c, sizeof(struct kmem_cache)); - return 0; } EXPORT_SYMBOL(kmem_cache_destroy); -- cgit v1.2.3-71-gd317 From c713216deebd95d2b0ab38fef8bb2361c0180c2d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 27 Sep 2006 01:49:43 -0700 Subject: [PATCH] Introduce mechanism for registering active regions of memory At a basic level, architectures define structures to record where active ranges of page frames are located. Once located, the code to calculate zone sizes and holes in each architecture is very similar. Some of this zone and hole sizing code is difficult to read for no good reason. This set of patches eliminates the similar-looking architecture-specific code. The patches introduce a mechanism where architectures register where the active ranges of page frames are with add_active_range(). When all areas have been discovered, free_area_init_nodes() is called to initialise the pgdat and zones. The zone sizes and holes are then calculated in an architecture independent manner. Patch 1 introduces the mechanism for registering and initialising PFN ranges Patch 2 changes ppc to use the mechanism - 139 arch-specific LOC removed Patch 3 changes x86 to use the mechanism - 136 arch-specific LOC removed Patch 4 changes x86_64 to use the mechanism - 74 arch-specific LOC removed Patch 5 changes ia64 to use the mechanism - 52 arch-specific LOC removed Patch 6 accounts for mem_map as a memory hole as the pages are not reclaimable. It adjusts the watermarks slightly Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig, gensparse_defconfig and defconfig. Bob Picco has also tested and debugged on IA64. Jack Steiner successfully boot tested on a mammoth SGI IA64-based machine. These were on patches against 2.6.17-rc1 and release 3 of these patches but there have been no ia64-changes since release 3. There are differences in the zone sizes for x86_64 as the arch-specific code for x86_64 accounts the kernel image and the starting mem_maps as memory holes but the architecture-independent code accounts the memory as present. The big benefit of this set of patches is a sizable reduction of architecture-specific code, some of which is very hairy. There should be a greater reduction when other architectures use the same mechanisms for zone and hole sizing but I lack the hardware to test on. Additional credit; Dave Hansen for the initial suggestion and comments on early patches Andy Whitcroft for reviewing early versions and catching numerous errors Tony Luck for testing and debugging on IA64 Bob Picco for fixing bugs related to pfn registration, reviewing a number of patch revisions, providing a number of suggestions on future direction and testing heavily Jack Steiner and Robin Holt for testing on IA64 and clarifying issues related to memory holes Yasunori for testing on IA64 Andi Kleen for reviewing and feeding back about x86_64 Christian Kujau for providing valuable information related to ACPI problems on x86_64 and testing potential fixes This patch: Define the structure to represent an active range of page frames within a node in an architecture independent manner. Architectures are expected to register active ranges of PFNs using add_active_range(nid, start_pfn, end_pfn) and call free_area_init_nodes() passing the PFNs of the end of each zone. Signed-off-by: Mel Gorman Signed-off-by: Bob Picco Cc: Dave Hansen Cc: Andy Whitcroft Cc: Andi Kleen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Keith Mannthey" Cc: "Luck, Tony" Cc: KAMEZAWA Hiroyuki Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 47 +++++ include/linux/mmzone.h | 10 +- mm/page_alloc.c | 552 ++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 584 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 856f0ee7e84a..c0402da7cce0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -937,6 +937,53 @@ extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, pg_data_t *pgdat, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +/* + * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its + * zones, allocate the backing mem_map and account for memory holes in a more + * architecture independent manner. This is a substitute for creating the + * zone_sizes[] and zholes_size[] arrays and passing them to + * free_area_init_node() + * + * An architecture is expected to register range of page frames backed by + * physical memory with add_active_range() before calling + * free_area_init_nodes() passing in the PFN each zone ends at. At a basic + * usage, an architecture is expected to do something like + * + * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, + * max_highmem_pfn}; + * for_each_valid_physical_page_range() + * add_active_range(node_id, start_pfn, end_pfn) + * free_area_init_nodes(max_zone_pfns); + * + * If the architecture guarantees that there are no holes in the ranges + * registered with add_active_range(), free_bootmem_active_regions() + * will call free_bootmem_node() for each registered physical page range. + * Similarly sparse_memory_present_with_active_regions() calls + * memory_present() for each range when SPARSEMEM is enabled. + * + * See mm/page_alloc.c for more information on each function exposed by + * CONFIG_ARCH_POPULATES_NODE_MAP + */ +extern void free_area_init_nodes(unsigned long *max_zone_pfn); +extern void add_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); +extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, + unsigned long new_end_pfn); +extern void remove_all_active_ranges(void); +extern unsigned long absent_pages_in_range(unsigned long start_pfn, + unsigned long end_pfn); +extern void get_pfn_range_for_nid(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn); +extern unsigned long find_min_pfn_with_active_regions(void); +extern unsigned long find_max_pfn_with_active_regions(void); +extern void free_bootmem_with_active_regions(int nid, + unsigned long max_low_pfn); +extern void sparse_memory_present_with_active_regions(int nid); +#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +extern int early_pfn_to_nid(unsigned long pfn); +#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); extern void setup_per_zone_pages_min(void); extern void mem_init(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3693f1a52788..7fa1cbe9fa7a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -305,6 +305,13 @@ struct zonelist { struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited }; +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +struct node_active_region { + unsigned long start_pfn; + unsigned long end_pfn; + int nid; +}; +#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ /* * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM @@ -518,7 +525,8 @@ extern struct zone *next_zone(struct zone *zone); #endif -#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ + !defined(CONFIG_ARCH_POPULATES_NODE_MAP) #define early_pfn_to_nid(nid) (0UL) #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9810f0a60db7..26c9939857fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -103,6 +105,33 @@ int min_free_kbytes = 1024; unsigned long __meminitdata nr_kernel_pages; unsigned long __meminitdata nr_all_pages; +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP + /* + * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct + * ranges of memory (RAM) that may be registered with add_active_range(). + * Ranges passed to add_active_range() will be merged if possible + * so the number of times add_active_range() can be called is + * related to the number of nodes and the number of holes + */ + #ifdef CONFIG_MAX_ACTIVE_REGIONS + /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ + #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS + #else + #if MAX_NUMNODES >= 32 + /* If there can be many nodes, allow up to 50 holes per node */ + #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) + #else + /* By default, allow up to 256 distinct regions */ + #define MAX_ACTIVE_REGIONS 256 + #endif + #endif + + struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS]; + int __initdata nr_nodemap_entries; + unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; + unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; +#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { @@ -1642,25 +1671,6 @@ static inline unsigned long wait_table_bits(unsigned long size) #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) -static void __init calculate_zone_totalpages(struct pglist_data *pgdat, - unsigned long *zones_size, unsigned long *zholes_size) -{ - unsigned long realtotalpages, totalpages = 0; - enum zone_type i; - - for (i = 0; i < MAX_NR_ZONES; i++) - totalpages += zones_size[i]; - pgdat->node_spanned_pages = totalpages; - - realtotalpages = totalpages; - if (zholes_size) - for (i = 0; i < MAX_NR_ZONES; i++) - realtotalpages -= zholes_size[i]; - pgdat->node_present_pages = realtotalpages; - printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); -} - - /* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is @@ -1977,6 +1987,272 @@ __meminit int init_currently_empty_zone(struct zone *zone, return 0; } +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +/* + * Basic iterator support. Return the first range of PFNs for a node + * Note: nid == MAX_NUMNODES returns first region regardless of node + */ +static int __init first_active_region_index_in_nid(int nid) +{ + int i; + + for (i = 0; i < nr_nodemap_entries; i++) + if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) + return i; + + return -1; +} + +/* + * Basic iterator support. Return the next active range of PFNs for a node + * Note: nid == MAX_NUMNODES returns next region regardles of node + */ +static int __init next_active_region_index_in_nid(int index, int nid) +{ + for (index = index + 1; index < nr_nodemap_entries; index++) + if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) + return index; + + return -1; +} + +#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +/* + * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. + * Architectures may implement their own version but if add_active_range() + * was used and there are no special requirements, this is a convenient + * alternative + */ +int __init early_pfn_to_nid(unsigned long pfn) +{ + int i; + + for (i = 0; i < nr_nodemap_entries; i++) { + unsigned long start_pfn = early_node_map[i].start_pfn; + unsigned long end_pfn = early_node_map[i].end_pfn; + + if (start_pfn <= pfn && pfn < end_pfn) + return early_node_map[i].nid; + } + + return 0; +} +#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ + +/* Basic iterator support to walk early_node_map[] */ +#define for_each_active_range_index_in_nid(i, nid) \ + for (i = first_active_region_index_in_nid(nid); i != -1; \ + i = next_active_region_index_in_nid(i, nid)) + +/** + * free_bootmem_with_active_regions - Call free_bootmem_node for each active range + * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed + * @max_low_pfn: The highest PFN that till be passed to free_bootmem_node + * + * If an architecture guarantees that all ranges registered with + * add_active_ranges() contain no holes and may be freed, this + * this function may be used instead of calling free_bootmem() manually. + */ +void __init free_bootmem_with_active_regions(int nid, + unsigned long max_low_pfn) +{ + int i; + + for_each_active_range_index_in_nid(i, nid) { + unsigned long size_pages = 0; + unsigned long end_pfn = early_node_map[i].end_pfn; + + if (early_node_map[i].start_pfn >= max_low_pfn) + continue; + + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; + + size_pages = end_pfn - early_node_map[i].start_pfn; + free_bootmem_node(NODE_DATA(early_node_map[i].nid), + PFN_PHYS(early_node_map[i].start_pfn), + size_pages << PAGE_SHIFT); + } +} + +/** + * sparse_memory_present_with_active_regions - Call memory_present for each active range + * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used + * + * If an architecture guarantees that all ranges registered with + * add_active_ranges() contain no holes and may be freed, this + * this function may be used instead of calling memory_present() manually. + */ +void __init sparse_memory_present_with_active_regions(int nid) +{ + int i; + + for_each_active_range_index_in_nid(i, nid) + memory_present(early_node_map[i].nid, + early_node_map[i].start_pfn, + early_node_map[i].end_pfn); +} + +/** + * get_pfn_range_for_nid - Return the start and end page frames for a node + * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned + * @start_pfn: Passed by reference. On return, it will have the node start_pfn + * @end_pfn: Passed by reference. On return, it will have the node end_pfn + * + * It returns the start and end page frame of a node based on information + * provided by an arch calling add_active_range(). If called for a node + * with no available memory, a warning is printed and the start and end + * PFNs will be 0 + */ +void __init get_pfn_range_for_nid(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + int i; + *start_pfn = -1UL; + *end_pfn = 0; + + for_each_active_range_index_in_nid(i, nid) { + *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); + *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); + } + + if (*start_pfn == -1UL) { + printk(KERN_WARNING "Node %u active with no memory\n", nid); + *start_pfn = 0; + } +} + +/* + * Return the number of pages a zone spans in a node, including holes + * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() + */ +unsigned long __init zone_spanned_pages_in_node(int nid, + unsigned long zone_type, + unsigned long *ignored) +{ + unsigned long node_start_pfn, node_end_pfn; + unsigned long zone_start_pfn, zone_end_pfn; + + /* Get the start and end of the node and zone */ + get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); + zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; + zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; + + /* Check that this node has pages within the zone's required range */ + if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) + return 0; + + /* Move the zone boundaries inside the node if necessary */ + zone_end_pfn = min(zone_end_pfn, node_end_pfn); + zone_start_pfn = max(zone_start_pfn, node_start_pfn); + + /* Return the spanned pages */ + return zone_end_pfn - zone_start_pfn; +} + +/* + * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, + * then all holes in the requested range will be accounted for + */ +unsigned long __init __absent_pages_in_range(int nid, + unsigned long range_start_pfn, + unsigned long range_end_pfn) +{ + int i = 0; + unsigned long prev_end_pfn = 0, hole_pages = 0; + unsigned long start_pfn; + + /* Find the end_pfn of the first active range of pfns in the node */ + i = first_active_region_index_in_nid(nid); + if (i == -1) + return 0; + + prev_end_pfn = early_node_map[i].start_pfn; + + /* Find all holes for the zone within the node */ + for (; i != -1; i = next_active_region_index_in_nid(i, nid)) { + + /* No need to continue if prev_end_pfn is outside the zone */ + if (prev_end_pfn >= range_end_pfn) + break; + + /* Make sure the end of the zone is not within the hole */ + start_pfn = min(early_node_map[i].start_pfn, range_end_pfn); + prev_end_pfn = max(prev_end_pfn, range_start_pfn); + + /* Update the hole size cound and move on */ + if (start_pfn > range_start_pfn) { + BUG_ON(prev_end_pfn > start_pfn); + hole_pages += start_pfn - prev_end_pfn; + } + prev_end_pfn = early_node_map[i].end_pfn; + } + + return hole_pages; +} + +/** + * absent_pages_in_range - Return number of page frames in holes within a range + * @start_pfn: The start PFN to start searching for holes + * @end_pfn: The end PFN to stop searching for holes + * + * It returns the number of pages frames in memory holes within a range + */ +unsigned long __init absent_pages_in_range(unsigned long start_pfn, + unsigned long end_pfn) +{ + return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn); +} + +/* Return the number of page frames in holes in a zone on a node */ +unsigned long __init zone_absent_pages_in_node(int nid, + unsigned long zone_type, + unsigned long *ignored) +{ + return __absent_pages_in_range(nid, + arch_zone_lowest_possible_pfn[zone_type], + arch_zone_highest_possible_pfn[zone_type]); +} +#else +static inline unsigned long zone_spanned_pages_in_node(int nid, + unsigned long zone_type, + unsigned long *zones_size) +{ + return zones_size[zone_type]; +} + +static inline unsigned long zone_absent_pages_in_node(int nid, + unsigned long zone_type, + unsigned long *zholes_size) +{ + if (!zholes_size) + return 0; + + return zholes_size[zone_type]; +} +#endif + +static void __init calculate_node_totalpages(struct pglist_data *pgdat, + unsigned long *zones_size, unsigned long *zholes_size) +{ + unsigned long realtotalpages, totalpages = 0; + enum zone_type i; + + for (i = 0; i < MAX_NR_ZONES; i++) + totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, + zones_size); + pgdat->node_spanned_pages = totalpages; + + realtotalpages = totalpages; + for (i = 0; i < MAX_NR_ZONES; i++) + realtotalpages -= + zone_absent_pages_in_node(pgdat->node_id, i, + zholes_size); + pgdat->node_present_pages = realtotalpages; + printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, + realtotalpages); +} + /* * Set up the zone data structures: * - mark all pages reserved @@ -2000,9 +2276,9 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize; - realsize = size = zones_size[j]; - if (zholes_size) - realsize -= zholes_size[j]; + size = zone_spanned_pages_in_node(nid, j, zones_size); + realsize = size - zone_absent_pages_in_node(nid, j, + zholes_size); if (!is_highmem_idx(j)) nr_kernel_pages += realsize; @@ -2073,8 +2349,13 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) /* * With no DISCONTIG, the global mem_map is just set as node 0's */ - if (pgdat == NODE_DATA(0)) + if (pgdat == NODE_DATA(0)) { mem_map = NODE_DATA(0)->node_mem_map; +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP + if (page_to_pfn(mem_map) != pgdat->node_start_pfn) + mem_map -= pgdat->node_start_pfn; +#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + } #endif #endif /* CONFIG_FLAT_NODE_MEM_MAP */ } @@ -2085,13 +2366,236 @@ void __meminit free_area_init_node(int nid, struct pglist_data *pgdat, { pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; - calculate_zone_totalpages(pgdat, zones_size, zholes_size); + calculate_node_totalpages(pgdat, zones_size, zholes_size); alloc_node_mem_map(pgdat); free_area_init_core(pgdat, zones_size, zholes_size); } +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +/** + * add_active_range - Register a range of PFNs backed by physical memory + * @nid: The node ID the range resides on + * @start_pfn: The start PFN of the available physical memory + * @end_pfn: The end PFN of the available physical memory + * + * These ranges are stored in an early_node_map[] and later used by + * free_area_init_nodes() to calculate zone sizes and holes. If the + * range spans a memory hole, it is up to the architecture to ensure + * the memory is not freed by the bootmem allocator. If possible + * the range being registered will be merged with existing ranges. + */ +void __init add_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + int i; + + printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) " + "%d entries of %d used\n", + nid, start_pfn, end_pfn, + nr_nodemap_entries, MAX_ACTIVE_REGIONS); + + /* Merge with existing active regions if possible */ + for (i = 0; i < nr_nodemap_entries; i++) { + if (early_node_map[i].nid != nid) + continue; + + /* Skip if an existing region covers this new one */ + if (start_pfn >= early_node_map[i].start_pfn && + end_pfn <= early_node_map[i].end_pfn) + return; + + /* Merge forward if suitable */ + if (start_pfn <= early_node_map[i].end_pfn && + end_pfn > early_node_map[i].end_pfn) { + early_node_map[i].end_pfn = end_pfn; + return; + } + + /* Merge backward if suitable */ + if (start_pfn < early_node_map[i].end_pfn && + end_pfn >= early_node_map[i].start_pfn) { + early_node_map[i].start_pfn = start_pfn; + return; + } + } + + /* Check that early_node_map is large enough */ + if (i >= MAX_ACTIVE_REGIONS) { + printk(KERN_CRIT "More than %d memory regions, truncating\n", + MAX_ACTIVE_REGIONS); + return; + } + + early_node_map[i].nid = nid; + early_node_map[i].start_pfn = start_pfn; + early_node_map[i].end_pfn = end_pfn; + nr_nodemap_entries = i + 1; +} + +/** + * shrink_active_range - Shrink an existing registered range of PFNs + * @nid: The node id the range is on that should be shrunk + * @old_end_pfn: The old end PFN of the range + * @new_end_pfn: The new PFN of the range + * + * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. + * The map is kept at the end physical page range that has already been + * registered with add_active_range(). This function allows an arch to shrink + * an existing registered range. + */ +void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, + unsigned long new_end_pfn) +{ + int i; + + /* Find the old active region end and shrink */ + for_each_active_range_index_in_nid(i, nid) + if (early_node_map[i].end_pfn == old_end_pfn) { + early_node_map[i].end_pfn = new_end_pfn; + break; + } +} + +/** + * remove_all_active_ranges - Remove all currently registered regions + * During discovery, it may be found that a table like SRAT is invalid + * and an alternative discovery method must be used. This function removes + * all currently registered regions. + */ +void __init remove_all_active_ranges() +{ + memset(early_node_map, 0, sizeof(early_node_map)); + nr_nodemap_entries = 0; +} + +/* Compare two active node_active_regions */ +static int __init cmp_node_active_region(const void *a, const void *b) +{ + struct node_active_region *arange = (struct node_active_region *)a; + struct node_active_region *brange = (struct node_active_region *)b; + + /* Done this way to avoid overflows */ + if (arange->start_pfn > brange->start_pfn) + return 1; + if (arange->start_pfn < brange->start_pfn) + return -1; + + return 0; +} + +/* sort the node_map by start_pfn */ +static void __init sort_node_map(void) +{ + sort(early_node_map, (size_t)nr_nodemap_entries, + sizeof(struct node_active_region), + cmp_node_active_region, NULL); +} + +/* Find the lowest pfn for a node. This depends on a sorted early_node_map */ +unsigned long __init find_min_pfn_for_node(unsigned long nid) +{ + int i; + + /* Assuming a sorted map, the first range found has the starting pfn */ + for_each_active_range_index_in_nid(i, nid) + return early_node_map[i].start_pfn; + + printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); + return 0; +} + +/** + * find_min_pfn_with_active_regions - Find the minimum PFN registered + * + * It returns the minimum PFN based on information provided via + * add_active_range() + */ +unsigned long __init find_min_pfn_with_active_regions(void) +{ + return find_min_pfn_for_node(MAX_NUMNODES); +} + +/** + * find_max_pfn_with_active_regions - Find the maximum PFN registered + * + * It returns the maximum PFN based on information provided via + * add_active_range() + */ +unsigned long __init find_max_pfn_with_active_regions(void) +{ + int i; + unsigned long max_pfn = 0; + + for (i = 0; i < nr_nodemap_entries; i++) + max_pfn = max(max_pfn, early_node_map[i].end_pfn); + + return max_pfn; +} + +/** + * free_area_init_nodes - Initialise all pg_data_t and zone data + * @arch_max_dma_pfn: The maximum PFN usable for ZONE_DMA + * @arch_max_dma32_pfn: The maximum PFN usable for ZONE_DMA32 + * @arch_max_low_pfn: The maximum PFN usable for ZONE_NORMAL + * @arch_max_high_pfn: The maximum PFN usable for ZONE_HIGHMEM + * + * This will call free_area_init_node() for each active node in the system. + * Using the page ranges provided by add_active_range(), the size of each + * zone in each node and their holes is calculated. If the maximum PFN + * between two adjacent zones match, it is assumed that the zone is empty. + * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed + * that arch_max_dma32_pfn has no pages. It is also assumed that a zone + * starts where the previous one ended. For example, ZONE_DMA32 starts + * at arch_max_dma_pfn. + */ +void __init free_area_init_nodes(unsigned long *max_zone_pfn) +{ + unsigned long nid; + enum zone_type i; + + /* Record where the zone boundaries are */ + memset(arch_zone_lowest_possible_pfn, 0, + sizeof(arch_zone_lowest_possible_pfn)); + memset(arch_zone_highest_possible_pfn, 0, + sizeof(arch_zone_highest_possible_pfn)); + arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions(); + arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; + for (i = 1; i < MAX_NR_ZONES; i++) { + arch_zone_lowest_possible_pfn[i] = + arch_zone_highest_possible_pfn[i-1]; + arch_zone_highest_possible_pfn[i] = + max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]); + } + + /* Regions in the early_node_map can be in any order */ + sort_node_map(); + + /* Print out the zone ranges */ + printk("Zone PFN ranges:\n"); + for (i = 0; i < MAX_NR_ZONES; i++) + printk(" %-8s %8lu -> %8lu\n", + zone_names[i], + arch_zone_lowest_possible_pfn[i], + arch_zone_highest_possible_pfn[i]); + + /* Print out the early_node_map[] */ + printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); + for (i = 0; i < nr_nodemap_entries; i++) + printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid, + early_node_map[i].start_pfn, + early_node_map[i].end_pfn); + + /* Initialise every node */ + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + free_area_init_node(nid, pgdat, NULL, + find_min_pfn_for_node(nid), NULL); + } +} +#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + #ifndef CONFIG_NEED_MULTIPLE_NODES static bootmem_data_t contig_bootmem_data; struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; -- cgit v1.2.3-71-gd317 From 0e0b864e069c52a7b3e4a7da56e29b03a012fd75 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 27 Sep 2006 01:49:56 -0700 Subject: [PATCH] Account for memmap and optionally the kernel image as holes The x86_64 code accounted for memmap and some portions of the the DMA zone as holes. This was because those areas would never be reclaimed and accounting for them as memory affects min watermarks. This patch will account for the memmap as a memory hole. Architectures may optionally use set_dma_reserve() if they wish to account for a portion of memory in ZONE_DMA as a hole. Signed-off-by: Mel Gorman Cc: Dave Hansen Cc: Andy Whitcroft Cc: Andi Kleen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Keith Mannthey" Cc: "Luck, Tony" Cc: KAMEZAWA Hiroyuki Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/init.c | 4 +++- include/linux/mm.h | 1 + mm/page_alloc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 47928399e38a..3e16fe08150e 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -655,8 +655,10 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) #else reserve_bootmem(phys, len); #endif - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { dma_reserve += len / PAGE_SIZE; + set_dma_reserve(dma_reserve); + } } int kern_addr_valid(unsigned long addr) diff --git a/include/linux/mm.h b/include/linux/mm.h index c0402da7cce0..22936e1fcdf2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid); extern int early_pfn_to_nid(unsigned long pfn); #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); extern void setup_per_zone_pages_min(void); extern void mem_init(void); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 26c9939857fa..8d9a1eb9fbba 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -104,6 +104,7 @@ int min_free_kbytes = 1024; unsigned long __meminitdata nr_kernel_pages; unsigned long __meminitdata nr_all_pages; +static unsigned long __initdata dma_reserve; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP /* @@ -2213,6 +2214,20 @@ unsigned long __init zone_absent_pages_in_node(int nid, arch_zone_lowest_possible_pfn[zone_type], arch_zone_highest_possible_pfn[zone_type]); } + +/* Return the zone index a PFN is in */ +int memmap_zone_idx(struct page *lmem_map) +{ + int i; + unsigned long phys_addr = virt_to_phys(lmem_map); + unsigned long pfn = phys_addr >> PAGE_SHIFT; + + for (i = 0; i < MAX_NR_ZONES; i++) + if (pfn < arch_zone_highest_possible_pfn[i]) + break; + + return i; +} #else static inline unsigned long zone_spanned_pages_in_node(int nid, unsigned long zone_type, @@ -2230,6 +2245,11 @@ static inline unsigned long zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } + +static inline int memmap_zone_idx(struct page *lmem_map) +{ + return MAX_NR_ZONES; +} #endif static void __init calculate_node_totalpages(struct pglist_data *pgdat, @@ -2274,12 +2294,35 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; - unsigned long size, realsize; + unsigned long size, realsize, memmap_pages; size = zone_spanned_pages_in_node(nid, j, zones_size); realsize = size - zone_absent_pages_in_node(nid, j, zholes_size); + /* + * Adjust realsize so that it accounts for how much memory + * is used by this zone for memmap. This affects the watermark + * and per-cpu initialisations + */ + memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT; + if (realsize >= memmap_pages) { + realsize -= memmap_pages; + printk(KERN_DEBUG + " %s zone: %lu pages used for memmap\n", + zone_names[j], memmap_pages); + } else + printk(KERN_WARNING + " %s zone: %lu pages exceeds realsize %lu\n", + zone_names[j], memmap_pages, realsize); + + /* Account for reserved DMA pages */ + if (j == ZONE_DMA && realsize > dma_reserve) { + realsize -= dma_reserve; + printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", + dma_reserve); + } + if (!is_highmem_idx(j)) nr_kernel_pages += realsize; nr_all_pages += realsize; @@ -2596,6 +2639,21 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) } #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +/** + * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA + * @new_dma_reserve - The number of pages to mark reserved + * + * The per-cpu batchsize and zone watermarks are determined by present_pages. + * In the DMA zone, a significant percentage may be consumed by kernel image + * and other unfreeable allocations which can skew the watermarks badly. This + * function may optionally be used to account for unfreeable pages in + * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize + */ +void __init set_dma_reserve(unsigned long new_dma_reserve) +{ + dma_reserve = new_dma_reserve; +} + #ifndef CONFIG_NEED_MULTIPLE_NODES static bootmem_data_t contig_bootmem_data; struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; -- cgit v1.2.3-71-gd317 From fb01439c5b778d5974a488c5d4fe85e6d0e18a68 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 27 Sep 2006 01:49:59 -0700 Subject: [PATCH] Allow an arch to expand node boundaries Arch-independent zone-sizing determines the size of a node (pgdat->node_spanned_pages) based on the physical memory that was registered by the architecture. However, when CONFIG_MEMORY_HOTPLUG_RESERVE is set, the architecture expects that the spanned_pages will be much larger and that mem_map will be allocated that is used lated on memory hot-add. This patch allows an architecture that sets CONFIG_MEMORY_HOTPLUG_RESERVE to call push_node_boundaries() which will set the node beginning and end to at *least* the requested boundary. Cc: Dave Hansen Cc: Andy Whitcroft Cc: Andi Kleen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Keith Mannthey" Cc: "Luck, Tony" Cc: KAMEZAWA Hiroyuki Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/srat.c | 2 ++ include/linux/mm.h | 2 ++ mm/page_alloc.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) (limited to 'include/linux') diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index db1b2e11cf8f..f8c04d6935c9 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -324,6 +324,8 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) nd->start, nd->end); e820_register_active_regions(node, nd->start >> PAGE_SHIFT, nd->end >> PAGE_SHIFT); + push_node_boundaries(node, nd->start >> PAGE_SHIFT, + nd->end >> PAGE_SHIFT); #ifdef RESERVE_HOTADD if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 22936e1fcdf2..9d046db31e76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -970,6 +970,8 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, unsigned long new_end_pfn); +extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); extern void remove_all_active_ranges(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 75133e1dc4b1..acbf58f8a1b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -131,6 +131,10 @@ static unsigned long __initdata dma_reserve; int __initdata nr_nodemap_entries; unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE + unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES]; + unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES]; +#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ #ifdef CONFIG_DEBUG_VM @@ -2094,6 +2098,62 @@ void __init sparse_memory_present_with_active_regions(int nid) early_node_map[i].end_pfn); } +/** + * push_node_boundaries - Push node boundaries to at least the requested boundary + * @nid: The nid of the node to push the boundary for + * @start_pfn: The start pfn of the node + * @end_pfn: The end pfn of the node + * + * In reserve-based hot-add, mem_map is allocated that is unused until hotadd + * time. Specifically, on x86_64, SRAT will report ranges that can potentially + * be hotplugged even though no physical memory exists. This function allows + * an arch to push out the node boundaries so mem_map is allocated that can + * be used later. + */ +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE +void __init push_node_boundaries(unsigned int nid, + unsigned long start_pfn, unsigned long end_pfn) +{ + printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n", + nid, start_pfn, end_pfn); + + /* Initialise the boundary for this node if necessary */ + if (node_boundary_end_pfn[nid] == 0) + node_boundary_start_pfn[nid] = -1UL; + + /* Update the boundaries */ + if (node_boundary_start_pfn[nid] > start_pfn) + node_boundary_start_pfn[nid] = start_pfn; + if (node_boundary_end_pfn[nid] < end_pfn) + node_boundary_end_pfn[nid] = end_pfn; +} + +/* If necessary, push the node boundary out for reserve hotadd */ +static void __init account_node_boundary(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n", + nid, *start_pfn, *end_pfn); + + /* Return if boundary information has not been provided */ + if (node_boundary_end_pfn[nid] == 0) + return; + + /* Check the boundaries and update if necessary */ + if (node_boundary_start_pfn[nid] < *start_pfn) + *start_pfn = node_boundary_start_pfn[nid]; + if (node_boundary_end_pfn[nid] > *end_pfn) + *end_pfn = node_boundary_end_pfn[nid]; +} +#else +void __init push_node_boundaries(unsigned int nid, + unsigned long start_pfn, unsigned long end_pfn) {} + +static void __init account_node_boundary(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) {} +#endif + + /** * get_pfn_range_for_nid - Return the start and end page frames for a node * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned @@ -2121,6 +2181,9 @@ void __init get_pfn_range_for_nid(unsigned int nid, printk(KERN_WARNING "Node %u active with no memory\n", nid); *start_pfn = 0; } + + /* Push the node boundaries out if requested */ + account_node_boundary(nid, start_pfn, end_pfn); } /* @@ -2527,6 +2590,10 @@ void __init remove_all_active_ranges() { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE + memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); + memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); +#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ } /* Compare two active node_active_regions */ -- cgit v1.2.3-71-gd317 From e129b5c23c2b471d47f1c5d2b8b193fc2034af43 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Sep 2006 01:50:00 -0700 Subject: [PATCH] vm: add per-zone writeout counter The VM is supposed to minimise the number of pages which get written off the LRU (for IO scheduling efficiency, and for high reclaim-success rates). But we don't actually have a clear way of showing how true this is. So add `nr_vmscan_write' to /proc/vmstat and /proc/zoneinfo - the number of pages which have been written by the vm scanner in this zone and globally. Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + mm/vmscan.c | 3 ++- mm/vmstat.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7fa1cbe9fa7a..1b0680cd84d2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -58,6 +58,7 @@ enum zone_stat_item { NR_WRITEBACK, NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, + NR_VMSCAN_WRITE, #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 87779dda4ec6..87d340999ce8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -370,7 +371,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) /* synchronous write or broken a_ops? */ ClearPageReclaim(page); } - + inc_zone_page_state(page, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 490d8c1a0ded..69c657132e1f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -465,6 +465,7 @@ static char *vmstat_text[] = { "nr_writeback", "nr_unstable", "nr_bounce", + "nr_vmscan_write", #ifdef CONFIG_NUMA "numa_hit", -- cgit v1.2.3-71-gd317 From 5b99cd0effaf846240a15441aec459a592577eaf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 27 Sep 2006 01:50:01 -0700 Subject: [PATCH] own header file for struct page This moves the definition of struct page from mm.h to its own header file page-struct.h. This is a prereq to fix SetPageUptodate which is broken on s390: #define SetPageUptodate(_page) do { struct page *__page = (_page); if (!test_and_set_bit(PG_uptodate, &__page->flags)) page_test_and_clear_dirty(_page); } while (0) _page gets used twice in this macro which can cause subtle bugs. Using __page for the page_test_and_clear_dirty call doesn't work since it causes yet another problem with the page_test_and_clear_dirty macro as well. In order to avoid all these problems caused by macros it seems to be a good idea to get rid of them and convert them to static inline functions. Because of header file include order it's necessary to have a seperate header file for the struct page definition. Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 62 +------------------------------------------- include/linux/mm_types.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmzone.h | 5 ++++ 3 files changed, 73 insertions(+), 61 deletions(-) create mode 100644 include/linux/mm_types.h (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d046db31e76..7477fb59c4f2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -16,6 +16,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -215,62 +216,6 @@ struct vm_operations_struct { struct mmu_gather; struct inode; -/* - * Each physical page in the system has a struct page associated with - * it to keep track of whatever it is we are using the page for at the - * moment. Note that we have no way to track which tasks are using - * a page, though if it is a pagecache page, rmap structures can tell us - * who is mapping it. - */ -struct page { - unsigned long flags; /* Atomic flags, some possibly - * updated asynchronously */ - atomic_t _count; /* Usage count, see below. */ - atomic_t _mapcount; /* Count of ptes mapped in mms, - * to show when page is mapped - * & limit reverse map searches. - */ - union { - struct { - unsigned long private; /* Mapping-private opaque data: - * usually used for buffer_heads - * if PagePrivate set; used for - * swp_entry_t if PageSwapCache; - * indicates order in the buddy - * system if PG_buddy is set. - */ - struct address_space *mapping; /* If low bit clear, points to - * inode address_space, or NULL. - * If page mapped as anonymous - * memory, low bit is set, and - * it points to anon_vma object: - * see PAGE_MAPPING_ANON below. - */ - }; -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS - spinlock_t ptl; -#endif - }; - pgoff_t index; /* Our offset within mapping. */ - struct list_head lru; /* Pageout list, eg. active_list - * protected by zone->lru_lock ! - */ - /* - * On machines where all RAM is mapped into kernel address space, - * we can simply calculate the virtual address. On machines with - * highmem some memory is mapped into kernel virtual memory - * dynamically, so we need a place to store that address. - * Note that this field could be 16 bits on x86 ... ;) - * - * Architectures with slow multiplication can define - * WANT_PAGE_VIRTUAL in asm/page.h - */ -#if defined(WANT_PAGE_VIRTUAL) - void *virtual; /* Kernel virtual address (NULL if - not kmapped, ie. highmem) */ -#endif /* WANT_PAGE_VIRTUAL */ -}; - #define page_private(page) ((page)->private) #define set_page_private(page, v) ((page)->private = (v)) @@ -546,11 +491,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ #include -#ifndef CONFIG_DISCONTIGMEM -/* The array of struct pages - for discontigmem use pgdat->lmem_map */ -extern struct page *mem_map; -#endif - static __always_inline void *lowmem_page_address(struct page *page) { return __va(page_to_pfn(page) << PAGE_SHIFT); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h new file mode 100644 index 000000000000..c3852fd4a1cc --- /dev/null +++ b/include/linux/mm_types.h @@ -0,0 +1,67 @@ +#ifndef _LINUX_MM_TYPES_H +#define _LINUX_MM_TYPES_H + +#include +#include +#include +#include + +struct address_space; + +/* + * Each physical page in the system has a struct page associated with + * it to keep track of whatever it is we are using the page for at the + * moment. Note that we have no way to track which tasks are using + * a page, though if it is a pagecache page, rmap structures can tell us + * who is mapping it. + */ +struct page { + unsigned long flags; /* Atomic flags, some possibly + * updated asynchronously */ + atomic_t _count; /* Usage count, see below. */ + atomic_t _mapcount; /* Count of ptes mapped in mms, + * to show when page is mapped + * & limit reverse map searches. + */ + union { + struct { + unsigned long private; /* Mapping-private opaque data: + * usually used for buffer_heads + * if PagePrivate set; used for + * swp_entry_t if PageSwapCache; + * indicates order in the buddy + * system if PG_buddy is set. + */ + struct address_space *mapping; /* If low bit clear, points to + * inode address_space, or NULL. + * If page mapped as anonymous + * memory, low bit is set, and + * it points to anon_vma object: + * see PAGE_MAPPING_ANON below. + */ + }; +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS + spinlock_t ptl; +#endif + }; + pgoff_t index; /* Our offset within mapping. */ + struct list_head lru; /* Pageout list, eg. active_list + * protected by zone->lru_lock ! + */ + /* + * On machines where all RAM is mapped into kernel address space, + * we can simply calculate the virtual address. On machines with + * highmem some memory is mapped into kernel virtual memory + * dynamically, so we need a place to store that address. + * Note that this field could be 16 bits on x86 ... ;) + * + * Architectures with slow multiplication can define + * WANT_PAGE_VIRTUAL in asm/page.h + */ +#if defined(WANT_PAGE_VIRTUAL) + void *virtual; /* Kernel virtual address (NULL if + not kmapped, ie. highmem) */ +#endif /* WANT_PAGE_VIRTUAL */ +}; + +#endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b0680cd84d2..562cf7a8f3ee 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -314,6 +314,11 @@ struct node_active_region { }; #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#ifndef CONFIG_DISCONTIGMEM +/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +extern struct page *mem_map; +#endif + /* * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM * (mostly NUMA machines?) to denote a higher-level memory zone than the -- cgit v1.2.3-71-gd317 From 08e0f6a9705376732fd3bc9bf8ba97a6b5211eb1 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 27 Sep 2006 01:50:06 -0700 Subject: [PATCH] Add NUMA_BUILD definition in kernel.h to avoid #ifdef CONFIG_NUMA The NUMA_BUILD constant is always available and will be set to 1 on NUMA_BUILDs. That way checks valid only under CONFIG_NUMA can easily be done without #ifdef CONFIG_NUMA F.e. if (NUMA_BUILD && ) { ... } [akpm: not a thing we'd normally do, but CONFIG_NUMA is special: it is causing ifdef explosion in core kernel, so let's see if this is a comfortable way in whcih to control that] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 7 +++++++ mm/page_alloc.c | 12 +++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4fa373bb18ac..4d00988dad03 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -350,4 +350,11 @@ struct sysinfo { /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) +/* This helps us to avoid #ifdef CONFIG_NUMA */ +#ifdef CONFIG_NUMA +#define NUMA_BUILD 1 +#else +#define NUMA_BUILD 0 +#endif + #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8a6418b0d2c5..4c76188b1681 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -942,7 +942,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, */ do { zone = *z; - if (unlikely((gfp_mask & __GFP_THISNODE) && + if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) && zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) break; if ((alloc_flags & ALLOC_CPUSET) && @@ -1256,14 +1256,12 @@ unsigned int nr_free_pagecache_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); } -#ifdef CONFIG_NUMA -static void show_node(struct zone *zone) + +static inline void show_node(struct zone *zone) { - printk("Node %ld ", zone_to_nid(zone)); + if (NUMA_BUILD) + printk("Node %ld ", zone_to_nid(zone)); } -#else -#define show_node(zone) do { } while (0) -#endif void si_meminfo(struct sysinfo *val) { -- cgit v1.2.3-71-gd317 From 77f700dab4c05f8ee17584ec869672796d7bcb87 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 27 Sep 2006 01:50:07 -0700 Subject: [PATCH] Disable GFP_THISNODE in the non-NUMA case GFP_THISNODE must be set to 0 in the non numa case otherwise we disable retry and warnings for failing allocations in the SMP and UP case. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8b34aabfe4c6..bf2b6bc3f6fd 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -67,7 +67,12 @@ struct vm_area_struct; #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) +#else +#define GFP_THISNODE 0 +#endif + /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ -- cgit v1.2.3-71-gd317 From d5f541ed6e31518508c688912e7464facf253c87 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 27 Sep 2006 01:50:08 -0700 Subject: [PATCH] Add node to zone for the NUMA case Add the node in order to optimize zone_to_nid. Signed-off-by: Christoph Lameter Acked-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++++- include/linux/mmzone.h | 1 + mm/page_alloc.c | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7477fb59c4f2..8e433bbc6e7e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -446,7 +446,11 @@ static inline struct zone *page_zone(struct page *page) static inline unsigned long zone_to_nid(struct zone *zone) { - return zone->zone_pgdat->node_id; +#ifdef CONFIG_NUMA + return zone->node; +#else + return 0; +#endif } static inline unsigned long page_to_nid(struct page *page) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 562cf7a8f3ee..59855b8718a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -168,6 +168,7 @@ struct zone { unsigned long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA + int node; /* * zone reclaim becomes active if more unmapped pages exist. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4c76188b1681..d0432e44f77d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2405,6 +2405,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; #ifdef CONFIG_NUMA + zone->node = nid; zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; -- cgit v1.2.3-71-gd317 From f4b81804a2d1ab341a4613089dc31ecce0800ed8 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Wed, 27 Sep 2006 01:50:10 -0700 Subject: [PATCH] do_no_pfn() Implement do_no_pfn() for handling mapping of memory without a struct page backing it. This avoids creating fake page table entries for regions which are not backed by real memory. This feature is used by the MSPEC driver and other users, where it is highly undesirable to have a struct page sitting behind the page (for instance if the page is accessed in cached mode via the struct page in parallel to the the driver accessing it uncached, which can result in data corruption on some architectures, such as ia64). This version uses specific NOPFN_{SIGBUS,OOM} return values, rather than expect all negative pfn values would be an error. It also bugs on cow mappings as this would not work with the VM. [akpm@osdl.org: micro-optimise] Signed-off-by: Jes Sorensen Cc: Hugh Dickins Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 ++++++ mm/memory.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 66 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e433bbc6e7e..22165cb18906 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -199,6 +199,7 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); + unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); /* notification that a previously read-only page is about to become @@ -593,6 +594,12 @@ static inline int page_mapped(struct page *page) #define NOPAGE_SIGBUS (NULL) #define NOPAGE_OOM ((struct page *) (-1)) +/* + * Error return values for the *_nopfn functions + */ +#define NOPFN_SIGBUS ((unsigned long) -1) +#define NOPFN_OOM ((unsigned long) -2) + /* * Different kinds of faults, as returned by handle_mm_fault(). * Used to decide whether a process gets delivered SIGBUS or diff --git a/mm/memory.c b/mm/memory.c index 92a3ebd8d795..f2ef1dcfff77 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2255,6 +2255,54 @@ oom: return VM_FAULT_OOM; } +/* + * do_no_pfn() tries to create a new page mapping for a page without + * a struct_page backing it + * + * As this is called only for pages that do not currently exist, we + * do not need to flush old virtual caches or the TLB. + * + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults), and pte mapped but not yet locked. + * We return with mmap_sem still held, but pte unmapped and unlocked. + * + * It is expected that the ->nopfn handler always returns the same pfn + * for a given virtual mapping. + * + * Mark this `noinline' to prevent it from bloating the main pagefault code. + */ +static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + int write_access) +{ + spinlock_t *ptl; + pte_t entry; + unsigned long pfn; + int ret = VM_FAULT_MINOR; + + pte_unmap(page_table); + BUG_ON(!(vma->vm_flags & VM_PFNMAP)); + BUG_ON(is_cow_mapping(vma->vm_flags)); + + pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); + if (pfn == NOPFN_OOM) + return VM_FAULT_OOM; + if (pfn == NOPFN_SIGBUS) + return VM_FAULT_SIGBUS; + + page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + + /* Only go through if we didn't race with anybody else... */ + if (pte_none(*page_table)) { + entry = pfn_pte(pfn, vma->vm_page_prot); + if (write_access) + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + set_pte_at(mm, address, page_table, entry); + } + pte_unmap_unlock(page_table, ptl); + return ret; +} + /* * Fault of a previously existing named mapping. Repopulate the pte * from the encoded file_pte if possible. This enables swappable @@ -2317,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm, old_entry = entry = *pte; if (!pte_present(entry)) { if (pte_none(entry)) { - if (!vma->vm_ops || !vma->vm_ops->nopage) - return do_anonymous_page(mm, vma, address, - pte, pmd, write_access); - return do_no_page(mm, vma, address, - pte, pmd, write_access); + if (vma->vm_ops) { + if (vma->vm_ops->nopage) + return do_no_page(mm, vma, address, + pte, pmd, + write_access); + if (unlikely(vma->vm_ops->nopfn)) + return do_no_pfn(mm, vma, address, pte, + pmd, write_access); + } + return do_anonymous_page(mm, vma, address, + pte, pmd, write_access); } if (pte_file(entry)) return do_file_page(mm, vma, address, -- cgit v1.2.3-71-gd317 From d24afc57d51b1be41f95521e81399061fa5875a6 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 27 Sep 2006 01:50:13 -0700 Subject: [PATCH] Mark __remove_vm_area() static The function is exported but not used from anywhere else. It's also marked as "not for driver use" so noone out there should really care. Signed-off-by: Rolf Eike Beer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 - mm/vmalloc.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dee88c6b6fa7..ce5f1482e6be 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -62,7 +62,6 @@ extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, extern struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node); extern struct vm_struct *remove_vm_area(void *addr); -extern struct vm_struct *__remove_vm_area(void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); extern void unmap_vm_area(struct vm_struct *area); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 659ec634856a..1ac191ce5641 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -272,7 +272,7 @@ static struct vm_struct *__find_vm_area(void *addr) } /* Caller must hold vmlist_lock */ -struct vm_struct *__remove_vm_area(void *addr) +static struct vm_struct *__remove_vm_area(void *addr) { struct vm_struct **p, *tmp; -- cgit v1.2.3-71-gd317 From 5da6185bca064e35aa73a7c1f27488d2b96434f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Sep 2006 01:50:16 -0700 Subject: [PATCH] NOMMU: Set BDI capabilities for /dev/mem and /dev/kmem Set the backing device info capabilities for /dev/mem and /dev/kmem to permit direct sharing under no-MMU conditions and full mapping capabilities under MMU conditions. Make the BDI used by these available to all directly mappable character devices. Also comment the capabilities for /dev/zero. [akpm@osdl.org: ifdef reductions] Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/mem.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/char_dev.c | 20 ++++++++++++++++++++ include/linux/cdev.h | 2 ++ 3 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 917b20402664..4ac70ec697f0 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -238,6 +238,32 @@ static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } #endif +#ifndef CONFIG_MMU +static unsigned long get_unmapped_area_mem(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + if (!valid_mmap_phys_addr_range(pgoff, len)) + return (unsigned long) -EINVAL; + return pgoff; +} + +/* can't do an in-place private mapping if there's no MMU */ +static inline int private_mapping_ok(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_MAYSHARE; +} +#else +#define get_unmapped_area_mem NULL + +static inline int private_mapping_ok(struct vm_area_struct *vma) +{ + return 1; +} +#endif + static int mmap_mem(struct file * file, struct vm_area_struct * vma) { size_t size = vma->vm_end - vma->vm_start; @@ -245,6 +271,9 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; + if (!private_mapping_ok(vma)) + return -ENOSYS; + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); @@ -782,6 +811,7 @@ static const struct file_operations mem_fops = { .write = write_mem, .mmap = mmap_mem, .open = open_mem, + .get_unmapped_area = get_unmapped_area_mem, }; static const struct file_operations kmem_fops = { @@ -790,6 +820,7 @@ static const struct file_operations kmem_fops = { .write = write_kmem, .mmap = mmap_kmem, .open = open_kmem, + .get_unmapped_area = get_unmapped_area_mem, }; static const struct file_operations null_fops = { @@ -815,6 +846,10 @@ static const struct file_operations zero_fops = { .mmap = mmap_zero, }; +/* + * capabilities for /dev/zero + * - permits private mappings, "copies" are taken of the source of zeros + */ static struct backing_dev_info zero_bdi = { .capabilities = BDI_CAP_MAP_COPY, }; @@ -862,9 +897,13 @@ static int memory_open(struct inode * inode, struct file * filp) switch (iminor(inode)) { case 1: filp->f_op = &mem_fops; + filp->f_mapping->backing_dev_info = + &directly_mappable_cdev_bdi; break; case 2: filp->f_op = &kmem_fops; + filp->f_mapping->backing_dev_info = + &directly_mappable_cdev_bdi; break; case 3: filp->f_op = &null_fops; diff --git a/fs/char_dev.c b/fs/char_dev.c index 3483d3cf8087..0009346d827f 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -19,11 +19,30 @@ #include #include #include +#include #ifdef CONFIG_KMOD #include #endif +/* + * capabilities for /dev/mem, /dev/kmem and similar directly mappable character + * devices + * - permits shared-mmap for read, write and/or exec + * - does not permit private mmap in NOMMU mode (can't do COW) + * - no readahead or I/O queue unplugging required + */ +struct backing_dev_info directly_mappable_cdev_bdi = { + .capabilities = ( +#ifdef CONFIG_MMU + /* permit private copies of the data to be taken */ + BDI_CAP_MAP_COPY | +#endif + /* permit direct mmap, for read, write or exec */ + BDI_CAP_MAP_DIRECT | + BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), +}; + static struct kobj_map *cdev_map; static DEFINE_MUTEX(chrdevs_lock); @@ -461,3 +480,4 @@ EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); EXPORT_SYMBOL(register_chrdev); EXPORT_SYMBOL(unregister_chrdev); +EXPORT_SYMBOL(directly_mappable_cdev_bdi); diff --git a/include/linux/cdev.h b/include/linux/cdev.h index 2216638962d2..ee5f53f2ca15 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -23,5 +23,7 @@ void cdev_del(struct cdev *); void cd_forget(struct inode *); +extern struct backing_dev_info directly_mappable_cdev_bdi; + #endif #endif -- cgit v1.2.3-71-gd317 From dbf8685c8e21404e3a8ed244bd0219d3c4b89101 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Sep 2006 01:50:19 -0700 Subject: [PATCH] NOMMU: Implement /proc/pid/maps for NOMMU Implement /proc/pid/maps for NOMMU by reading the vm_area_list attached to current->mm->context.vmlist. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/nommu-mmap.txt | 3 ++ fs/proc/internal.h | 1 + fs/proc/nommu.c | 20 ++++++++---- fs/proc/task_nommu.c | 74 +++++++++++++++++++++++++++++++++++--------- include/linux/proc_fs.h | 2 ++ 5 files changed, 80 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt index b88ebe4d808c..83f4b083d57e 100644 --- a/Documentation/nommu-mmap.txt +++ b/Documentation/nommu-mmap.txt @@ -116,6 +116,9 @@ FURTHER NOTES ON NO-MMU MMAP (*) A list of all the mappings on the system is visible through /proc/maps in no-MMU mode. + (*) A list of all the mappings in use by a process is visible through + /proc//maps in no-MMU mode. + (*) Supplying MAP_FIXED or a requesting a particular mapping address will result in an error. diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 146a434ba944..987c773dbb20 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -28,6 +28,7 @@ do { \ (vmi)->largest_chunk = 0; \ } while(0) +extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); #endif extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index cff10ab1af63..d7dbdf9e0f49 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -33,19 +33,15 @@ #include "internal.h" /* - * display a list of all the VMAs the kernel knows about - * - nommu kernals have a single flat list + * display a single VMA to a sequenced file */ -static int nommu_vma_list_show(struct seq_file *m, void *v) +int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) { - struct vm_area_struct *vma; unsigned long ino = 0; struct file *file; dev_t dev = 0; int flags, len; - vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); - flags = vma->vm_flags; file = vma->vm_file; @@ -78,6 +74,18 @@ static int nommu_vma_list_show(struct seq_file *m, void *v) return 0; } +/* + * display a list of all the VMAs the kernel knows about + * - nommu kernals have a single flat list + */ +static int nommu_vma_list_show(struct seq_file *m, void *v) +{ + struct vm_area_struct *vma; + + vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); + return nommu_vma_show(m, vma); +} + static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) { struct rb_node *_rb; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 4616ed50ffcd..091aa8e48e02 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -138,25 +138,63 @@ out: } /* - * Albert D. Cahalan suggested to fake entries for the traditional - * sections here. This might be worth investigating. + * display mapping lines for a particular process's /proc/pid/maps */ -static int show_map(struct seq_file *m, void *v) +static int show_map(struct seq_file *m, void *_vml) { - return 0; + struct vm_list_struct *vml = _vml; + return nommu_vma_show(m, vml->vma); } + static void *m_start(struct seq_file *m, loff_t *pos) { + struct proc_maps_private *priv = m->private; + struct vm_list_struct *vml; + struct mm_struct *mm; + loff_t n = *pos; + + /* pin the task and mm whilst we play with them */ + priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + if (!priv->task) + return NULL; + + mm = get_task_mm(priv->task); + if (!mm) { + put_task_struct(priv->task); + priv->task = NULL; + return NULL; + } + + down_read(&mm->mmap_sem); + + /* start from the Nth VMA */ + for (vml = mm->context.vmlist; vml; vml = vml->next) + if (n-- == 0) + return vml; return NULL; } -static void m_stop(struct seq_file *m, void *v) + +static void m_stop(struct seq_file *m, void *_vml) { + struct proc_maps_private *priv = m->private; + + if (priv->task) { + struct mm_struct *mm = priv->task->mm; + up_read(&mm->mmap_sem); + mmput(mm); + put_task_struct(priv->task); + } } -static void *m_next(struct seq_file *m, void *v, loff_t *pos) + +static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) { - return NULL; + struct vm_list_struct *vml = _vml; + + (*pos)++; + return vml ? vml->next : NULL; } -static struct seq_operations proc_pid_maps_op = { + +static struct seq_operations proc_pid_maps_ops = { .start = m_start, .next = m_next, .stop = m_stop, @@ -165,11 +203,19 @@ static struct seq_operations proc_pid_maps_op = { static int maps_open(struct inode *inode, struct file *file) { - int ret; - ret = seq_open(file, &proc_pid_maps_op); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = NULL; + struct proc_maps_private *priv; + int ret = -ENOMEM; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->pid = proc_pid(inode); + ret = seq_open(file, &proc_pid_maps_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } } return ret; } @@ -178,6 +224,6 @@ struct file_operations proc_maps_operations = { .open = maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3435ca38dd14..57f70bc8b24b 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -268,7 +268,9 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode) struct proc_maps_private { struct pid *pid; struct task_struct *task; +#ifdef CONFIG_MMU struct vm_area_struct *tail_vma; +#endif }; #endif /* _LINUX_PROC_FS_H */ -- cgit v1.2.3-71-gd317 From f269fdd1829acc5e53bf57b145003e5733133f2b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Sep 2006 01:50:23 -0700 Subject: [PATCH] NOMMU: move the fallback arch_vma_name() to a sensible place Move the fallback arch_vma_name() to a sensible place (kernel/signal.c). Currently it's in fs/proc/task_mmu.c, a file that is dependent on both CONFIG_PROC_FS and CONFIG_MMU being enabled, but it's used from kernel/signal.c from where it is called unconditionally. [akpm@osdl.org: build fix] Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 5 ----- include/linux/mm.h | 2 +- kernel/signal.c | 5 +++++ 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 0a163a4f7764..6b769afac55a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -122,11 +122,6 @@ struct mem_size_stats unsigned long private_dirty; }; -__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct proc_maps_private *priv = m->private; diff --git a/include/linux/mm.h b/include/linux/mm.h index 22165cb18906..7b703b6d4358 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1131,7 +1131,7 @@ void drop_slab(void); extern int randomize_va_space; #endif -const char *arch_vma_name(struct vm_area_struct *vma); +__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/kernel/signal.c b/kernel/signal.c index bfdb5686fa3e..05853a7337e3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2577,6 +2577,11 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) } #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ +__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + void __init signals_init(void) { sigqueue_cachep = -- cgit v1.2.3-71-gd317 From 7e96287ddc4f42081e18248b6167041c0908004c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 27 Sep 2006 01:50:44 -0700 Subject: [PATCH] kdump: introduce "reset_devices" command line option Resetting the devices during driver initialization can be a costly operation in terms of time (especially scsi devices). This option can be used by drivers to know that user forcibly wants the devices to be reset during initialization. This option can be useful while kernel is booting in unreliable environment. For ex. during kdump boot where devices are in unknown random state and BIOS execution has been skipped. Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ include/linux/init.h | 1 + init/main.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 255ec535bba8..54983246930d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1370,6 +1370,9 @@ running once the system is up. Reserves a hole at the top of the kernel virtual address space. + reset_devices [KNL] Force drivers to reset the underlying device + during initialization. + resume= [SWSUSP] Specify the partition device for software suspend diff --git a/include/linux/init.h b/include/linux/init.h index 6667785dd1ff..e92b1455d7af 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -68,6 +68,7 @@ extern initcall_t __security_initcall_start[], __security_initcall_end[]; /* Defined in init/main.c */ extern char saved_command_line[]; +extern unsigned int reset_devices; /* used by init/main.c */ extern void setup_arch(char **); diff --git a/init/main.c b/init/main.c index 913e48d658ee..0766e69712b2 100644 --- a/init/main.c +++ b/init/main.c @@ -127,6 +127,18 @@ static char *ramdisk_execute_command; /* Setup configured maximum number of CPUs to activate */ static unsigned int max_cpus = NR_CPUS; +/* + * If set, this is an indication to the drivers that reset the underlying + * device before going ahead with the initialization otherwise driver might + * rely on the BIOS and skip the reset operation. + * + * This is useful if kernel is booting in an unreliable environment. + * For ex. kdump situaiton where previous kernel has crashed, BIOS has been + * skipped and devices will be in unknown state. + */ +unsigned int reset_devices; +EXPORT_SYMBOL(reset_devices); + /* * Setup routine for controlling SMP activation * @@ -153,6 +165,14 @@ static int __init maxcpus(char *str) __setup("maxcpus=", maxcpus); +static int __init set_reset_devices(char *str) +{ + reset_devices = 1; + return 1; +} + +__setup("reset_devices", set_reset_devices); + static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -- cgit v1.2.3-71-gd317 From 8e18e2941c53416aa219708e7dcad21fb4bd6794 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:46 -0700 Subject: [PATCH] inode_diet: Replace inode.u.generic_ip with inode.i_private The following patches reduce the size of the VFS inode structure by 28 bytes on a UP x86. (It would be more on an x86_64 system). This is a 10% reduction in the inode size on a UP kernel that is configured in a production mode (i.e., with no spinlock or other debugging functions enabled; if you want to save memory taken up by in-core inodes, the first thing you should do is disable the debugging options; they are responsible for a huge amount of bloat in the VFS inode structure). This patch: The filesystem or device-specific pointer in the inode is inside a union, which is pretty pointless given that all 30+ users of this field have been using the void pointer. Get rid of the union and rename it to i_private, with a comment to explain who is allowed to use the void pointer. This is just a cleanup, but it allows us to reuse the union 'u' for something something where the union will actually be used. [judith@osdl.org: powerpc build fix] Signed-off-by: "Theodore Ts'o" Signed-off-by: Judith Lebzelter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +- arch/s390/hypfs/inode.c | 6 ++--- arch/s390/kernel/debug.c | 2 +- block/blktrace.c | 2 +- drivers/i2c/chips/tps65010.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 12 ++++----- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 4 +-- drivers/misc/ibmasm/ibmasmfs.c | 16 ++++++------ drivers/net/irda/vlsi_ir.h | 2 +- drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c | 2 +- drivers/oprofile/oprofilefs.c | 10 ++++---- drivers/pci/hotplug/cpqphp_sysfs.c | 2 +- drivers/usb/core/devio.c | 2 +- drivers/usb/core/inode.c | 6 ++--- drivers/usb/gadget/inode.c | 6 ++--- drivers/usb/host/isp116x-hcd.c | 2 +- drivers/usb/host/uhci-debug.c | 2 +- drivers/usb/mon/mon_stat.c | 2 +- drivers/usb/mon/mon_text.c | 4 +-- fs/autofs/inode.c | 2 +- fs/autofs/symlink.c | 2 +- fs/binfmt_misc.c | 8 +++--- fs/debugfs/file.c | 4 +-- fs/debugfs/inode.c | 4 +-- fs/devpts/inode.c | 4 +-- fs/freevxfs/vxfs.h | 2 +- fs/freevxfs/vxfs_inode.c | 4 +-- fs/fuse/control.c | 6 ++--- fs/inode.c | 2 +- fs/jffs/inode-v23.c | 34 +++++++++++++------------- fs/libfs.c | 2 +- fs/ocfs2/dlmglue.c | 2 +- include/linux/fs.h | 4 +-- kernel/relay.c | 2 +- security/inode.c | 8 +++--- 36 files changed, 88 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7b4572805db9..b837f12a84ae 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -120,7 +120,7 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry, ret = 0; inode->i_op = &spufs_file_iops; inode->i_fop = fops; - inode->u.generic_ip = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); d_add(dentry, inode); out: return ret; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 641e6511cf06..446e17d162a5 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -85,7 +85,7 @@ static int hcall_inst_seq_open(struct inode *inode, struct file *file) rc = seq_open(file, &hcall_inst_seq_ops); seq = file->private_data; - seq->private = file->f_dentry->d_inode->u.generic_ip; + seq->private = file->f_dentry->d_inode->i_private; return rc; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index bdade5f2e325..8735024d235b 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -104,13 +104,13 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) static void hypfs_drop_inode(struct inode *inode) { - kfree(inode->u.generic_ip); + kfree(inode->i_private); generic_delete_inode(inode); } static int hypfs_open(struct inode *inode, struct file *filp) { - char *data = filp->f_dentry->d_inode->u.generic_ip; + char *data = filp->f_dentry->d_inode->i_private; struct hypfs_sb_info *fs_info; if (filp->f_mode & FMODE_WRITE) { @@ -352,7 +352,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, parent->d_inode->i_nlink++; } else BUG(); - inode->u.generic_ip = data; + inode->i_private = data; d_instantiate(dentry, inode); dget(dentry); return dentry; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 7ba20922a535..43f3d0c7e132 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -603,7 +603,7 @@ debug_open(struct inode *inode, struct file *file) debug_info_t *debug_info, *debug_info_snapshot; down(&debug_lock); - debug_info = (struct debug_info*)file->f_dentry->d_inode->u.generic_ip; + debug_info = file->f_dentry->d_inode->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) diff --git a/block/blktrace.c b/block/blktrace.c index 265f7a830619..2b4ef2b89b8d 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -217,7 +217,7 @@ static int blk_trace_remove(request_queue_t *q) static int blk_dropped_open(struct inode *inode, struct file *filp) { - filp->private_data = inode->u.generic_ip; + filp->private_data = inode->i_private; return 0; } diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 0be6fd6a267d..6a7578217177 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -305,7 +305,7 @@ static int dbg_show(struct seq_file *s, void *_) static int dbg_tps_open(struct inode *inode, struct file *file) { - return single_open(file, dbg_show, inode->u.generic_ip); + return single_open(file, dbg_show, inode->i_private); } static struct file_operations debug_fops = { diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a5eb30a06a5c..055cdd089b28 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -64,7 +64,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { inode->i_op = &simple_dir_inode_operations; inode->i_nlink++; @@ -119,7 +119,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, u16 i; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; for (i = 0; i < NUM_COUNTERS; i++) counters[i] = ipath_snap_cntr(dd, i); @@ -139,7 +139,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, struct ipath_devdata *dd; u64 guid; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; guid = be64_to_cpu(dd->ipath_guid); @@ -178,7 +178,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, u32 tmp, tmp2; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; /* so we only initialize non-zero fields. */ memset(portinfo, 0, sizeof portinfo); @@ -325,7 +325,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -381,7 +381,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 5dde380e8dbe..f1cb83688b31 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -141,7 +141,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } @@ -247,7 +247,7 @@ static int ipoib_path_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 4a35caff5d02..0e909b617226 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -175,7 +175,7 @@ static struct dentry *ibmasmfs_create_file (struct super_block *sb, } inode->i_fop = fops; - inode->u.generic_ip = data; + inode->i_private = data; d_add(dentry, inode); return dentry; @@ -244,7 +244,7 @@ static int command_file_open(struct inode *inode, struct file *file) { struct ibmasmfs_command_data *command_data; - if (!inode->u.generic_ip) + if (!inode->i_private) return -ENODEV; command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL); @@ -252,7 +252,7 @@ static int command_file_open(struct inode *inode, struct file *file) return -ENOMEM; command_data->command = NULL; - command_data->sp = inode->u.generic_ip; + command_data->sp = inode->i_private; file->private_data = command_data; return 0; } @@ -351,10 +351,10 @@ static int event_file_open(struct inode *inode, struct file *file) struct ibmasmfs_event_data *event_data; struct service_processor *sp; - if (!inode->u.generic_ip) + if (!inode->i_private) return -ENODEV; - sp = inode->u.generic_ip; + sp = inode->i_private; event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL); if (!event_data) @@ -439,14 +439,14 @@ static int r_heartbeat_file_open(struct inode *inode, struct file *file) { struct ibmasmfs_heartbeat_data *rhbeat; - if (!inode->u.generic_ip) + if (!inode->i_private) return -ENODEV; rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL); if (!rhbeat) return -ENOMEM; - rhbeat->sp = (struct service_processor *)inode->u.generic_ip; + rhbeat->sp = inode->i_private; rhbeat->active = 0; ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); file->private_data = rhbeat; @@ -508,7 +508,7 @@ static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, static int remote_settings_file_open(struct inode *inode, struct file *file) { - file->private_data = inode->u.generic_ip; + file->private_data = inode->i_private; return 0; } diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h index a82a4ba8de4f..c37f0bc4c7f9 100644 --- a/drivers/net/irda/vlsi_ir.h +++ b/drivers/net/irda/vlsi_ir.h @@ -58,7 +58,7 @@ typedef void irqreturn_t; /* PDE() introduced in 2.5.4 */ #ifdef CONFIG_PROC_FS -#define PDE(inode) ((inode)->u.generic_ip) +#define PDE(inode) ((inode)->i_private) #endif /* irda crc16 calculation exported in 2.5.42 */ diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c b/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c index 923275ea0789..b9df06a06ea9 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c @@ -54,7 +54,7 @@ static ssize_t write_file_dummy(struct file *file, const char __user *buf, static int open_file_generic(struct inode *inode, struct file *file) { - file->private_data = inode->u.generic_ip; + file->private_data = inode->i_private; return 0; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 71c2da277d6e..deb37354785b 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -110,8 +110,8 @@ static ssize_t ulong_write_file(struct file * file, char const __user * buf, siz static int default_open(struct inode * inode, struct file * filp) { - if (inode->u.generic_ip) - filp->private_data = inode->u.generic_ip; + if (inode->i_private) + filp->private_data = inode->i_private; return 0; } @@ -158,7 +158,7 @@ int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root, if (!d) return -EFAULT; - d->d_inode->u.generic_ip = val; + d->d_inode->i_private = val; return 0; } @@ -171,7 +171,7 @@ int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root, if (!d) return -EFAULT; - d->d_inode->u.generic_ip = val; + d->d_inode->i_private = val; return 0; } @@ -197,7 +197,7 @@ int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root, if (!d) return -EFAULT; - d->d_inode->u.generic_ip = val; + d->d_inode->i_private = val; return 0; } diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 8b3da007e859..5bab666cd67e 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -140,7 +140,7 @@ struct ctrl_dbg { static int open(struct inode *inode, struct file *file) { - struct controller *ctrl = inode->u.generic_ip; + struct controller *ctrl = inode->i_private; struct ctrl_dbg *dbg; int retval = -ENOMEM; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 218621b9958e..32e03000420c 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -555,7 +555,7 @@ static int usbdev_open(struct inode *inode, struct file *file) if (imajor(inode) == USB_DEVICE_MAJOR) dev = usbdev_lookup_minor(iminor(inode)); if (!dev) - dev = inode->u.generic_ip; + dev = inode->i_private; if (!dev) { kfree(ps); goto out; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 3182c2224ba2..482f253085e5 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -402,8 +402,8 @@ static loff_t default_file_lseek (struct file *file, loff_t offset, int orig) static int default_open (struct inode *inode, struct file *file) { - if (inode->u.generic_ip) - file->private_data = inode->u.generic_ip; + if (inode->i_private) + file->private_data = inode->i_private; return 0; } @@ -509,7 +509,7 @@ static struct dentry *fs_create_file (const char *name, mode_t mode, } else { if (dentry->d_inode) { if (data) - dentry->d_inode->u.generic_ip = data; + dentry->d_inode->i_private = data; if (fops) dentry->d_inode->i_fop = fops; dentry->d_inode->i_uid = uid; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 3bdc5e3ba234..ffaa8c1afad8 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -844,7 +844,7 @@ fail1: static int ep_open (struct inode *inode, struct file *fd) { - struct ep_data *data = inode->u.generic_ip; + struct ep_data *data = inode->i_private; int value = -EBUSY; if (down_interruptible (&data->lock) != 0) @@ -1909,7 +1909,7 @@ fail: static int dev_open (struct inode *inode, struct file *fd) { - struct dev_data *dev = inode->u.generic_ip; + struct dev_data *dev = inode->i_private; int value = -EBUSY; if (dev->state == STATE_DEV_DISABLED) { @@ -1970,7 +1970,7 @@ gadgetfs_make_inode (struct super_block *sb, inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; inode->i_fop = fops; } return inode; diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 5147ed4a6662..8c6b38a0b5bb 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1204,7 +1204,7 @@ static int isp116x_show_dbg(struct seq_file *s, void *unused) static int isp116x_open_seq(struct inode *inode, struct file *file) { - return single_open(file, isp116x_show_dbg, inode->u.generic_ip); + return single_open(file, isp116x_show_dbg, inode->i_private); } static struct file_operations isp116x_debug_fops = { diff --git a/drivers/usb/host/uhci-debug.c b/drivers/usb/host/uhci-debug.c index dc286a48cafd..d1372cb27f33 100644 --- a/drivers/usb/host/uhci-debug.c +++ b/drivers/usb/host/uhci-debug.c @@ -428,7 +428,7 @@ struct uhci_debug { static int uhci_debug_open(struct inode *inode, struct file *file) { - struct uhci_hcd *uhci = inode->u.generic_ip; + struct uhci_hcd *uhci = inode->i_private; struct uhci_debug *up; int ret = -ENOMEM; unsigned long flags; diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c index 1fe01d994a79..86ad2b381c4b 100644 --- a/drivers/usb/mon/mon_stat.c +++ b/drivers/usb/mon/mon_stat.c @@ -28,7 +28,7 @@ static int mon_stat_open(struct inode *inode, struct file *file) if ((sp = kmalloc(sizeof(struct snap), GFP_KERNEL)) == NULL) return -ENOMEM; - mbus = inode->u.generic_ip; + mbus = inode->i_private; sp->slen = snprintf(sp->str, STAT_BUF_SIZE, "nreaders %d events %u text_lost %u\n", diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index f961a770cee2..2fd39b4fa166 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -238,7 +238,7 @@ static int mon_text_open(struct inode *inode, struct file *file) int rc; mutex_lock(&mon_lock); - mbus = inode->u.generic_ip; + mbus = inode->i_private; ubus = mbus->u_bus; rp = kzalloc(sizeof(struct mon_reader_text), GFP_KERNEL); @@ -401,7 +401,7 @@ static int mon_text_release(struct inode *inode, struct file *file) struct mon_event_text *ep; mutex_lock(&mon_lock); - mbus = inode->u.generic_ip; + mbus = inode->i_private; if (mbus->nreaders <= 0) { printk(KERN_ERR TAG ": consistency error on close\n"); diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index c81d6b8c2828..d39d2acd9b38 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -241,7 +241,7 @@ static void autofs_read_inode(struct inode *inode) inode->i_op = &autofs_symlink_inode_operations; sl = &sbi->symlink[n]; - inode->u.generic_ip = sl; + inode->i_private = sl; inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime; inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index 52e8772b066e..c74f2eb65775 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -15,7 +15,7 @@ /* Nothing to release.. */ static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd) { - char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; + char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data; nd_set_link(nd, s); return NULL; } diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 34ebbc191e46..6759b9839ce8 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -517,7 +517,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) static void bm_clear_inode(struct inode *inode) { - kfree(inode->u.generic_ip); + kfree(inode->i_private); } static void kill_node(Node *e) @@ -545,7 +545,7 @@ static void kill_node(Node *e) static ssize_t bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) { - Node *e = file->f_dentry->d_inode->u.generic_ip; + Node *e = file->f_dentry->d_inode->i_private; loff_t pos = *ppos; ssize_t res; char *page; @@ -579,7 +579,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct dentry *root; - Node *e = file->f_dentry->d_inode->u.generic_ip; + Node *e = file->f_dentry->d_inode->i_private; int res = parse_command(buffer, count); switch (res) { @@ -646,7 +646,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, } e->dentry = dget(dentry); - inode->u.generic_ip = e; + inode->i_private = e; inode->i_fop = &bm_entry_operations; d_instantiate(dentry, inode); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index e4b430552c88..bf3901ab1744 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -32,8 +32,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, static int default_open(struct inode *inode, struct file *file) { - if (inode->u.generic_ip) - file->private_data = inode->u.generic_ip; + if (inode->i_private) + file->private_data = inode->i_private; return 0; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 3ca268d2e5a2..717f4821ed02 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -168,7 +168,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, * directory dentry if set. If this paramater is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later - * on. The inode.u.generic_ip pointer will point to this value on + * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. @@ -209,7 +209,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode, if (dentry->d_inode) { if (data) - dentry->d_inode->u.generic_ip = data; + dentry->d_inode->i_private = data; if (fops) dentry->d_inode->i_fop = fops; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f7aef5bb584a..5bf06a10dddf 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -177,7 +177,7 @@ int devpts_pty_new(struct tty_struct *tty) inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); - inode->u.generic_ip = tty; + inode->i_private = tty; dentry = get_node(number); if (!IS_ERR(dentry) && !dentry->d_inode) @@ -196,7 +196,7 @@ struct tty_struct *devpts_get_tty(int number) tty = NULL; if (!IS_ERR(dentry)) { if (dentry->d_inode) - tty = dentry->d_inode->u.generic_ip; + tty = dentry->d_inode->i_private; dput(dentry); } diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h index d35979a58743..c8a92652612a 100644 --- a/fs/freevxfs/vxfs.h +++ b/fs/freevxfs/vxfs.h @@ -252,7 +252,7 @@ enum { * Get filesystem private data from VFS inode. */ #define VXFS_INO(ip) \ - ((struct vxfs_inode_info *)(ip)->u.generic_ip) + ((struct vxfs_inode_info *)(ip)->i_private) /* * Get filesystem private data from VFS superblock. diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ca6a39714771..32a82ed108e4 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -243,7 +243,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) ip->i_blocks = vip->vii_blocks; ip->i_generation = vip->vii_gen; - ip->u.generic_ip = (void *)vip; + ip->i_private = vip; } @@ -338,5 +338,5 @@ vxfs_read_inode(struct inode *ip) void vxfs_clear_inode(struct inode *ip) { - kmem_cache_free(vxfs_inode_cachep, ip->u.generic_ip); + kmem_cache_free(vxfs_inode_cachep, ip->i_private); } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 46fe60b2da23..79ec1f23d4d2 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file) { struct fuse_conn *fc; mutex_lock(&fuse_mutex); - fc = file->f_dentry->d_inode->u.generic_ip; + fc = file->f_dentry->d_inode->i_private; if (fc) fc = fuse_conn_get(fc); mutex_unlock(&fuse_mutex); @@ -98,7 +98,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, inode->i_op = iop; inode->i_fop = fop; inode->i_nlink = nlink; - inode->u.generic_ip = fc; + inode->i_private = fc; d_add(dentry, inode); return dentry; } @@ -150,7 +150,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) for (i = fc->ctl_ndents - 1; i >= 0; i--) { struct dentry *dentry = fc->ctl_dentry[i]; - dentry->d_inode->u.generic_ip = NULL; + dentry->d_inode->i_private = NULL; d_drop(dentry); dput(dentry); } diff --git a/fs/inode.c b/fs/inode.c index 0bf9f0444a96..77e254792025 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -163,7 +163,7 @@ static struct inode *alloc_inode(struct super_block *sb) bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; mapping->backing_dev_info = bdi; } - memset(&inode->u, 0, sizeof(inode->u)); + inode->i_private = 0; inode->i_mapping = mapping; } return inode; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index b59553d28d13..7358ef87f16b 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -369,7 +369,7 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode, f = jffs_find_file(c, raw_inode->ino); - inode->u.generic_ip = (void *)f; + inode->i_private = (void *)f; insert_inode_hash(inode); return inode; @@ -442,7 +442,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry, }); result = -ENOTDIR; - if (!(old_dir_f = (struct jffs_file *)old_dir->u.generic_ip)) { + if (!(old_dir_f = old_dir->i_private)) { D(printk("jffs_rename(): Old dir invalid.\n")); goto jffs_rename_end; } @@ -456,7 +456,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Find the new directory. */ result = -ENOTDIR; - if (!(new_dir_f = (struct jffs_file *)new_dir->u.generic_ip)) { + if (!(new_dir_f = new_dir->i_private)) { D(printk("jffs_rename(): New dir invalid.\n")); goto jffs_rename_end; } @@ -593,7 +593,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) } else { ddino = ((struct jffs_file *) - inode->u.generic_ip)->pino; + inode->i_private)->pino; } D3(printk("jffs_readdir(): \"..\" %u\n", ddino)); if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) { @@ -604,7 +604,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) } filp->f_pos++; } - f = ((struct jffs_file *)inode->u.generic_ip)->children; + f = ((struct jffs_file *)inode->i_private)->children; j = 2; while(f && (f->deleted || j++ < filp->f_pos )) { @@ -668,7 +668,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) } r = -EACCES; - if (!(d = (struct jffs_file *)dir->u.generic_ip)) { + if (!(d = (struct jffs_file *)dir->i_private)) { D(printk("jffs_lookup(): No such inode! (%lu)\n", dir->i_ino)); goto jffs_lookup_end; @@ -739,7 +739,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page) unsigned long read_len; int result; struct inode *inode = (struct inode*)page->mapping->host; - struct jffs_file *f = (struct jffs_file *)inode->u.generic_ip; + struct jffs_file *f = (struct jffs_file *)inode->i_private; struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info; int r; loff_t offset; @@ -828,7 +828,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode) }); lock_kernel(); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; ASSERT(if (!dir_f) { printk(KERN_ERR "jffs_mkdir(): No reference to a " @@ -972,7 +972,7 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type) kfree(_name); }); - dir_f = (struct jffs_file *) dir->u.generic_ip; + dir_f = dir->i_private; c = dir_f->c; result = -ENOENT; @@ -1082,7 +1082,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) if (!old_valid_dev(rdev)) return -EINVAL; lock_kernel(); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; c = dir_f->c; D3(printk (KERN_NOTICE "mknod(): down biglock\n")); @@ -1186,7 +1186,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) kfree(_symname); }); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; ASSERT(if (!dir_f) { printk(KERN_ERR "jffs_symlink(): No reference to a " "jffs_file struct in inode.\n"); @@ -1289,7 +1289,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode, kfree(s); }); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; ASSERT(if (!dir_f) { printk(KERN_ERR "jffs_create(): No reference to a " "jffs_file struct in inode.\n"); @@ -1403,9 +1403,9 @@ jffs_file_write(struct file *filp, const char *buf, size_t count, goto out_isem; } - if (!(f = (struct jffs_file *)inode->u.generic_ip)) { - D(printk("jffs_file_write(): inode->u.generic_ip = 0x%p\n", - inode->u.generic_ip)); + if (!(f = inode->i_private)) { + D(printk("jffs_file_write(): inode->i_private = 0x%p\n", + inode->i_private)); goto out_isem; } @@ -1693,7 +1693,7 @@ jffs_read_inode(struct inode *inode) mutex_unlock(&c->fmc->biglock); return; } - inode->u.generic_ip = (void *)f; + inode->i_private = f; inode->i_mode = f->mode; inode->i_nlink = f->nlink; inode->i_uid = f->uid; @@ -1748,7 +1748,7 @@ jffs_delete_inode(struct inode *inode) lock_kernel(); inode->i_size = 0; inode->i_blocks = 0; - inode->u.generic_ip = NULL; + inode->i_private = NULL; clear_inode(inode); if (inode->i_nlink == 0) { c = (struct jffs_control *) inode->i_sb->s_fs_info; diff --git a/fs/libfs.c b/fs/libfs.c index ac02ea602c3d..2751793beeaa 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -547,7 +547,7 @@ int simple_attr_open(struct inode *inode, struct file *file, attr->get = get; attr->set = set; - attr->data = inode->u.generic_ip; + attr->data = inode->i_private; attr->fmt = fmt; mutex_init(&attr->mutex); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index de887063dcfc..8801e41afe80 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2052,7 +2052,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) mlog_errno(ret); goto out; } - osb = (struct ocfs2_super *) inode->u.generic_ip; + osb = inode->i_private; ocfs2_get_dlm_debug(osb->osb_dlm_debug); priv->p_dlm_debug = osb->osb_dlm_debug; INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1d3e601ece73..4f77ec9c3353 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -554,9 +554,7 @@ struct inode { atomic_t i_writecount; void *i_security; - union { - void *generic_ip; - } u; + void *i_private; /* fs or device private pointer */ #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif diff --git a/kernel/relay.c b/kernel/relay.c index 33345e73485c..85786ff2a4f9 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -669,7 +669,7 @@ EXPORT_SYMBOL_GPL(relay_flush); */ static int relay_file_open(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = inode->u.generic_ip; + struct rchan_buf *buf = inode->i_private; kref_get(&buf->kref); filp->private_data = buf; diff --git a/security/inode.c b/security/inode.c index 47eb63480dac..176aacea8ca4 100644 --- a/security/inode.c +++ b/security/inode.c @@ -44,8 +44,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, static int default_open(struct inode *inode, struct file *file) { - if (inode->u.generic_ip) - file->private_data = inode->u.generic_ip; + if (inode->i_private) + file->private_data = inode->i_private; return 0; } @@ -194,7 +194,7 @@ static int create_by_name(const char *name, mode_t mode, * directory dentry if set. If this paramater is NULL, then the * file will be created in the root of the securityfs filesystem. * @data: a pointer to something that the caller will want to get to later - * on. The inode.u.generic_ip pointer will point to this value on + * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. @@ -240,7 +240,7 @@ struct dentry *securityfs_create_file(const char *name, mode_t mode, if (fops) dentry->d_inode->i_fop = fops; if (data) - dentry->d_inode->u.generic_ip = data; + dentry->d_inode->i_private = data; } exit: return dentry; -- cgit v1.2.3-71-gd317 From 4c1541680f8d189d21dd07b053bc12996574646e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:47 -0700 Subject: [PATCH] inode-diet: Move i_pipe into a union Move the i_pipe pointer into a union that will be shared with i_bdev and i_cdev. Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f77ec9c3353..ca695fc8d69c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -528,9 +528,10 @@ struct inode { #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; #endif - /* These three should probably be a union */ struct list_head i_devices; - struct pipe_inode_info *i_pipe; + union { + struct pipe_inode_info *i_pipe; + }; struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; -- cgit v1.2.3-71-gd317 From eaf796e7ef6014f208c409b2b14fddcfaafe7e3a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:48 -0700 Subject: [PATCH] inode-diet: Move i_bdev into a union Move the i_bdev pointer in struct inode into a union. Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 77e254792025..41b462232f7f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -254,7 +254,7 @@ void clear_inode(struct inode *inode) DQUOT_DROP(inode); if (inode->i_sb && inode->i_sb->s_op->clear_inode) inode->i_sb->s_op->clear_inode(inode); - if (inode->i_bdev) + if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); if (inode->i_cdev) cd_forget(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index ca695fc8d69c..98ff684a5b1c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -531,8 +531,8 @@ struct inode { struct list_head i_devices; union { struct pipe_inode_info *i_pipe; + struct block_device *i_bdev; }; - struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; -- cgit v1.2.3-71-gd317 From 577c4eb09d1034d0739e3135fd2cff50588024be Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:49 -0700 Subject: [PATCH] inode-diet: Move i_cdev into a union Move the i_cdev pointer in struct inode into a union. Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/file_table.c | 2 +- fs/inode.c | 2 +- include/linux/fs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/file_table.c b/fs/file_table.c index 0131ba06e1ee..bc35a40417d7 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -169,7 +169,7 @@ void fastcall __fput(struct file *file) if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); - if (unlikely(inode->i_cdev != NULL)) + if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) cdev_put(inode->i_cdev); fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) diff --git a/fs/inode.c b/fs/inode.c index 41b462232f7f..f5c04dd9ae8a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -256,7 +256,7 @@ void clear_inode(struct inode *inode) inode->i_sb->s_op->clear_inode(inode); if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); - if (inode->i_cdev) + if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); inode->i_state = I_CLEAR; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 98ff684a5b1c..192e69bb55b5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -532,8 +532,8 @@ struct inode { union { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; + struct cdev *i_cdev; }; - struct cdev *i_cdev; int i_cindex; __u32 i_generation; -- cgit v1.2.3-71-gd317 From ba52de123d454b57369f291348266d86f4b35070 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:49 -0700 Subject: [PATCH] inode-diet: Eliminate i_blksize from the inode structure This eliminates the i_blksize field from struct inode. Filesystems that want to provide a per-inode st_blksize can do so by providing their own getattr routine instead of using the generic_fillattr() function. Note that some filesystems were providing pretty much random (and incorrect) values for i_blksize. [bunk@stusta.de: cleanup] [akpm@osdl.org: generic_fillattr() fix] Signed-off-by: "Theodore Ts'o" Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 1 - arch/s390/hypfs/inode.c | 1 - drivers/block/loop.c | 7 +++++-- drivers/infiniband/hw/ipath/ipath_fs.c | 1 - drivers/isdn/capi/capifs.c | 2 -- drivers/misc/ibmasm/ibmasmfs.c | 1 - drivers/oprofile/oprofilefs.c | 1 - drivers/usb/core/inode.c | 1 - drivers/usb/gadget/inode.c | 1 - fs/9p/vfs_inode.c | 4 +--- fs/adfs/inode.c | 1 - fs/afs/inode.c | 1 - fs/autofs/inode.c | 1 - fs/autofs4/inode.c | 1 - fs/befs/linuxvfs.c | 1 - fs/bfs/dir.c | 2 +- fs/bfs/inode.c | 1 - fs/binfmt_misc.c | 1 - fs/cifs/cifsfs.c | 1 - fs/cifs/readdir.c | 5 ++--- fs/coda/coda_linux.c | 2 -- fs/configfs/inode.c | 1 - fs/cramfs/inode.c | 1 - fs/debugfs/inode.c | 1 - fs/devpts/inode.c | 2 -- fs/eventpoll.c | 1 - fs/ext2/ialloc.c | 1 - fs/ext2/inode.c | 1 - fs/ext3/ialloc.c | 1 - fs/ext3/inode.c | 3 --- fs/fat/inode.c | 3 --- fs/freevxfs/vxfs_inode.c | 1 - fs/fuse/inode.c | 1 - fs/hfs/inode.c | 2 -- fs/hfsplus/inode.c | 2 -- fs/hostfs/hostfs_kern.c | 1 - fs/hpfs/inode.c | 1 - fs/hppfs/hppfs_kern.c | 1 - fs/hugetlbfs/inode.c | 1 - fs/isofs/inode.c | 3 +-- fs/jffs/inode-v23.c | 2 -- fs/jffs2/fs.c | 2 -- fs/jfs/jfs_extent.c | 2 +- fs/jfs/jfs_imap.c | 1 - fs/jfs/jfs_inode.c | 1 - fs/jfs/jfs_metapage.c | 2 +- fs/libfs.c | 2 -- fs/minix/bitmap.c | 2 +- fs/minix/inode.c | 4 ++-- fs/ncpfs/inode.c | 1 - fs/nfs/inode.c | 4 ---- fs/ntfs/inode.c | 4 ---- fs/ntfs/mft.c | 5 ----- fs/ocfs2/dlm/dlmfs.c | 2 -- fs/ocfs2/inode.c | 4 ---- fs/pipe.c | 1 - fs/qnx4/inode.c | 1 - fs/ramfs/inode.c | 1 - fs/reiserfs/inode.c | 4 ---- fs/smbfs/inode.c | 2 -- fs/smbfs/proc.c | 1 - fs/stat.c | 3 ++- fs/sysfs/inode.c | 1 - fs/sysv/ialloc.c | 2 +- fs/sysv/inode.c | 2 +- fs/udf/ialloc.c | 1 - fs/udf/inode.c | 2 -- fs/ufs/ialloc.c | 1 - fs/ufs/inode.c | 1 - fs/xfs/linux-2.6/xfs_super.c | 1 - fs/xfs/linux-2.6/xfs_vnode.c | 1 - include/linux/fs.h | 1 - include/linux/nfsd/nfsfh.h | 10 ++-------- include/linux/smb.h | 1 - ipc/mqueue.c | 1 - kernel/cpuset.c | 1 - mm/shmem.c | 1 - net/sunrpc/rpc_pipe.c | 1 - security/inode.c | 1 - security/selinux/selinuxfs.c | 1 - 80 files changed, 21 insertions(+), 125 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index b837f12a84ae..3950ddccb2c8 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -82,7 +82,6 @@ spufs_new_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; out: diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 8735024d235b..813fc21358f9 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -91,7 +91,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; - ret->i_blksize = PAGE_CACHE_SIZE; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7b3b94ddddcc..c774121684d7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -662,7 +662,8 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; - lo->lo_blocksize = mapping->host->i_blksize; + lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? + mapping->host->i_bdev->bd_block_size : PAGE_SIZE; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); complete(&p->wait); @@ -794,7 +795,9 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; - lo_blocksize = inode->i_blksize; + lo_blocksize = S_ISBLK(inode->i_mode) ? + inode->i_bdev->bd_block_size : PAGE_SIZE; + error = 0; } else { goto out_putf; diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 055cdd089b28..c8a8af0fe471 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -61,7 +61,6 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 9ea6bd0ddc35..2dd1b57b0ba4 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -104,7 +104,6 @@ capifs_fill_super(struct super_block *s, void *data, int silent) inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - inode->i_blksize = 1024; inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; @@ -149,7 +148,6 @@ void capifs_new_ncci(unsigned int number, dev_t device) if (!inode) return; inode->i_ino = number+2; - inode->i_blksize = 1024; inode->i_uid = config.setuid ? config.uid : current->fsuid; inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 0e909b617226..b99dc507de2e 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -147,7 +147,6 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) if (ret) { ret->i_mode = mode; ret->i_uid = ret->i_gid = 0; - ret->i_blksize = PAGE_CACHE_SIZE; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index deb37354785b..5756401fb15b 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -31,7 +31,6 @@ static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 482f253085e5..58b4b1012120 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -249,7 +249,6 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index ffaa8c1afad8..2a7162d89799 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1966,7 +1966,6 @@ gadgetfs_make_inode (struct super_block *sb, inode->i_mode = mode; inode->i_uid = default_uid; inode->i_gid = default_gid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index eae50c9d6dc4..7a7ec2d1d2f4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -204,7 +204,6 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_rdev = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -950,9 +949,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, inode->i_size = stat->length; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = - (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits; + (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } /** diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 534f3eecc985..7e7a04be1278 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -269,7 +269,6 @@ adfs_iget(struct super_block *sb, struct object_info *obj) inode->i_ino = obj->file_id; inode->i_size = obj->size; inode->i_nlink = 2; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4ebb30a50ed5..6f37754906c2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -72,7 +72,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode) inode->i_ctime.tv_sec = vnode->status.mtime_server; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_version = vnode->fid.unique; inode->i_mapping->a_ops = &afs_fs_aops; diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index d39d2acd9b38..2c9759baad61 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -216,7 +216,6 @@ static void autofs_read_inode(struct inode *inode) inode->i_nlink = 2; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - inode->i_blksize = 1024; if ( ino == AUTOFS_ROOT_INO ) { inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 11a6a9ae51b7..800ce876caec 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -447,7 +447,6 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_uid = 0; inode->i_gid = 0; } - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f6676fbe9484..57020c7a7e65 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -365,7 +365,6 @@ befs_read_inode(struct inode *inode) inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */ inode->i_ctime = inode->i_mtime; inode->i_atime = inode->i_mtime; - inode->i_blksize = befs_sb->block_size; befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num); befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent); diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 26fad9621738..dcf04cb13283 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -102,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_uid = current->fsuid; inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; inode->i_mapping->a_ops = &bfs_aops; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8fc2e8e49dbe..ed27ffb3459e 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -76,7 +76,6 @@ static void bfs_read_inode(struct inode * inode) inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); - inode->i_blksize = PAGE_SIZE; inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6759b9839ce8..66ba137f8661 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -507,7 +507,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 4197a5043f13..22bcf4d7e7ae 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -253,7 +253,6 @@ cifs_alloc_inode(struct super_block *sb) file data or metadata */ cifs_inode->clientCanCacheRead = FALSE; cifs_inode->clientCanCacheAll = FALSE; - cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; INIT_LIST_HEAD(&cifs_inode->openFileList); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 9aeb58a7d369..b27b34537bf2 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -216,10 +216,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, if (allocation_size < end_of_file) cFYI(1, ("May be sparse file, allocation less than file size")); - cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld", + cFYI(1, ("File Size %ld and blocks %llu", (unsigned long)tmp_inode->i_size, - (unsigned long long)tmp_inode->i_blocks, - tmp_inode->i_blksize)); + (unsigned long long)tmp_inode->i_blocks)); if (S_ISREG(tmp_inode->i_mode)) { cFYI(1, ("File inode")); tmp_inode->i_op = &cifs_file_inode_ops; diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 5597080cb811..95a54253c047 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -110,8 +110,6 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) inode->i_nlink = attr->va_nlink; if (attr->va_size != -1) inode->i_size = attr->va_size; - if (attr->va_blocksize != -1) - inode->i_blksize = attr->va_blocksize; if (attr->va_size != -1) inode->i_blocks = (attr->va_size + 511) >> 9; if (attr->va_atime.tv_sec != -1) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5047e6a7581e..fb18917954a9 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -135,7 +135,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) { struct inode * inode = new_inode(configfs_sb); if (inode) { - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &configfs_aops; inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index d09b6777c41a..ad96b6990715 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -73,7 +73,6 @@ static int cramfs_iget5_set(struct inode *inode, void *opaque) inode->i_uid = cramfs_inode->uid; inode->i_size = cramfs_inode->size; inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_gid = cramfs_inode->gid; /* Struct copy intentional */ inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 717f4821ed02..269e649e6dc6 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -40,7 +40,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 5bf06a10dddf..5f7b5a6025bf 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -113,7 +113,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - inode->i_blksize = 1024; inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; @@ -172,7 +171,6 @@ int devpts_pty_new(struct tty_struct *tty) return -ENOMEM; inode->i_ino = number+2; - inode->i_blksize = 1024; inode->i_uid = config.setuid ? config.uid : current->fsuid; inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3a3567433b92..8d544334bcd2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1590,7 +1590,6 @@ static struct inode *ep_eventpoll_inode(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; return inode; eexit_1: diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 695f69ccf908..2cb545bf0f3c 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -574,7 +574,6 @@ got: inode->i_mode = mode; inode->i_ino = ino; - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fb4d3220eb8d..dd4e14c221e0 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1094,7 +1094,6 @@ void ext2_read_inode (struct inode * inode) brelse (bh); goto bad_inode; } - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 1e4ded8aa3cd..e45dbd651736 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -559,7 +559,6 @@ got: inode->i_ino = ino; /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c9fc15f8b609..dcf4f1dd108b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2632,9 +2632,6 @@ void ext3_read_inode(struct inode * inode) * recovery code: that's fine, we're about to complete * the process of deleting those. */ } - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size - * (for stat), not the fs block - * size */ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); #ifdef EXT3_FRAGMENTS diff --git a/fs/fat/inode.c b/fs/fat/inode.c index e1035a590664..ab96ae823753 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -370,8 +370,6 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_flags |= S_IMMUTABLE; } MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; - /* this is as close to the truth as we can get ... */ - inode->i_blksize = sbi->cluster_size; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; inode->i_mtime.tv_sec = @@ -1131,7 +1129,6 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_start = 0; inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry); } - inode->i_blksize = sbi->cluster_size; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; MSDOS_I(inode)->i_logstart = 0; diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 32a82ed108e4..4786d51ad3bd 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -239,7 +239,6 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) ip->i_ctime.tv_nsec = 0; ip->i_mtime.tv_nsec = 0; - ip->i_blksize = PAGE_SIZE; ip->i_blocks = vip->vii_blocks; ip->i_generation = vip->vii_gen; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7d25092262ae..cb7cadb0b790 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -118,7 +118,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) inode->i_uid = attr->uid; inode->i_gid = attr->gid; i_size_write(inode, attr->size); - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = attr->blocks; inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 315cf44a90b2..d05641c35fc9 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -154,7 +154,6 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) inode->i_gid = current->fsgid; inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blksize = HFS_SB(sb)->alloc_blksz; HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; HFS_I(inode)->fs_blocks = 0; @@ -284,7 +283,6 @@ static int hfs_read_inode(struct inode *inode, void *data) inode->i_uid = hsb->s_uid; inode->i_gid = hsb->s_gid; inode->i_nlink = 1; - inode->i_blksize = HFS_SB(inode->i_sb)->alloc_blksz; if (idata->key) HFS_I(inode)->cat_key = *idata->key; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 924ecdef8091..0eb1a6092668 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -304,7 +304,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) inode->i_gid = current->fsgid; inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blksize = HFSPLUS_SB(sb).alloc_blksz; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); init_MUTEX(&HFSPLUS_I(inode).extents_lock); atomic_set(&HFSPLUS_I(inode).opencnt, 0); @@ -407,7 +406,6 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); HFSPLUS_I(inode).dev = 0; - inode->i_blksize = HFSPLUS_SB(inode->i_sb).alloc_blksz; if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index b82e3d9c8790..322e876c35ed 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -156,7 +156,6 @@ static int read_name(struct inode *ino, char *name) ino->i_mode = i_mode; ino->i_nlink = i_nlink; ino->i_size = i_size; - ino->i_blksize = i_blksize; ino->i_blocks = i_blocks; return(0); } diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 56f2c338c4d9..bcf6ee36e065 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i) i->i_gid = hpfs_sb(sb)->sb_gid; i->i_mode = hpfs_sb(sb)->sb_mode; hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv; - i->i_blksize = 512; i->i_size = -1; i->i_blocks = -1; diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 3a9bdf58166f..dcb6d2e988b8 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -152,7 +152,6 @@ static void hppfs_read_inode(struct inode *ino) ino->i_mode = proc_ino->i_mode; ino->i_nlink = proc_ino->i_nlink; ino->i_size = proc_ino->i_size; - ino->i_blksize = proc_ino->i_blksize; ino->i_blocks = proc_ino->i_blocks; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c3920c96dadf..e025a31b4c64 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -357,7 +357,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; - inode->i_blksize = HPAGE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 10e47897bac7..4527692f432b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1235,7 +1235,7 @@ static void isofs_read_inode(struct inode *inode) } inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; ei->i_format_parm[0] = 0; ei->i_format_parm[1] = 0; @@ -1291,7 +1291,6 @@ static void isofs_read_inode(struct inode *inode) isonum_711 (de->ext_attr_length)); /* Set the number of blocks for stat() - should be done before RR */ - inode->i_blksize = PAGE_CACHE_SIZE; /* For stat() only */ inode->i_blocks = (inode->i_size + 511) >> 9; /* diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 7358ef87f16b..f5cf9c93e243 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -364,7 +364,6 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode, inode->i_ctime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + 511) >> 9; f = jffs_find_file(c, raw_inode->ino); @@ -1706,7 +1705,6 @@ jffs_read_inode(struct inode *inode) inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + 511) >> 9; if (S_ISREG(inode->i_mode)) { inode->i_op = &jffs_file_inode_operations; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 4780f82825d6..72d9909d95ff 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -263,7 +263,6 @@ void jffs2_read_inode (struct inode *inode) inode->i_nlink = f->inocache->nlink; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + 511) >> 9; switch (inode->i_mode & S_IFMT) { @@ -449,7 +448,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; inode->i_size = 0; diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 4d52593a5fc6..4c74f0944f7e 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -468,7 +468,7 @@ int extRecord(struct inode *ip, xad_t * xp) int extFill(struct inode *ip, xad_t * xp) { int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; - s64 blkno = offsetXAD(xp) >> ip->i_blksize; + s64 blkno = offsetXAD(xp) >> ip->i_blkbits; // assert(ISSPARSE(ip)); diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index ccbe60aff83d..369d7f39c040 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3115,7 +3115,6 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); - ip->i_blksize = ip->i_sb->s_blocksize; ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); ip->i_generation = le32_to_cpu(dip->di_gen); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 495df402916d..bffaca9ae3a2 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -115,7 +115,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) } jfs_inode->mode2 |= mode; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; jfs_inode->otime = inode->i_ctime.tv_sec; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index e1e0a6e6ebdf..f5afc129d6b1 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -257,7 +257,7 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, int rc = 0; int xflag; s64 xaddr; - sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> + sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_blkbits; if (lblock >= file_blocks) diff --git a/fs/libfs.c b/fs/libfs.c index 2751793beeaa..8db5afb7b0a7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -383,7 +383,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files return -ENOMEM; inode->i_mode = S_IFDIR | 0755; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_op = &simple_dir_inode_operations; @@ -405,7 +404,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files goto out; inode->i_mode = S_IFREG | files->mode; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_fop = files->ops; diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 4a6abc49418e..df6b1075b549 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -254,7 +254,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; inode->i_ino = j; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u)); insert_inode_hash(inode); mark_inode_dirty(inode); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 826b9d830650..c11a4b9fb863 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -396,7 +396,7 @@ static void V1_minix_read_inode(struct inode * inode) inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; for (i = 0; i < 9; i++) minix_inode->u.i1_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); @@ -429,7 +429,7 @@ static void V2_minix_read_inode(struct inode * inode) inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; for (i = 0; i < 10; i++) minix_inode->u.i2_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 8244710e97dd..42e3bef270c9 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -223,7 +223,6 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo) inode->i_nlink = 1; inode->i_uid = server->m.uid; inode->i_gid = server->m.gid; - inode->i_blksize = NCP_BLOCK_SIZE; ncp_update_dates(inode, &nwinfo->i); ncp_update_inode(inode, nwinfo); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 931f52a19579..bc9376ca86cd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -277,10 +277,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; } else { inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; } nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; @@ -969,10 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; } else { inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; } if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 31852121b3f5..933dbd89c2a4 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -556,8 +556,6 @@ static int ntfs_read_locked_inode(struct inode *vi) /* Setup the generic vfs inode parts now. */ - /* This is the optimal IO size (for stat), not the fs block size. */ - vi->i_blksize = PAGE_CACHE_SIZE; /* * This is for checking whether an inode has changed w.r.t. a file so * that the file can be updated if necessary (compare with f_version). @@ -1234,7 +1232,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_blksize = base_vi->i_blksize; vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; @@ -1504,7 +1501,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ni = NTFS_I(vi); base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_blksize = base_vi->i_blksize; vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 578fb3d5e803..584260fd6848 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2637,11 +2637,6 @@ mft_rec_already_initialized: goto undo_mftbmp_alloc; } vi->i_ino = bit; - /* - * This is the optimal IO size (for stat), not the fs block - * size. - */ - vi->i_blksize = PAGE_CACHE_SIZE; /* * This is for checking whether an inode has changed w.r.t. a * file so that the file can be updated if necessary (compare diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 0ff0898a0b9c..0368c6402182 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -335,7 +335,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -362,7 +361,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent, inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 69d3db569166..16e8e74dc966 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -269,7 +269,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_mode = le16_to_cpu(fe->i_mode); inode->i_uid = le32_to_cpu(fe->i_uid); inode->i_gid = le32_to_cpu(fe->i_gid); - inode->i_blksize = (u32)osb->s_clustersize; /* Fast symlinks will have i_size but no allocated clusters. */ if (S_ISLNK(inode->i_mode) && !fe->i_clusters) @@ -1258,8 +1257,6 @@ leave: void ocfs2_refresh_inode(struct inode *inode, struct ocfs2_dinode *fe) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); @@ -1270,7 +1267,6 @@ void ocfs2_refresh_inode(struct inode *inode, inode->i_uid = le32_to_cpu(fe->i_uid); inode->i_gid = le32_to_cpu(fe->i_gid); inode->i_mode = le16_to_cpu(fe->i_mode); - inode->i_blksize = (u32) osb->s_clustersize; if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) inode->i_blocks = 0; else diff --git a/fs/pipe.c b/fs/pipe.c index 20352573e025..f3b6f71e9d0b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -879,7 +879,6 @@ static struct inode * get_pipe_inode(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; return inode; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fddbd61c68d0..5a41db2a218d 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -496,7 +496,6 @@ static void qnx4_read_inode(struct inode *inode) inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime); inode->i_ctime.tv_nsec = 0; inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size); - inode->i_blksize = QNX4_DIR_ENTRY_SIZE; memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); if (S_ISREG(inode->i_mode)) { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index b9677335cc8d..bc0e51662424 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -58,7 +58,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 52f1e2136546..8810fda0da46 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -17,8 +17,6 @@ #include #include -extern int reiserfs_default_io_size; /* default io size devuned in super.c */ - static int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); static int reiserfs_prepare_write(struct file *f, struct page *page, @@ -1122,7 +1120,6 @@ static void init_inode(struct inode *inode, struct path *path) ih = PATH_PITEM_HEAD(path); copy_key(INODE_PKEY(inode), &(ih->ih_key)); - inode->i_blksize = reiserfs_default_io_size; INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); REISERFS_I(inode)->i_flags = 0; @@ -1877,7 +1874,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, } // these do not go to on-disk stat data inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - inode->i_blksize = reiserfs_default_io_size; // store in in-core inode the key of stat data and version all // object items will have (directory items will have old offset diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 92cf60aa6121..2c122ee83adb 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -166,7 +166,6 @@ smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr) fattr->f_mtime = inode->i_mtime; fattr->f_ctime = inode->i_ctime; fattr->f_atime = inode->i_atime; - fattr->f_blksize= inode->i_blksize; fattr->f_blocks = inode->i_blocks; fattr->attr = SMB_I(inode)->attr; @@ -200,7 +199,6 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr) inode->i_uid = fattr->f_uid; inode->i_gid = fattr->f_gid; inode->i_ctime = fattr->f_ctime; - inode->i_blksize= fattr->f_blksize; inode->i_blocks = fattr->f_blocks; inode->i_size = fattr->f_size; inode->i_mtime = fattr->f_mtime; diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index c3495059889d..40e174db9872 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -1826,7 +1826,6 @@ smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr) fattr->f_nlink = 1; fattr->f_uid = server->mnt->uid; fattr->f_gid = server->mnt->gid; - fattr->f_blksize = SMB_ST_BLKSIZE; fattr->f_unix = 0; } diff --git a/fs/stat.c b/fs/stat.c index 3a44dcf97da2..60a31d5e5966 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->ctime = inode->i_ctime; stat->size = i_size_read(inode); stat->blocks = inode->i_blocks; - stat->blksize = inode->i_blksize; + stat->blksize = (1 << inode->i_blkbits); } EXPORT_SYMBOL(generic_fillattr); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index fd7cd5f843d2..e79e38d52c00 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -125,7 +125,6 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) { struct inode * inode = new_inode(sysfs_sb); if (inode) { - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 9b585d1081c0..115ab0d6f4bc 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -170,7 +170,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) inode->i_uid = current->fsuid; inode->i_ino = fs16_to_cpu(sbi, ino); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data)); SYSV_I(inode)->i_dir_start_lookup = 0; insert_inode_hash(inode); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 58b2d22142ba..d63c5e48b050 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -201,7 +201,7 @@ static void sysv_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; si = SYSV_I(inode); for (block = 0; block < 10+1+1+1; block++) diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index d1d38238ce65..8206983f2ebf 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -121,7 +121,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) UDF_I_LOCATION(inode).logicalBlockNum = block; UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0); - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; UDF_I_LENEATTR(inode) = 0; UDF_I_LENALLOC(inode) = 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 605f5111b6d8..b223b32db991 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -916,8 +916,6 @@ __udf_read_inode(struct inode *inode) * i_nlink = 1 * i_op = NULL; */ - inode->i_blksize = PAGE_SIZE; - bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident); if (!bh) diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 9501dcd3b213..2ad1259c6eca 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -255,7 +255,6 @@ cg_found: inode->i_gid = current->fsgid; inode->i_ino = cg * uspi->s_ipg + bit; - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; ufsi->i_flags = UFS_I(dir)->i_flags; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 30c6e8a9446c..ee1eaa6f4ec2 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -741,7 +741,6 @@ void ufs_read_inode(struct inode * inode) ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); } - inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/ inode->i_version++; ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4754f342a5d3..9df9ed37d219 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -171,7 +171,6 @@ xfs_revalidate_inode( break; } - inode->i_blksize = xfs_preferred_iosize(mp); inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); inode->i_blocks = diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 6628d96b6fd6..553fa731ade5 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -122,7 +122,6 @@ vn_revalidate_core( inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; - inode->i_blksize = vap->va_blocksize; if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else diff --git a/include/linux/fs.h b/include/linux/fs.h index 192e69bb55b5..8f74dfbb2edd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -512,7 +512,6 @@ struct inode { struct timespec i_mtime; struct timespec i_ctime; unsigned int i_blkbits; - unsigned long i_blksize; unsigned long i_version; blkcnt_t i_blocks; unsigned short i_bytes; diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index f9edcd2ff3c8..31a3cb617ce0 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -269,14 +269,8 @@ fill_post_wcc(struct svc_fh *fhp) fhp->fh_post_uid = inode->i_uid; fhp->fh_post_gid = inode->i_gid; fhp->fh_post_size = inode->i_size; - if (inode->i_blksize) { - fhp->fh_post_blksize = inode->i_blksize; - fhp->fh_post_blocks = inode->i_blocks; - } else { - fhp->fh_post_blksize = BLOCK_SIZE; - /* how much do we care for accuracy with MinixFS? */ - fhp->fh_post_blocks = (inode->i_size+511) >> 9; - } + fhp->fh_post_blksize = BLOCK_SIZE; + fhp->fh_post_blocks = inode->i_blocks; fhp->fh_post_rdev[0] = htonl((u32)imajor(inode)); fhp->fh_post_rdev[1] = htonl((u32)iminor(inode)); fhp->fh_post_atime = inode->i_atime; diff --git a/include/linux/smb.h b/include/linux/smb.h index 6df3b1501559..f098dff93f6b 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -89,7 +89,6 @@ struct smb_fattr { struct timespec f_atime; struct timespec f_mtime; struct timespec f_ctime; - unsigned long f_blksize; unsigned long f_blocks; int f_unix; }; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index f8381f0660b2..840f8a6fb85f 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -115,7 +115,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index cff41511269f..1b32c2c04c15 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info; diff --git a/mm/shmem.c b/mm/shmem.c index 0a8e29cf87e0..eda907c3a86a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1351,7 +1351,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index f65bea74734d..700c6e061a04 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -488,7 +488,6 @@ rpc_get_inode(struct super_block *sb, int mode) return NULL; inode->i_mode = mode; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { diff --git a/security/inode.c b/security/inode.c index 176aacea8ca4..49ee51529396 100644 --- a/security/inode.c +++ b/security/inode.c @@ -64,7 +64,6 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 00534c302ba2..bab7b386cb8d 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -771,7 +771,6 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) if (ret) { ret->i_mode = mode; ret->i_uid = ret->i_gid = 0; - ret->i_blksize = PAGE_CACHE_SIZE; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } -- cgit v1.2.3-71-gd317 From ebba5f9fcb882306bef7175dee987342ec6fcf2f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Sep 2006 01:50:55 -0700 Subject: [PATCH] consistently use MAX_ERRNO in __syscall_return Consistently use MAX_ERRNO when checking for errors in __syscall_return(). [ralf@linux-mips.org: build fix] Signed-off-by: Randy Dunlap Signed-off-by: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-arm/unistd.h | 3 ++- include/asm-arm26/unistd.h | 3 ++- include/asm-frv/unistd.h | 3 ++- include/asm-h8300/unistd.h | 6 +++--- include/asm-i386/unistd.h | 5 +++-- include/asm-m32r/unistd.h | 5 +++-- include/asm-m68k/unistd.h | 5 +++-- include/asm-m68knommu/unistd.h | 5 +++-- include/asm-s390/unistd.h | 4 +++- include/asm-sh/unistd.h | 7 +++++-- include/asm-sh64/unistd.h | 6 ++++-- include/asm-v850/unistd.h | 5 +++-- include/asm-x86_64/unistd.h | 5 +++-- include/linux/err.h | 4 ++++ 14 files changed, 43 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 1e891f860ef3..2ab4078334bf 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -377,6 +377,7 @@ #endif #ifdef __KERNEL__ +#include #include #define __sys2(x) #x @@ -396,7 +397,7 @@ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-129)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ errno = -(res); \ res = -1; \ } \ diff --git a/include/asm-arm26/unistd.h b/include/asm-arm26/unistd.h index 70eb6d91cfd0..c6d2436c9d34 100644 --- a/include/asm-arm26/unistd.h +++ b/include/asm-arm26/unistd.h @@ -311,6 +311,7 @@ #define __ARM_NR_usr26 (__ARM_NR_BASE+3) #ifdef __KERNEL__ +#include #include #define __sys2(x) #x @@ -322,7 +323,7 @@ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + if ((unsigned long)(res) >= (unsigned long)-MAX_ERRNO) { \ errno = -(res); \ res = -1; \ } \ diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index b80dbd839475..d104d1b91d39 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -320,6 +320,7 @@ #ifdef __KERNEL__ #define NR_syscalls 310 +#include /* * process the return value of a syscall, consigning it to one of two possible fates @@ -329,7 +330,7 @@ #define __syscall_return(type, res) \ do { \ unsigned long __sr2 = (res); \ - if (__builtin_expect(__sr2 >= (unsigned long)(-4095), 0)) { \ + if (__builtin_expect(__sr2 >= (unsigned long)(-MAX_ERRNO), 0)) { \ errno = (-__sr2); \ __sr2 = ~0UL; \ } \ diff --git a/include/asm-h8300/unistd.h b/include/asm-h8300/unistd.h index 226dd596c2da..a2dd90462d80 100644 --- a/include/asm-h8300/unistd.h +++ b/include/asm-h8300/unistd.h @@ -295,14 +295,14 @@ #ifdef __KERNEL__ #define NR_syscalls 289 +#include - -/* user-visible error numbers are in the range -1 - -122: see +/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see */ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ /* avoid using res which is declared to be in register d0; \ errno might expand to a function call and clobber it. */ \ int __err = -(res); \ diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 565d0897b205..bd9987087adc 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -328,14 +328,15 @@ #ifdef __KERNEL__ #define NR_syscalls 319 +#include /* - * user-visible error numbers are in the range -1 - -128: see + * user-visible error numbers are in the range -1 - -MAX_ERRNO: see * */ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-(128 + 1))) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ errno = -(res); \ res = -1; \ } \ diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h index 89f376e6229f..5c6a9ac6cf1a 100644 --- a/include/asm-m32r/unistd.h +++ b/include/asm-m32r/unistd.h @@ -296,8 +296,9 @@ #ifdef __KERNEL__ #define NR_syscalls 285 +#include -/* user-visible error numbers are in the range -1 - -124: see +/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see * */ @@ -305,7 +306,7 @@ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-(124 + 1))) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ /* Avoid using "res" which is declared to be in register r0; \ errno might expand to a function call and clobber it. */ \ int __err = -(res); \ diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index 7c0b6296b45c..751632b904db 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -288,13 +288,14 @@ #ifdef __KERNEL__ #define NR_syscalls 282 +#include -/* user-visible error numbers are in the range -1 - -124: see +/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see */ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ /* avoid using res which is declared to be in register d0; \ errno might expand to a function call and clobber it. */ \ int __err = -(res); \ diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h index 1b2abdf281e1..21fdc37c5c2c 100644 --- a/include/asm-m68knommu/unistd.h +++ b/include/asm-m68knommu/unistd.h @@ -289,13 +289,14 @@ #ifdef __KERNEL__ #define NR_syscalls 282 +#include -/* user-visible error numbers are in the range -1 - -122: see +/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see */ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ /* avoid using res which is declared to be in register d0; \ errno might expand to a function call and clobber it. */ \ int __err = -(res); \ diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index 02b942d85c37..d49c54cb5505 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -342,9 +342,11 @@ #ifdef __KERNEL__ +#include + #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-4095)) {\ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ errno = -(res); \ res = -1; \ } \ diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h index 76b5430cb458..da127d7901af 100644 --- a/include/asm-sh/unistd.h +++ b/include/asm-sh/unistd.h @@ -306,11 +306,14 @@ #ifdef __KERNEL__ -/* user-visible error numbers are in the range -1 - -124: see */ +#include + +/* user-visible error numbers are in the range -1 - -MAX_ERRNO: + * see */ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-124)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ /* Avoid using "res" which is declared to be in register r0; \ errno might expand to a function call and clobber it. */ \ int __err = -(res); \ diff --git a/include/asm-sh64/unistd.h b/include/asm-sh64/unistd.h index 9a1590fffc15..c113566bef33 100644 --- a/include/asm-sh64/unistd.h +++ b/include/asm-sh64/unistd.h @@ -347,8 +347,10 @@ #ifdef __KERNEL__ #define NR_syscalls 321 +#include -/* user-visible error numbers are in the range -1 - -125: see */ +/* user-visible error numbers are in the range -1 - -MAX_ERRNO: + * see */ #define __syscall_return(type, res) \ do { \ @@ -358,7 +360,7 @@ do { \ ** life easier in the system call epilogue (see entry.S) \ */ \ register unsigned long __sr2 __asm__ ("r2") = res; \ - if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ errno = -(res); \ __sr2 = -1; \ } \ diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h index bcb44bfe577a..552b7c873a57 100644 --- a/include/asm-v850/unistd.h +++ b/include/asm-v850/unistd.h @@ -238,12 +238,13 @@ #ifdef __KERNEL__ #include +#include #define __syscall_return(type, res) \ do { \ - /* user-visible error numbers are in the range -1 - -124: \ + /* user-visible error numbers are in the range -1 - -MAX_ERRNO: \ see */ \ - if (__builtin_expect ((unsigned long)(res) >= (unsigned long)(-125), 0)) { \ + if (__builtin_expect ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO), 0)) { \ errno = -(res); \ res = -1; \ } \ diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index eeb98c168e98..6137146516d3 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -623,16 +623,17 @@ __SYSCALL(__NR_move_pages, sys_move_pages) #ifdef __KERNEL__ #define __NR_syscall_max __NR_move_pages +#include #ifndef __NO_STUBS -/* user-visible error numbers are in the range -1 - -4095 */ +/* user-visible error numbers are in the range -1 - -MAX_ERRNO */ #define __syscall_clobber "r11","rcx","memory" #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-127)) { \ + if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ errno = -(res); \ res = -1; \ } \ diff --git a/include/linux/err.h b/include/linux/err.h index cd3b367f7445..1ab1d44f8d3b 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -15,6 +15,8 @@ */ #define MAX_ERRNO 4095 +#ifndef __ASSEMBLY__ + #define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) static inline void *ERR_PTR(long error) @@ -32,4 +34,6 @@ static inline long IS_ERR(const void *ptr) return IS_ERR_VALUE((unsigned long)ptr); } +#endif + #endif /* _LINUX_ERR_H */ -- cgit v1.2.3-71-gd317 From 07563c711fbc25389e58ab9c9f0b9de2fce56760 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 27 Sep 2006 01:50:56 -0700 Subject: [PATCH] EISA bus MODALIAS attributes support Add modalias attribute support for the almost forgotten now EISA bus and (at least some) EISA-aware modules. The modalias entry looks like (for an 3c509 NIC): eisa:sTCM5093 and the in-module alias like: eisa:sTCM5093* The patch moves struct eisa_device_id declaration from include/linux/eisa.h to include/linux/mod_devicetable.h (so that the former now #includes the latter), adds proper MODULE_DEVICE_TABLE(eisa, ...) statements for all drivers with EISA IDs I found (some drivers already have that DEVICE_TABLE declared), and adds recognision of __mod_eisa_device_table to scripts/mod/file2alias.c so that proper modules.alias will be generated. There's no support for /lib/modules/$kver/modules.eisamap, as it's not used by any existing tools, and because with in-kernel modalias mechanism those maps are obsolete anyway. The rationale for this patch is: a) to make EISA bus to act as other busses with modalias support, to unify driver loading b) to foget about EISA finally - with this patch, kernel (who still supports EISA) will be the only one who knows how to choose the necessary drivers for this bus ;) [akpm@osdl.org: fix the kbuild bit] Signed-off-by: Michael Tokarev Cc: Rusty Russell Cc: Randy Dunlap Acked-the-net-bits-by: Jeff Garzik Acked-the-tulip-bit-by: Valerie Henson Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/eisa/eisa-bus.c | 23 +++++++++++++++++++++++ drivers/net/3c509.c | 1 + drivers/net/3c59x.c | 1 + drivers/net/ne3210.c | 1 + drivers/net/tulip/de4x5.c | 1 + drivers/scsi/aha1740.c | 1 + drivers/scsi/aic7xxx/aic7770_osm.c | 3 ++- drivers/scsi/sim710.c | 1 + include/linux/eisa.h | 8 +------- include/linux/mod_devicetable.h | 12 ++++++++++++ scripts/mod/file2alias.c | 12 ++++++++++++ 11 files changed, 56 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c index 6078e2f58817..3a365e159d89 100644 --- a/drivers/eisa/eisa-bus.c +++ b/drivers/eisa/eisa-bus.c @@ -128,9 +128,23 @@ static int eisa_bus_match (struct device *dev, struct device_driver *drv) return 0; } +static int eisa_bus_uevent(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) +{ + struct eisa_device *edev = to_eisa_device(dev); + int i = 0; + int length = 0; + + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "MODALIAS=" EISA_DEVICE_MODALIAS_FMT, edev->id.sig); + envp[i] = NULL; + return 0; +} + struct bus_type eisa_bus_type = { .name = "eisa", .match = eisa_bus_match, + .uevent = eisa_bus_uevent, }; int eisa_driver_register (struct eisa_driver *edrv) @@ -160,6 +174,14 @@ static ssize_t eisa_show_state (struct device *dev, struct device_attribute *att static DEVICE_ATTR(enabled, S_IRUGO, eisa_show_state, NULL); +static ssize_t eisa_show_modalias (struct device *dev, struct device_attribute *attr, char *buf) +{ + struct eisa_device *edev = to_eisa_device (dev); + return sprintf (buf, EISA_DEVICE_MODALIAS_FMT "\n", edev->id.sig); +} + +static DEVICE_ATTR(modalias, S_IRUGO, eisa_show_modalias, NULL); + static int __init eisa_init_device (struct eisa_root_device *root, struct eisa_device *edev, int slot) @@ -209,6 +231,7 @@ static int __init eisa_register_device (struct eisa_device *edev) device_create_file (&edev->dev, &dev_attr_signature); device_create_file (&edev->dev, &dev_attr_enabled); + device_create_file (&edev->dev, &dev_attr_modalias); return 0; } diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 59c33925be62..b936373ab2a5 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -225,6 +225,7 @@ static struct eisa_device_id el3_eisa_ids[] = { { "TCM5095" }, { "" } }; +MODULE_DEVICE_TABLE(eisa, el3_eisa_ids); static int el3_eisa_probe (struct device *device); diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index af301f09d674..df42e28cc80f 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -851,6 +851,7 @@ static struct eisa_device_id vortex_eisa_ids[] = { { "TCM5970", CH_3C597 }, { "" } }; +MODULE_DEVICE_TABLE(eisa, vortex_eisa_ids); static int vortex_eisa_probe(struct device *device); static int vortex_eisa_remove(struct device *device); diff --git a/drivers/net/ne3210.c b/drivers/net/ne3210.c index 0fa8e4d22769..d66328975425 100644 --- a/drivers/net/ne3210.c +++ b/drivers/net/ne3210.c @@ -343,6 +343,7 @@ static struct eisa_device_id ne3210_ids[] = { { "NVL1801" }, { "" }, }; +MODULE_DEVICE_TABLE(eisa, ne3210_ids); static struct eisa_driver ne3210_eisa_driver = { .id_table = ne3210_ids, diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index e661d0a9cc64..fb5fa7d68888 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -2114,6 +2114,7 @@ static struct eisa_device_id de4x5_eisa_ids[] = { { "DEC4250", 0 }, /* 0 is the board name index... */ { "" } }; +MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids); static struct eisa_driver de4x5_eisa_driver = { .id_table = de4x5_eisa_ids, diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c index 0e4a7ebe300a..6b35ed8301e0 100644 --- a/drivers/scsi/aha1740.c +++ b/drivers/scsi/aha1740.c @@ -681,6 +681,7 @@ static struct eisa_device_id aha1740_ids[] = { { "ADP0400" }, /* 1744 */ { "" } }; +MODULE_DEVICE_TABLE(eisa, aha1740_ids); static struct eisa_driver aha1740_driver = { .id_table = aha1740_ids, diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c index 867cbe23579b..1ac119733bac 100644 --- a/drivers/scsi/aic7xxx/aic7770_osm.c +++ b/drivers/scsi/aic7xxx/aic7770_osm.c @@ -132,7 +132,8 @@ static struct eisa_device_id aic7770_ids[] = { { "ADP7770", 5 }, /* AIC7770 generic */ { "" } }; - +MODULE_DEVICE_TABLE(eisa, aic7770_ids); + static struct eisa_driver aic7770_driver = { .id_table = aic7770_ids, .driver = { diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c index b27e85428daa..551baccec523 100644 --- a/drivers/scsi/sim710.c +++ b/drivers/scsi/sim710.c @@ -282,6 +282,7 @@ static struct eisa_device_id sim710_eisa_ids[] = { { "HWP0C80" }, { "" } }; +MODULE_DEVICE_TABLE(eisa, sim710_eisa_ids); static __init int sim710_eisa_probe(struct device *dev) diff --git a/include/linux/eisa.h b/include/linux/eisa.h index 4079242dced8..1ff7c1392525 100644 --- a/include/linux/eisa.h +++ b/include/linux/eisa.h @@ -3,8 +3,8 @@ #include #include +#include -#define EISA_SIG_LEN 8 #define EISA_MAX_SLOTS 8 #define EISA_MAX_RESOURCES 4 @@ -27,12 +27,6 @@ #define EISA_CONFIG_ENABLED 1 #define EISA_CONFIG_FORCED 2 -/* The EISA signature, in ASCII form, null terminated */ -struct eisa_device_id { - char sig[EISA_SIG_LEN]; - unsigned long driver_data; -}; - /* There is not much we can say about an EISA device, apart from * signature, slot number, and base address. dma_mask is set by * default to parent device mask..*/ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f7ca0b09075d..e0c393cc7240 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -308,4 +308,16 @@ struct input_device_id { kernel_ulong_t driver_info; }; +/* EISA */ + +#define EISA_SIG_LEN 8 + +/* The EISA signature, in ASCII form, null terminated */ +struct eisa_device_id { + char sig[EISA_SIG_LEN]; + kernel_ulong_t driver_data; +}; + +#define EISA_DEVICE_MODALIAS_FMT "eisa:s%s" + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index de76da80443f..f61c9ccef6aa 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -444,6 +444,14 @@ static int do_input_entry(const char *filename, struct input_device_id *id, return 1; } +static int do_eisa_entry(const char *filename, struct eisa_device_id *eisa, + char *alias) +{ + if (eisa->sig[0]) + sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig); + return 1; +} + /* Ignore any prefix, eg. v850 prepends _ */ static inline int sym_is(const char *symbol, const char *name) { @@ -547,6 +555,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, do_table(symval, sym->st_size, sizeof(struct input_device_id), "input", do_input_entry, mod); + else if (sym_is(symname, "__mod_eisa_device_table")) + do_table(symval, sym->st_size, + sizeof(struct eisa_device_id), "eisa", + do_eisa_entry, mod); } /* Now add out buffered information to the generated C source */ -- cgit v1.2.3-71-gd317 From c18258c6f0848f97e85287f6271c511a092bb784 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:06 -0700 Subject: [PATCH] pid: Implement transfer_pid and use it to simplify de_thread In de_thread we move pids from one process to another, a rather ugly case. The function transfer_pid makes it clear what we are doing, and makes the action atomic. This is useful we ever want to atomically traverse the process group and session lists, in a rcu safe manner. Even if the atomic properties this change should be a win as transfer_pid should be less code to execute than executing both attach_pid and detach_pid, and this should make de_thread slightly smaller as only a single function call needs to be emitted. The only downside is that the code might be slower to execute as the odds are against transfer_pid being in cache. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 11 ++++------- include/linux/pid.h | 2 ++ kernel/pid.c | 9 +++++++++ 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 54135df2a966..b7aa3d6422d6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -696,23 +696,20 @@ static int de_thread(struct task_struct *tsk) */ /* Become a process group leader with the old leader's pid. - * Note: The old leader also uses thispid until release_task + * The old leader becomes a thread of the this thread group. + * Note: The old leader also uses this pid until release_task * is called. Odd but simple and correct. */ detach_pid(current, PIDTYPE_PID); current->pid = leader->pid; attach_pid(current, PIDTYPE_PID, current->pid); - attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); - attach_pid(current, PIDTYPE_SID, current->signal->session); + transfer_pid(leader, current, PIDTYPE_PGID); + transfer_pid(leader, current, PIDTYPE_SID); list_replace_rcu(&leader->tasks, ¤t->tasks); current->group_leader = current; leader->group_leader = current; - /* Reduce leader to a thread */ - detach_pid(leader, PIDTYPE_PGID); - detach_pid(leader, PIDTYPE_SID); - current->exit_signal = SIGCHLD; BUG_ON(leader->exit_state != EXIT_ZOMBIE); diff --git a/include/linux/pid.h b/include/linux/pid.h index 29960b03bef7..93da7e2d9f30 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -76,6 +76,8 @@ extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr)); extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); +extern void FASTCALL(transfer_pid(struct task_struct *old, + struct task_struct *new, enum pid_type)); /* * look up a PID in the hash table. Must be called with the tasklist_lock diff --git a/kernel/pid.c b/kernel/pid.c index 93e212f20671..6db82b68e2f8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -252,6 +252,15 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type) free_pid(pid); } +/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ +void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, + enum pid_type type) +{ + new->pids[type].pid = old->pids[type].pid; + hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); + old->pids[type].pid = NULL; +} + struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result = NULL; -- cgit v1.2.3-71-gd317 From 66f37509fc7191df468a8d183374f48b13bacb73 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 27 Sep 2006 01:51:13 -0700 Subject: [PATCH] fs/nfs/: make code static Signed-off-by: Adrian Bunk Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/namespace.c | 12 +++++++++--- include/linux/nfs_fs.h | 4 ---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 77b00684894d..60408646176b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -26,6 +26,11 @@ LIST_HEAD(nfs_automount_list); static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); int nfs_mountpoint_expiry_timeout = 500 * HZ; +static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs_fh *fh, + struct nfs_fattr *fattr); + /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - arbitrary string to prepend to the path @@ -209,9 +214,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, * @fattr - attributes for new root inode * */ -struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, - const struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr) +static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs_fh *fh, + struct nfs_fattr *fattr) { struct nfs_clone_mount mountdata = { .sb = mnt_parent->mnt_sb, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 36f5bcf513b0..98c9b9f667a5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -315,10 +315,6 @@ extern void nfs_end_data_update(struct inode *); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); -extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, - const struct dentry *dentry, - struct nfs_fh *fh, - struct nfs_fattr *fattr); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern u32 root_nfs_parse_addr(char *name); /*__init*/ -- cgit v1.2.3-71-gd317 From 1b79e5513d52e8533a08af35a3595dad80c74d1f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Sep 2006 01:51:14 -0700 Subject: [PATCH] add probe_kernel_address() Add a version of __get_user() which is safe to call inside mmap_sem. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 391e7ed1eb3f..a48d7f11c7be 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -19,4 +19,26 @@ static inline unsigned long __copy_from_user_nocache(void *to, #endif /* ARCH_HAS_NOCACHE_UACCESS */ +/** + * probe_kernel_address(): safely attempt to read from a location + * @addr: address to read from - its type is type typeof(retval)* + * @retval: read into this variable + * + * Safely read from address @addr into variable @revtal. If a kernel fault + * happens, handle that and return -EFAULT. + * We ensure that the __get_user() is executed in atomic context so that + * do_page_fault() doesn't attempt to take mmap_sem. This makes + * probe_kernel_address() suitable for use within regions where the caller + * already holds mmap_sem, or other locks which nest inside mmap_sem. + */ +#define probe_kernel_address(addr, retval) \ + ({ \ + long ret; \ + \ + inc_preempt_count(); \ + ret = __get_user(retval, addr); \ + dec_preempt_count(); \ + ret; \ + }) + #endif /* __LINUX_UACCESS_H__ */ -- cgit v1.2.3-71-gd317 From 3a16f7b4a75d68364c3278523f51ac141a12758a Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 29 Jun 2006 12:27:23 -0700 Subject: USB: move to Move to . Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- arch/arm/plat-omap/usb.c | 2 +- drivers/i2c/chips/isp1301_omap.c | 2 +- drivers/usb/gadget/omap_udc.c | 2 +- drivers/usb/host/ohci-hcd.c | 2 +- include/linux/usb/otg.h | 131 +++++++++++++++++++++++++++++++++++++++ include/linux/usb_otg.h | 131 --------------------------------------- 6 files changed, 135 insertions(+), 135 deletions(-) create mode 100644 include/linux/usb/otg.h delete mode 100644 include/linux/usb_otg.h (limited to 'include/linux') diff --git a/arch/arm/plat-omap/usb.c b/arch/arm/plat-omap/usb.c index 9b815327b6a5..7e8096809be2 100644 --- a/arch/arm/plat-omap/usb.c +++ b/arch/arm/plat-omap/usb.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/i2c/chips/isp1301_omap.c b/drivers/i2c/chips/isp1301_omap.c index f92505b94c61..182f04953466 100644 --- a/drivers/i2c/chips/isp1301_omap.c +++ b/drivers/i2c/chips/isp1301_omap.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 2de9748ee673..81f0389fcc94 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index cbf38ae5ae76..7c3d8c60a60f 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -88,7 +88,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h new file mode 100644 index 000000000000..9897f7a818c5 --- /dev/null +++ b/include/linux/usb/otg.h @@ -0,0 +1,131 @@ +// include/linux/usb/otg.h + +/* + * These APIs may be used between USB controllers. USB device drivers + * (for either host or peripheral roles) don't use these calls; they + * continue to use just usb_device and usb_gadget. + */ + + +/* OTG defines lots of enumeration states before device reset */ +enum usb_otg_state { + OTG_STATE_UNDEFINED = 0, + + /* single-role peripheral, and dual-role default-b */ + OTG_STATE_B_IDLE, + OTG_STATE_B_SRP_INIT, + OTG_STATE_B_PERIPHERAL, + + /* extra dual-role default-b states */ + OTG_STATE_B_WAIT_ACON, + OTG_STATE_B_HOST, + + /* dual-role default-a */ + OTG_STATE_A_IDLE, + OTG_STATE_A_WAIT_VRISE, + OTG_STATE_A_WAIT_BCON, + OTG_STATE_A_HOST, + OTG_STATE_A_SUSPEND, + OTG_STATE_A_PERIPHERAL, + OTG_STATE_A_WAIT_VFALL, + OTG_STATE_A_VBUS_ERR, +}; + +/* + * the otg driver needs to interact with both device side and host side + * usb controllers. it decides which controller is active at a given + * moment, using the transceiver, ID signal, HNP and sometimes static + * configuration information (including "board isn't wired for otg"). + */ +struct otg_transceiver { + struct device *dev; + const char *label; + + u8 default_a; + enum usb_otg_state state; + + struct usb_bus *host; + struct usb_gadget *gadget; + + /* to pass extra port status to the root hub */ + u16 port_status; + u16 port_change; + + /* bind/unbind the host controller */ + int (*set_host)(struct otg_transceiver *otg, + struct usb_bus *host); + + /* bind/unbind the peripheral controller */ + int (*set_peripheral)(struct otg_transceiver *otg, + struct usb_gadget *gadget); + + /* effective for B devices, ignored for A-peripheral */ + int (*set_power)(struct otg_transceiver *otg, + unsigned mA); + + /* for non-OTG B devices: set transceiver into suspend mode */ + int (*set_suspend)(struct otg_transceiver *otg, + int suspend); + + /* for B devices only: start session with A-Host */ + int (*start_srp)(struct otg_transceiver *otg); + + /* start or continue HNP role switch */ + int (*start_hnp)(struct otg_transceiver *otg); + +}; + + +/* for board-specific init logic */ +extern int otg_set_transceiver(struct otg_transceiver *); + + +/* for usb host and peripheral controller drivers */ +extern struct otg_transceiver *otg_get_transceiver(void); + +static inline int +otg_start_hnp(struct otg_transceiver *otg) +{ + return otg->start_hnp(otg); +} + + +/* for HCDs */ +static inline int +otg_set_host(struct otg_transceiver *otg, struct usb_bus *host) +{ + return otg->set_host(otg, host); +} + + +/* for usb peripheral controller drivers */ +static inline int +otg_set_peripheral(struct otg_transceiver *otg, struct usb_gadget *periph) +{ + return otg->set_peripheral(otg, periph); +} + +static inline int +otg_set_power(struct otg_transceiver *otg, unsigned mA) +{ + return otg->set_power(otg, mA); +} + +static inline int +otg_set_suspend(struct otg_transceiver *otg, int suspend) +{ + if (otg->set_suspend != NULL) + return otg->set_suspend(otg, suspend); + else + return 0; +} + +static inline int +otg_start_srp(struct otg_transceiver *otg) +{ + return otg->start_srp(otg); +} + + +/* for OTG controller drivers (and maybe other stuff) */ +extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); diff --git a/include/linux/usb_otg.h b/include/linux/usb_otg.h deleted file mode 100644 index f827f6e203c2..000000000000 --- a/include/linux/usb_otg.h +++ /dev/null @@ -1,131 +0,0 @@ -// include/linux/usb_otg.h - -/* - * These APIs may be used between USB controllers. USB device drivers - * (for either host or peripheral roles) don't use these calls; they - * continue to use just usb_device and usb_gadget. - */ - - -/* OTG defines lots of enumeration states before device reset */ -enum usb_otg_state { - OTG_STATE_UNDEFINED = 0, - - /* single-role peripheral, and dual-role default-b */ - OTG_STATE_B_IDLE, - OTG_STATE_B_SRP_INIT, - OTG_STATE_B_PERIPHERAL, - - /* extra dual-role default-b states */ - OTG_STATE_B_WAIT_ACON, - OTG_STATE_B_HOST, - - /* dual-role default-a */ - OTG_STATE_A_IDLE, - OTG_STATE_A_WAIT_VRISE, - OTG_STATE_A_WAIT_BCON, - OTG_STATE_A_HOST, - OTG_STATE_A_SUSPEND, - OTG_STATE_A_PERIPHERAL, - OTG_STATE_A_WAIT_VFALL, - OTG_STATE_A_VBUS_ERR, -}; - -/* - * the otg driver needs to interact with both device side and host side - * usb controllers. it decides which controller is active at a given - * moment, using the transceiver, ID signal, HNP and sometimes static - * configuration information (including "board isn't wired for otg"). - */ -struct otg_transceiver { - struct device *dev; - const char *label; - - u8 default_a; - enum usb_otg_state state; - - struct usb_bus *host; - struct usb_gadget *gadget; - - /* to pass extra port status to the root hub */ - u16 port_status; - u16 port_change; - - /* bind/unbind the host controller */ - int (*set_host)(struct otg_transceiver *otg, - struct usb_bus *host); - - /* bind/unbind the peripheral controller */ - int (*set_peripheral)(struct otg_transceiver *otg, - struct usb_gadget *gadget); - - /* effective for B devices, ignored for A-peripheral */ - int (*set_power)(struct otg_transceiver *otg, - unsigned mA); - - /* for non-OTG B devices: set transceiver into suspend mode */ - int (*set_suspend)(struct otg_transceiver *otg, - int suspend); - - /* for B devices only: start session with A-Host */ - int (*start_srp)(struct otg_transceiver *otg); - - /* start or continue HNP role switch */ - int (*start_hnp)(struct otg_transceiver *otg); - -}; - - -/* for board-specific init logic */ -extern int otg_set_transceiver(struct otg_transceiver *); - - -/* for usb host and peripheral controller drivers */ -extern struct otg_transceiver *otg_get_transceiver(void); - -static inline int -otg_start_hnp(struct otg_transceiver *otg) -{ - return otg->start_hnp(otg); -} - - -/* for HCDs */ -static inline int -otg_set_host(struct otg_transceiver *otg, struct usb_bus *host) -{ - return otg->set_host(otg, host); -} - - -/* for usb peripheral controller drivers */ -static inline int -otg_set_peripheral(struct otg_transceiver *otg, struct usb_gadget *periph) -{ - return otg->set_peripheral(otg, periph); -} - -static inline int -otg_set_power(struct otg_transceiver *otg, unsigned mA) -{ - return otg->set_power(otg, mA); -} - -static inline int -otg_set_suspend(struct otg_transceiver *otg, int suspend) -{ - if (otg->set_suspend != NULL) - return otg->set_suspend(otg, suspend); - else - return 0; -} - -static inline int -otg_start_srp(struct otg_transceiver *otg) -{ - return otg->start_srp(otg); -} - - -/* for OTG controller drivers (and maybe other stuff) */ -extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); -- cgit v1.2.3-71-gd317 From 8bb54ab573ecd1b4fe2ed66416a8d99a86e65316 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 1 Jul 2006 22:08:49 -0400 Subject: usbcore: add usb_device_driver definition This patch (as732) adds a usb_device_driver structure, for representing drivers that manage an entire USB device as opposed to just an interface. Support routines like usb_register_device_driver, usb_deregister_device_driver, usb_probe_device, and usb_unbind_device are also added. Unlike an earlier version of this patch, the new code is type-safe. To accomplish this, the existing struct driver embedded in struct usb_driver had to be wrapped in an intermediate wrapper. This enables the core to tell at runtime whether a particular struct driver belongs to a device driver or to an interface driver. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 219 +++++++++++++++++++++++++++++++++------------ drivers/usb/core/generic.c | 17 +--- drivers/usb/core/usb.c | 15 ++-- drivers/usb/core/usb.h | 20 ++++- include/linux/usb.h | 58 ++++++++++-- 5 files changed, 245 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 8dcf2cd0c569..0d4b5dcee3ab 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -84,7 +84,7 @@ static int usb_create_newid_file(struct usb_driver *usb_drv) goto exit; if (usb_drv->probe != NULL) - error = sysfs_create_file(&usb_drv->driver.kobj, + error = sysfs_create_file(&usb_drv->drvwrap.driver.kobj, &driver_attr_new_id.attr); exit: return error; @@ -96,7 +96,7 @@ static void usb_remove_newid_file(struct usb_driver *usb_drv) return; if (usb_drv->probe != NULL) - sysfs_remove_file(&usb_drv->driver.kobj, + sysfs_remove_file(&usb_drv->drvwrap.driver.kobj, &driver_attr_new_id.attr); } @@ -143,18 +143,55 @@ static const struct usb_device_id *usb_match_dynamic_id(struct usb_interface *in } -/* called from driver core with usb_bus_type.subsys writelock */ +/* called from driver core with dev locked */ +static int usb_probe_device(struct device *dev) +{ + struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); + struct usb_device *udev; + int error = -ENODEV; + + dev_dbg(dev, "%s\n", __FUNCTION__); + + if (!is_usb_device(dev)) /* Sanity check */ + return error; + + udev = to_usb_device(dev); + + /* FIXME: resume a suspended device */ + if (udev->state == USB_STATE_SUSPENDED) + return -EHOSTUNREACH; + + /* TODO: Add real matching code */ + + error = udriver->probe(udev); + return error; +} + +/* called from driver core with dev locked */ +static int usb_unbind_device(struct device *dev) +{ + struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); + + udriver->disconnect(to_usb_device(dev)); + return 0; +} + + +/* called from driver core with dev locked */ static int usb_probe_interface(struct device *dev) { - struct usb_interface * intf = to_usb_interface(dev); - struct usb_driver * driver = to_usb_driver(dev->driver); + struct usb_driver *driver = to_usb_driver(dev->driver); + struct usb_interface *intf; const struct usb_device_id *id; int error = -ENODEV; dev_dbg(dev, "%s\n", __FUNCTION__); - if (!driver->probe) + if (is_usb_device(dev)) /* Sanity check */ return error; + + intf = to_usb_interface(dev); + /* FIXME we'd much prefer to just resume it ... */ if (interface_to_usbdev(intf)->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; @@ -182,19 +219,18 @@ static int usb_probe_interface(struct device *dev) return error; } -/* called from driver core with usb_bus_type.subsys writelock */ +/* called from driver core with dev locked */ static int usb_unbind_interface(struct device *dev) { + struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf = to_usb_interface(dev); - struct usb_driver *driver = to_usb_driver(intf->dev.driver); intf->condition = USB_INTERFACE_UNBINDING; /* release all urbs for this interface */ usb_disable_interface(interface_to_usbdev(intf), intf); - if (driver && driver->disconnect) - driver->disconnect(intf); + driver->disconnect(intf); /* reset other interface state */ usb_set_interface(interface_to_usbdev(intf), @@ -235,7 +271,7 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (dev->driver) return -EBUSY; - dev->driver = &driver->driver; + dev->driver = &driver->drvwrap.driver; usb_set_intfdata(iface, priv); iface->condition = USB_INTERFACE_BOUND; mark_active(iface); @@ -270,7 +306,7 @@ void usb_driver_release_interface(struct usb_driver *driver, struct device *dev = &iface->dev; /* this should never happen, don't release something that's not ours */ - if (!dev->driver || dev->driver != &driver->driver) + if (!dev->driver || dev->driver != &driver->drvwrap.driver) return; /* don't release from within disconnect() */ @@ -433,24 +469,37 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_match_id); int usb_device_match(struct device *dev, struct device_driver *drv) { - struct usb_interface *intf; - struct usb_driver *usb_drv; - const struct usb_device_id *id; - - /* check for generic driver, which we don't match any device with */ - if (drv == &usb_generic_driver) - return 0; + /* devices and interfaces are handled separately */ + if (is_usb_device(dev)) { - intf = to_usb_interface(dev); - usb_drv = to_usb_driver(drv); + /* interface drivers never match devices */ + if (!is_usb_device_driver(drv)) + return 0; - id = usb_match_id(intf, usb_drv->id_table); - if (id) + /* TODO: Add real matching code */ return 1; - id = usb_match_dynamic_id(intf, usb_drv); - if (id) - return 1; + } else { + struct usb_interface *intf; + struct usb_driver *usb_drv; + const struct usb_device_id *id; + + /* device drivers never match interfaces */ + if (is_usb_device_driver(drv)) + return 0; + + intf = to_usb_interface(dev); + usb_drv = to_usb_driver(drv); + + id = usb_match_id(intf, usb_drv->id_table); + if (id) + return 1; + + id = usb_match_dynamic_id(intf, usb_drv); + if (id) + return 1; + } + return 0; } @@ -481,14 +530,13 @@ static int usb_uevent(struct device *dev, char **envp, int num_envp, /* driver is often null here; dev_dbg() would oops */ pr_debug ("usb %s: uevent\n", dev->bus_id); - /* Must check driver_data here, as on remove driver is always NULL */ - if ((dev->driver == &usb_generic_driver) || - (dev->driver_data == &usb_generic_driver_data)) + if (is_usb_device(dev)) return 0; - - intf = to_usb_interface(dev); - usb_dev = interface_to_usbdev (intf); - alt = intf->cur_altsetting; + else { + intf = to_usb_interface(dev); + usb_dev = interface_to_usbdev(intf); + alt = intf->cur_altsetting; + } if (usb_dev->devnum < 0) { pr_debug ("usb %s: already deleted?\n", dev->bus_id); @@ -569,13 +617,71 @@ static int usb_uevent(struct device *dev, char **envp, #endif /* CONFIG_HOTPLUG */ /** - * usb_register_driver - register a USB driver - * @new_driver: USB operations for the driver + * usb_register_device_driver - register a USB device (not interface) driver + * @new_udriver: USB operations for the device driver * @owner: module owner of this driver. * - * Registers a USB driver with the USB core. The list of unattached - * interfaces will be rescanned whenever a new driver is added, allowing - * the new driver to attach to any recognized devices. + * Registers a USB device driver with the USB core. The list of + * unattached devices will be rescanned whenever a new driver is + * added, allowing the new driver to attach to any recognized devices. + * Returns a negative error code on failure and 0 on success. + */ +int usb_register_device_driver(struct usb_device_driver *new_udriver, + struct module *owner) +{ + int retval = 0; + + if (usb_disabled()) + return -ENODEV; + + new_udriver->drvwrap.for_devices = 1; + new_udriver->drvwrap.driver.name = (char *) new_udriver->name; + new_udriver->drvwrap.driver.bus = &usb_bus_type; + new_udriver->drvwrap.driver.probe = usb_probe_device; + new_udriver->drvwrap.driver.remove = usb_unbind_device; + new_udriver->drvwrap.driver.owner = owner; + + retval = driver_register(&new_udriver->drvwrap.driver); + + if (!retval) { + pr_info("%s: registered new device driver %s\n", + usbcore_name, new_udriver->name); + usbfs_update_special(); + } else { + printk(KERN_ERR "%s: error %d registering device " + " driver %s\n", + usbcore_name, retval, new_udriver->name); + } + + return retval; +} +EXPORT_SYMBOL_GPL(usb_register_device_driver); + +/** + * usb_deregister_device_driver - unregister a USB device (not interface) driver + * @udriver: USB operations of the device driver to unregister + * Context: must be able to sleep + * + * Unlinks the specified driver from the internal USB driver list. + */ +void usb_deregister_device_driver(struct usb_device_driver *udriver) +{ + pr_info("%s: deregistering device driver %s\n", + usbcore_name, udriver->name); + + driver_unregister(&udriver->drvwrap.driver); + usbfs_update_special(); +} +EXPORT_SYMBOL_GPL(usb_deregister_device_driver); + +/** + * usb_register_driver - register a USB interface driver + * @new_driver: USB operations for the interface driver + * @owner: module owner of this driver. + * + * Registers a USB interface driver with the USB core. The list of + * unattached interfaces will be rescanned whenever a new driver is + * added, allowing the new driver to attach to any recognized interfaces. * Returns a negative error code on failure and 0 on success. * * NOTE: if you want your driver to use the USB major number, you must call @@ -589,23 +695,25 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner) if (usb_disabled()) return -ENODEV; - new_driver->driver.name = (char *)new_driver->name; - new_driver->driver.bus = &usb_bus_type; - new_driver->driver.probe = usb_probe_interface; - new_driver->driver.remove = usb_unbind_interface; - new_driver->driver.owner = owner; + new_driver->drvwrap.for_devices = 0; + new_driver->drvwrap.driver.name = (char *) new_driver->name; + new_driver->drvwrap.driver.bus = &usb_bus_type; + new_driver->drvwrap.driver.probe = usb_probe_interface; + new_driver->drvwrap.driver.remove = usb_unbind_interface; + new_driver->drvwrap.driver.owner = owner; spin_lock_init(&new_driver->dynids.lock); INIT_LIST_HEAD(&new_driver->dynids.list); - retval = driver_register(&new_driver->driver); + retval = driver_register(&new_driver->drvwrap.driver); if (!retval) { - pr_info("%s: registered new driver %s\n", + pr_info("%s: registered new interface driver %s\n", usbcore_name, new_driver->name); usbfs_update_special(); usb_create_newid_file(new_driver); } else { - printk(KERN_ERR "%s: error %d registering driver %s\n", + printk(KERN_ERR "%s: error %d registering interface " + " driver %s\n", usbcore_name, retval, new_driver->name); } @@ -614,8 +722,8 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner) EXPORT_SYMBOL_GPL_FUTURE(usb_register_driver); /** - * usb_deregister - unregister a USB driver - * @driver: USB operations of the driver to unregister + * usb_deregister - unregister a USB interface driver + * @driver: USB operations of the interface driver to unregister * Context: must be able to sleep * * Unlinks the specified driver from the internal USB driver list. @@ -626,11 +734,12 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_register_driver); */ void usb_deregister(struct usb_driver *driver) { - pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name); + pr_info("%s: deregistering interface driver %s\n", + usbcore_name, driver->name); usb_remove_newid_file(driver); usb_free_dynids(driver); - driver_unregister(&driver->driver); + driver_unregister(&driver->drvwrap.driver); usbfs_update_special(); } @@ -655,7 +764,7 @@ static int usb_generic_suspend(struct device *dev, pm_message_t message) * marked for FREEZE as soon as their children are already idled. * But those semantics are useless, so we equate the two (sigh). */ - if (dev->driver == &usb_generic_driver) { + if (is_usb_device(dev)) { if (dev->power.power_state.event == message.event) return 0; /* we need to rule out bogus requests through sysfs */ @@ -665,8 +774,7 @@ static int usb_generic_suspend(struct device *dev, pm_message_t message) return usb_port_suspend(to_usb_device(dev)); } - if ((dev->driver == NULL) || - (dev->driver_data == &usb_generic_driver_data)) + if (dev->driver == NULL) return 0; intf = to_usb_interface(dev); @@ -705,15 +813,14 @@ static int usb_generic_resume(struct device *dev) dev->power.power_state.event = PM_EVENT_ON; /* devices resume through their hubs */ - if (dev->driver == &usb_generic_driver) { + if (is_usb_device(dev)) { udev = to_usb_device(dev); if (udev->state == USB_STATE_NOTATTACHED) return 0; return usb_port_resume(udev); } - if ((dev->driver == NULL) || - (dev->driver_data == &usb_generic_driver_data)) { + if (dev->driver == NULL) { dev->power.power_state.event = PM_EVENT_FREEZE; return 0; } diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c index 7bab9769b34f..fa6f34a12b4b 100644 --- a/drivers/usb/core/generic.c +++ b/drivers/usb/core/generic.c @@ -21,14 +21,12 @@ #include #include "usb.h" -static int generic_probe(struct device *dev) +static int generic_probe(struct usb_device *udev) { return 0; } -static int generic_remove(struct device *dev) +static void generic_disconnect(struct usb_device *udev) { - struct usb_device *udev = to_usb_device(dev); - /* if this is only an unbind, not a physical disconnect, then * unconfigure the device */ if (udev->state == USB_STATE_CONFIGURED) @@ -37,17 +35,10 @@ static int generic_remove(struct device *dev) /* in case the call failed or the device was suspended */ if (udev->state >= USB_STATE_CONFIGURED) usb_disable_device(udev, 0); - return 0; } -struct device_driver usb_generic_driver = { - .owner = THIS_MODULE, +struct usb_device_driver usb_generic_driver = { .name = "usb", - .bus = &usb_bus_type, .probe = generic_probe, - .remove = generic_remove, + .disconnect = generic_disconnect, }; - -/* Fun hack to determine if the struct device is a - * usb device or a usb interface. */ -int usb_generic_driver_data; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 0b8c67bcde60..6dfbc284369b 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -123,7 +123,7 @@ static int __find_interface(struct device * dev, void * data) struct usb_interface *intf; /* can't look at usb devices, only interfaces */ - if (dev->driver == &usb_generic_driver) + if (is_usb_device(dev)) return 0; intf = to_usb_interface(dev); @@ -149,7 +149,8 @@ struct usb_interface *usb_find_interface(struct usb_driver *drv, int minor) argb.minor = minor; argb.interface = NULL; - driver_for_each_device(&drv->driver, NULL, &argb, __find_interface); + driver_for_each_device(&drv->drvwrap.driver, NULL, &argb, + __find_interface); return argb.interface; } @@ -204,11 +205,13 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1) device_initialize(&dev->dev); dev->dev.bus = &usb_bus_type; dev->dev.dma_mask = bus->controller->dma_mask; - dev->dev.driver_data = &usb_generic_driver_data; - dev->dev.driver = &usb_generic_driver; + dev->dev.driver = &usb_generic_driver.drvwrap.driver; dev->dev.release = usb_release_dev; dev->state = USB_STATE_ATTACHED; + /* This magic assignment distinguishes devices from interfaces */ + dev->dev.platform_data = &usb_generic_driver; + INIT_LIST_HEAD(&dev->ep0.urb_list); dev->ep0.desc.bLength = USB_DT_ENDPOINT_SIZE; dev->ep0.desc.bDescriptorType = USB_DT_ENDPOINT; @@ -838,7 +841,7 @@ static int __init usb_init(void) retval = usb_hub_init(); if (retval) goto hub_init_failed; - retval = driver_register(&usb_generic_driver); + retval = usb_register_device_driver(&usb_generic_driver, THIS_MODULE); if (!retval) goto out; @@ -868,7 +871,7 @@ static void __exit usb_exit(void) if (nousb) return; - driver_unregister(&usb_generic_driver); + usb_deregister_device_driver(&usb_generic_driver); usb_major_cleanup(); usbfs_cleanup(); usb_deregister(&usbfs_driver); diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 82d397a6f773..1d25ccac7832 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -34,8 +34,24 @@ extern int usb_port_suspend(struct usb_device *dev); extern int usb_port_resume(struct usb_device *dev); extern struct bus_type usb_bus_type; -extern struct device_driver usb_generic_driver; -extern int usb_generic_driver_data; +extern struct usb_device_driver usb_generic_driver; + +/* Here's how we tell apart devices and interfaces. Luckily there's + * no such thing as a platform USB device, so we can steal the use + * of the platform_data field. */ + +static inline int is_usb_device(struct device *dev) +{ + return dev->platform_data == &usb_generic_driver; +} + +/* Do the same for device drivers and interface drivers. */ + +static inline int is_usb_device_driver(struct device_driver *drv) +{ + return container_of(drv, struct usbdrv_wrap, driver)-> + for_devices; +} /* Interfaces and their "power state" are owned by usbcore */ diff --git a/include/linux/usb.h b/include/linux/usb.h index d2bd0c8e0154..b4ccce6d0982 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -540,7 +540,17 @@ struct usb_dynids { }; /** - * struct usb_driver - identifies USB driver to usbcore + * struct usbdrv_wrap - wrapper for driver-model structure + * @driver: The driver-model core driver structure. + * @for_devices: Non-zero for device drivers, 0 for interface drivers. + */ +struct usbdrv_wrap { + struct device_driver driver; + int for_devices; +}; + +/** + * struct usb_driver - identifies USB interface driver to usbcore * @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. * @probe: Called to see if the driver is willing to manage a particular @@ -567,12 +577,12 @@ struct usb_dynids { * or your driver's probe function will never get called. * @dynids: used internally to hold the list of dynamically added device * ids for this driver. - * @driver: the driver model core driver structure. + * @drvwrap: Driver-model core structure wrapper. * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be * added to this driver by preventing the sysfs file from being created. * - * USB drivers must provide a name, probe() and disconnect() methods, - * and an id_table. Other driver fields are optional. + * USB interface drivers must provide a name, probe() and disconnect() + * methods, and an id_table. Other driver fields are optional. * * The id_table is used in hotplugging. It holds a set of descriptors, * and specialized data may be associated with each entry. That table @@ -606,10 +616,40 @@ struct usb_driver { const struct usb_device_id *id_table; struct usb_dynids dynids; - struct device_driver driver; + struct usbdrv_wrap drvwrap; unsigned int no_dynamic_id:1; }; -#define to_usb_driver(d) container_of(d, struct usb_driver, driver) +#define to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver) + +/** + * struct usb_device_driver - identifies USB device driver to usbcore + * @name: The driver name should be unique among USB drivers, + * and should normally be the same as the module name. + * @probe: Called to see if the driver is willing to manage a particular + * device. If it is, probe returns zero and uses dev_set_drvdata() + * to associate driver-specific data with the device. If unwilling + * to manage the device, return a negative errno value. + * @disconnect: Called when the device is no longer accessible, usually + * because it has been (or is being) disconnected or the driver's + * module is being unloaded. + * @suspend: Called when the device is going to be suspended by the system. + * @resume: Called when the device is being resumed by the system. + * @drvwrap: Driver-model core structure wrapper. + * + * USB drivers must provide all the fields listed above except drvwrap. + */ +struct usb_device_driver { + const char *name; + + int (*probe) (struct usb_device *udev); + void (*disconnect) (struct usb_device *udev); + + int (*suspend) (struct usb_device *udev, pm_message_t message); + int (*resume) (struct usb_device *udev); + struct usbdrv_wrap drvwrap; +}; +#define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ + drvwrap.driver) extern struct bus_type usb_bus_type; @@ -633,13 +673,17 @@ struct usb_class_driver { * use these in module_init()/module_exit() * and don't forget MODULE_DEVICE_TABLE(usb, ...) */ -int usb_register_driver(struct usb_driver *, struct module *); +extern int usb_register_driver(struct usb_driver *, struct module *); static inline int usb_register(struct usb_driver *driver) { return usb_register_driver(driver, THIS_MODULE); } extern void usb_deregister(struct usb_driver *); +extern int usb_register_device_driver(struct usb_device_driver *, + struct module *); +extern void usb_deregister_device_driver(struct usb_device_driver *); + extern int usb_register_dev(struct usb_interface *intf, struct usb_class_driver *class_driver); extern void usb_deregister_dev(struct usb_interface *intf, -- cgit v1.2.3-71-gd317 From 4d064c080265a41324d108fccc26b72106d43db3 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 1 Jul 2006 22:11:44 -0400 Subject: usbcore: track whether interfaces are suspended Currently we rely on intf->dev.power.power_state.event for tracking whether intf is suspended. This is not a reliable technique because that value is owned by the PM core, not by usbcore. This patch (as718b) adds a new flag so that we can accurately tell which interfaces are suspended and which aren't. At first one might think these flags aren't needed, since interfaces will be suspended along with their devices. It turns out there are a couple of intermediate situations where that's not quite true, such as while processing a remote-wakeup request. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb.h | 6 +++--- include/linux/usb.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index cc42972b6bb0..74df0db954c9 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -59,17 +59,17 @@ static inline int is_usb_device_driver(struct device_driver *drv) static inline void mark_active(struct usb_interface *f) { - f->dev.power.power_state.event = PM_EVENT_ON; + f->is_active = 1; } static inline void mark_quiesced(struct usb_interface *f) { - f->dev.power.power_state.event = PM_EVENT_FREEZE; + f->is_active = 0; } static inline int is_active(struct usb_interface *f) { - return f->dev.power.power_state.event == PM_EVENT_ON; + return f->is_active; } diff --git a/include/linux/usb.h b/include/linux/usb.h index b4ccce6d0982..e22f4b386605 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -102,6 +102,7 @@ enum usb_interface_condition { * number from the USB core by calling usb_register_dev(). * @condition: binding state of the interface: not bound, binding * (in probe()), bound to a driver, or unbinding (in disconnect()) + * @is_active: flag set when the interface is bound and not suspended. * @dev: driver model's view of this device * @class_dev: driver model's class view of this device. * @@ -142,6 +143,8 @@ struct usb_interface { int minor; /* minor number this interface is * bound to */ enum usb_interface_condition condition; /* state of binding */ + unsigned is_active:1; /* the interface is not suspended */ + struct device dev; /* interface specific device info */ struct class_device *class_dev; }; -- cgit v1.2.3-71-gd317 From f2ebf92c9e1930a8f79b7eb49a32122931929014 Mon Sep 17 00:00:00 2001 From: Ben Williamson Date: Tue, 1 Aug 2006 11:28:16 +1000 Subject: USB: gmidi: New USB MIDI Gadget class driver. This driver is glue between the USB gadget interface and the ALSA MIDI interface. It allows us to appear as a MIDI Streaming device to a host system on the other end of a USB cable. This includes linux/usb/audio.h and linux/usb/midi.h containing definitions from the relevant USB specifications for USB audio and USB MIDI devices. The following changes have been made since the first RFC posting: * Bug fixes to endpoint handling. * Workaround for USB_REQ_SET_CONFIGURATION handling, not understood yet. * Added SND and SND_RAWMIDI dependencies in Kconfig. * Moved usb_audio.h and usb_midi.h to usb/*.h * Added module parameters for ALSA card index and id. * Added module parameters for USB descriptor IDs and strings. * Removed some unneeded stuff inherited from zero.c, more to go. * Provide DECLARE_* macros for the variable-length structs. * Use kmalloc instead of usb_ep_alloc_buffer. * Limit source to 80 columns. * Return actual error code instead of -ENOMEM in a few places. Signed-off-by: Ben Williamson Cc: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/Kconfig | 14 + drivers/usb/gadget/Makefile | 2 + drivers/usb/gadget/gmidi.c | 1337 +++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/audio.h | 53 ++ include/linux/usb/midi.h | 112 ++++ 5 files changed, 1518 insertions(+) create mode 100644 drivers/usb/gadget/gmidi.c create mode 100644 include/linux/usb/audio.h create mode 100644 include/linux/usb/midi.h (limited to 'include/linux') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 1a32d96774b4..4301e96c417b 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -404,6 +404,20 @@ config USB_G_SERIAL which includes instructions and a "driver info file" needed to make MS-Windows work with this driver. +config USB_MIDI_GADGET + tristate "MIDI Gadget (EXPERIMENTAL)" + depends on SND && EXPERIMENTAL + select SND_RAWMIDI + help + The MIDI Gadget acts as a USB Audio device, with one MIDI + input and one MIDI output. These MIDI jacks appear as + a sound "card" in the ALSA sound system. Other MIDI + connections can then be made on the gadget system, using + ALSA's aconnect utility etc. + + Say "y" to link the driver statically, or "m" to build a + dynamically linked module called "g_midi". + # put drivers that need isochronous transfer support (for audio # or video class gadget drivers), or specific hardware, here. diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 5a28e61392ec..e71e086a1cfa 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_USB_AT91) += at91_udc.o g_zero-objs := zero.o usbstring.o config.o epautoconf.o g_ether-objs := ether.o usbstring.o config.o epautoconf.o g_serial-objs := serial.o usbstring.o config.o epautoconf.o +g_midi-objs := gmidi.o usbstring.o config.o epautoconf.o gadgetfs-objs := inode.o g_file_storage-objs := file_storage.o usbstring.o config.o \ epautoconf.o @@ -28,4 +29,5 @@ obj-$(CONFIG_USB_ETH) += g_ether.o obj-$(CONFIG_USB_GADGETFS) += gadgetfs.o obj-$(CONFIG_USB_FILE_STORAGE) += g_file_storage.o obj-$(CONFIG_USB_G_SERIAL) += g_serial.o +obj-$(CONFIG_USB_MIDI_GADGET) += g_midi.o diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c new file mode 100644 index 000000000000..b68cecd57411 --- /dev/null +++ b/drivers/usb/gadget/gmidi.c @@ -0,0 +1,1337 @@ +/* + * gmidi.c -- USB MIDI Gadget Driver + * + * Copyright (C) 2006 Thumtronics Pty Ltd. + * Developed for Thumtronics by Grey Innovation + * Ben Williamson + * + * This software is distributed under the terms of the GNU General Public + * License ("GPL") version 2, as published by the Free Software Foundation. + * + * This code is based in part on: + * + * Gadget Zero driver, Copyright (C) 2003-2004 David Brownell. + * USB Audio driver, Copyright (C) 2002 by Takashi Iwai. + * USB MIDI driver, Copyright (C) 2002-2005 Clemens Ladisch. + * + * Refer to the USB Device Class Definition for MIDI Devices: + * http://www.usb.org/developers/devclass_docs/midi10.pdf + */ + +#define DEBUG 1 +// #define VERBOSE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gadget_chips.h" + +MODULE_AUTHOR("Ben Williamson"); +MODULE_LICENSE("GPL v2"); + +#define DRIVER_VERSION "25 Jul 2006" + +static const char shortname[] = "g_midi"; +static const char longname[] = "MIDI Gadget"; + +static int index = SNDRV_DEFAULT_IDX1; +static char *id = SNDRV_DEFAULT_STR1; + +module_param(index, int, 0444); +MODULE_PARM_DESC(index, "Index value for the USB MIDI Gadget adapter."); +module_param(id, charp, 0444); +MODULE_PARM_DESC(id, "ID string for the USB MIDI Gadget adapter."); + +/* Some systems will want different product identifers published in the + * device descriptor, either numbers or strings or both. These string + * parameters are in UTF-8 (superset of ASCII's 7 bit characters). + */ + +static ushort idVendor; +module_param(idVendor, ushort, S_IRUGO); +MODULE_PARM_DESC(idVendor, "USB Vendor ID"); + +static ushort idProduct; +module_param(idProduct, ushort, S_IRUGO); +MODULE_PARM_DESC(idProduct, "USB Product ID"); + +static ushort bcdDevice; +module_param(bcdDevice, ushort, S_IRUGO); +MODULE_PARM_DESC(bcdDevice, "USB Device version (BCD)"); + +static char *iManufacturer; +module_param(iManufacturer, charp, S_IRUGO); +MODULE_PARM_DESC(iManufacturer, "USB Manufacturer string"); + +static char *iProduct; +module_param(iProduct, charp, S_IRUGO); +MODULE_PARM_DESC(iProduct, "USB Product string"); + +static char *iSerialNumber; +module_param(iSerialNumber, charp, S_IRUGO); +MODULE_PARM_DESC(iSerialNumber, "SerialNumber"); + +/* + * this version autoconfigures as much as possible, + * which is reasonable for most "bulk-only" drivers. + */ +static const char *EP_IN_NAME; +static const char *EP_OUT_NAME; + + +/* big enough to hold our biggest descriptor */ +#define USB_BUFSIZ 256 + + +/* This is a gadget, and the IN/OUT naming is from the host's perspective. + USB -> OUT endpoint -> rawmidi + USB <- IN endpoint <- rawmidi */ +struct gmidi_in_port { + struct gmidi_device* dev; + int active; + uint8_t cable; /* cable number << 4 */ + uint8_t state; +#define STATE_UNKNOWN 0 +#define STATE_1PARAM 1 +#define STATE_2PARAM_1 2 +#define STATE_2PARAM_2 3 +#define STATE_SYSEX_0 4 +#define STATE_SYSEX_1 5 +#define STATE_SYSEX_2 6 + uint8_t data[2]; +}; + +struct gmidi_device { + spinlock_t lock; + struct usb_gadget *gadget; + struct usb_request *req; /* for control responses */ + u8 config; + struct usb_ep *in_ep, *out_ep; + struct snd_card *card; + struct snd_rawmidi *rmidi; + struct snd_rawmidi_substream *in_substream; + struct snd_rawmidi_substream *out_substream; + + /* For the moment we only support one port in + each direction, but in_port is kept as a + separate struct so we can have more later. */ + struct gmidi_in_port in_port; + unsigned long out_triggered; + struct tasklet_struct tasklet; +}; + +static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req); + + +#define xprintk(d,level,fmt,args...) \ + dev_printk(level , &(d)->gadget->dev , fmt , ## args) + +#ifdef DEBUG +#define DBG(dev,fmt,args...) \ + xprintk(dev , KERN_DEBUG , fmt , ## args) +#else +#define DBG(dev,fmt,args...) \ + do { } while (0) +#endif /* DEBUG */ + +#ifdef VERBOSE +#define VDBG DBG +#else +#define VDBG(dev,fmt,args...) \ + do { } while (0) +#endif /* VERBOSE */ + +#define ERROR(dev,fmt,args...) \ + xprintk(dev , KERN_ERR , fmt , ## args) +#define WARN(dev,fmt,args...) \ + xprintk(dev , KERN_WARNING , fmt , ## args) +#define INFO(dev,fmt,args...) \ + xprintk(dev , KERN_INFO , fmt , ## args) + + +static unsigned buflen = 256; +static unsigned qlen = 32; + +module_param(buflen, uint, S_IRUGO); +module_param(qlen, uint, S_IRUGO); + + +/* Thanks to Grey Innovation for donating this product ID. + * + * DO NOT REUSE THESE IDs with a protocol-incompatible driver!! Ever!! + * Instead: allocate your own, using normal USB-IF procedures. + */ +#define DRIVER_VENDOR_NUM 0x17b3 /* Grey Innovation */ +#define DRIVER_PRODUCT_NUM 0x0004 /* Linux-USB "MIDI Gadget" */ + + +/* + * DESCRIPTORS ... most are static, but strings and (full) + * configuration descriptors are built on demand. + */ + +#define STRING_MANUFACTURER 25 +#define STRING_PRODUCT 42 +#define STRING_SERIAL 101 +#define STRING_MIDI_GADGET 250 + +/* We only have the one configuration, it's number 1. */ +#define GMIDI_CONFIG 1 + +/* We have two interfaces- AudioControl and MIDIStreaming */ +#define GMIDI_AC_INTERFACE 0 +#define GMIDI_MS_INTERFACE 1 +#define GMIDI_NUM_INTERFACES 2 + +DECLARE_USB_AC_HEADER_DESCRIPTOR(1); +DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(1); +DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(1); + +/* B.1 Device Descriptor */ +static struct usb_device_descriptor device_desc = { + .bLength = USB_DT_DEVICE_SIZE, + .bDescriptorType = USB_DT_DEVICE, + .bcdUSB = __constant_cpu_to_le16(0x0200), + .bDeviceClass = USB_CLASS_PER_INTERFACE, + .idVendor = __constant_cpu_to_le16(DRIVER_VENDOR_NUM), + .idProduct = __constant_cpu_to_le16(DRIVER_PRODUCT_NUM), + .iManufacturer = STRING_MANUFACTURER, + .iProduct = STRING_PRODUCT, + .bNumConfigurations = 1, +}; + +/* B.2 Configuration Descriptor */ +static struct usb_config_descriptor config_desc = { + .bLength = USB_DT_CONFIG_SIZE, + .bDescriptorType = USB_DT_CONFIG, + /* compute wTotalLength on the fly */ + .bNumInterfaces = GMIDI_NUM_INTERFACES, + .bConfigurationValue = GMIDI_CONFIG, + .iConfiguration = STRING_MIDI_GADGET, + /* + * FIXME: When embedding this driver in a device, + * these need to be set to reflect the actual + * power properties of the device. Is it selfpowered? + */ + .bmAttributes = USB_CONFIG_ATT_ONE, + .bMaxPower = 1, +}; + +/* B.3.1 Standard AC Interface Descriptor */ +static const struct usb_interface_descriptor ac_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = GMIDI_AC_INTERFACE, + .bNumEndpoints = 0, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, + .iInterface = STRING_MIDI_GADGET, +}; + +/* B.3.2 Class-Specific AC Interface Descriptor */ +static const struct usb_ac_header_descriptor_1 ac_header_desc = { + .bLength = USB_DT_AC_HEADER_SIZE(1), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = USB_MS_HEADER, + .bcdADC = __constant_cpu_to_le16(0x0100), + .wTotalLength = USB_DT_AC_HEADER_SIZE(1), + .bInCollection = 1, + .baInterfaceNr = { + [0] = GMIDI_MS_INTERFACE, + } +}; + +/* B.4.1 Standard MS Interface Descriptor */ +static const struct usb_interface_descriptor ms_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = GMIDI_MS_INTERFACE, + .bNumEndpoints = 2, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_MIDISTREAMING, + .iInterface = STRING_MIDI_GADGET, +}; + +/* B.4.2 Class-Specific MS Interface Descriptor */ +static const struct usb_ms_header_descriptor ms_header_desc = { + .bLength = USB_DT_MS_HEADER_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = USB_MS_HEADER, + .bcdMSC = __constant_cpu_to_le16(0x0100), + .wTotalLength = USB_DT_MS_HEADER_SIZE + + 2*USB_DT_MIDI_IN_SIZE + + 2*USB_DT_MIDI_OUT_SIZE(1), +}; + +#define JACK_IN_EMB 1 +#define JACK_IN_EXT 2 +#define JACK_OUT_EMB 3 +#define JACK_OUT_EXT 4 + +/* B.4.3 MIDI IN Jack Descriptors */ +static const struct usb_midi_in_jack_descriptor jack_in_emb_desc = { + .bLength = USB_DT_MIDI_IN_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = USB_MS_MIDI_IN_JACK, + .bJackType = USB_MS_EMBEDDED, + .bJackID = JACK_IN_EMB, +}; + +static const struct usb_midi_in_jack_descriptor jack_in_ext_desc = { + .bLength = USB_DT_MIDI_IN_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = USB_MS_MIDI_IN_JACK, + .bJackType = USB_MS_EXTERNAL, + .bJackID = JACK_IN_EXT, +}; + +/* B.4.4 MIDI OUT Jack Descriptors */ +static const struct usb_midi_out_jack_descriptor_1 jack_out_emb_desc = { + .bLength = USB_DT_MIDI_OUT_SIZE(1), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = USB_MS_MIDI_OUT_JACK, + .bJackType = USB_MS_EMBEDDED, + .bJackID = JACK_OUT_EMB, + .bNrInputPins = 1, + .pins = { + [0] = { + .baSourceID = JACK_IN_EXT, + .baSourcePin = 1, + } + } +}; + +static const struct usb_midi_out_jack_descriptor_1 jack_out_ext_desc = { + .bLength = USB_DT_MIDI_OUT_SIZE(1), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = USB_MS_MIDI_OUT_JACK, + .bJackType = USB_MS_EXTERNAL, + .bJackID = JACK_OUT_EXT, + .bNrInputPins = 1, + .pins = { + [0] = { + .baSourceID = JACK_IN_EMB, + .baSourcePin = 1, + } + } +}; + +/* B.5.1 Standard Bulk OUT Endpoint Descriptor */ +static struct usb_endpoint_descriptor bulk_out_desc = { + .bLength = USB_DT_ENDPOINT_AUDIO_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +/* B.5.2 Class-specific MS Bulk OUT Endpoint Descriptor */ +static const struct usb_ms_endpoint_descriptor_1 ms_out_desc = { + .bLength = USB_DT_MS_ENDPOINT_SIZE(1), + .bDescriptorType = USB_DT_CS_ENDPOINT, + .bDescriptorSubtype = USB_MS_GENERAL, + .bNumEmbMIDIJack = 1, + .baAssocJackID = { + [0] = JACK_IN_EMB, + } +}; + +/* B.6.1 Standard Bulk IN Endpoint Descriptor */ +static struct usb_endpoint_descriptor bulk_in_desc = { + .bLength = USB_DT_ENDPOINT_AUDIO_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +/* B.6.2 Class-specific MS Bulk IN Endpoint Descriptor */ +static const struct usb_ms_endpoint_descriptor_1 ms_in_desc = { + .bLength = USB_DT_MS_ENDPOINT_SIZE(1), + .bDescriptorType = USB_DT_CS_ENDPOINT, + .bDescriptorSubtype = USB_MS_GENERAL, + .bNumEmbMIDIJack = 1, + .baAssocJackID = { + [0] = JACK_OUT_EMB, + } +}; + +static const struct usb_descriptor_header *gmidi_function [] = { + (struct usb_descriptor_header *)&ac_interface_desc, + (struct usb_descriptor_header *)&ac_header_desc, + (struct usb_descriptor_header *)&ms_interface_desc, + + (struct usb_descriptor_header *)&ms_header_desc, + (struct usb_descriptor_header *)&jack_in_emb_desc, + (struct usb_descriptor_header *)&jack_in_ext_desc, + (struct usb_descriptor_header *)&jack_out_emb_desc, + (struct usb_descriptor_header *)&jack_out_ext_desc, + /* If you add more jacks, update ms_header_desc.wTotalLength */ + + (struct usb_descriptor_header *)&bulk_out_desc, + (struct usb_descriptor_header *)&ms_out_desc, + (struct usb_descriptor_header *)&bulk_in_desc, + (struct usb_descriptor_header *)&ms_in_desc, + NULL, +}; + +static char manufacturer[50]; +static char product_desc[40] = "MIDI Gadget"; +static char serial_number[20]; + +/* static strings, in UTF-8 */ +static struct usb_string strings [] = { + { STRING_MANUFACTURER, manufacturer, }, + { STRING_PRODUCT, product_desc, }, + { STRING_SERIAL, serial_number, }, + { STRING_MIDI_GADGET, longname, }, + { } /* end of list */ +}; + +static struct usb_gadget_strings stringtab = { + .language = 0x0409, /* en-us */ + .strings = strings, +}; + +static int config_buf(struct usb_gadget *gadget, + u8 *buf, u8 type, unsigned index) +{ + int len; + + /* only one configuration */ + if (index != 0) { + return -EINVAL; + } + len = usb_gadget_config_buf(&config_desc, + buf, USB_BUFSIZ, gmidi_function); + if (len < 0) { + return len; + } + ((struct usb_config_descriptor *)buf)->bDescriptorType = type; + return len; +} + +static struct usb_request* alloc_ep_req(struct usb_ep *ep, unsigned length) +{ + struct usb_request *req; + + req = usb_ep_alloc_request(ep, GFP_ATOMIC); + if (req) { + req->length = length; + req->buf = kmalloc(length, GFP_ATOMIC); + if (!req->buf) { + usb_ep_free_request(ep, req); + req = NULL; + } + } + return req; +} + +static void free_ep_req(struct usb_ep *ep, struct usb_request *req) +{ + kfree(req->buf); + usb_ep_free_request(ep, req); +} + +static const uint8_t gmidi_cin_length[] = { + 0, 0, 2, 3, 3, 1, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1 +}; + +/* + * Receives a chunk of MIDI data. + */ +static void gmidi_read_data(struct usb_ep *ep, int cable, + uint8_t* data, int length) +{ + struct gmidi_device *dev = ep->driver_data; + /* cable is ignored, because for now we only have one. */ + + if (!dev->out_substream) { + /* Nobody is listening - throw it on the floor. */ + return; + } + if (!test_bit(dev->out_substream->number, &dev->out_triggered)) { + return; + } + snd_rawmidi_receive(dev->out_substream, data, length); +} + +static void gmidi_handle_out_data(struct usb_ep *ep, struct usb_request *req) +{ + unsigned i; + u8 *buf = req->buf; + + for (i = 0; i + 3 < req->actual; i += 4) { + if (buf[i] != 0) { + int cable = buf[i] >> 4; + int length = gmidi_cin_length[buf[i] & 0x0f]; + gmidi_read_data(ep, cable, &buf[i + 1], length); + } + } +} + +static void gmidi_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct gmidi_device *dev = ep->driver_data; + int status = req->status; + + switch (status) { + case 0: /* normal completion */ + if (ep == dev->out_ep) { + /* we received stuff. + req is queued again, below */ + gmidi_handle_out_data(ep, req); + } else if (ep == dev->in_ep) { + /* our transmit completed. + see if there's more to go. + gmidi_transmit eats req, don't queue it again. */ + gmidi_transmit(dev, req); + return; + } + break; + + /* this endpoint is normally active while we're configured */ + case -ECONNABORTED: /* hardware forced ep reset */ + case -ECONNRESET: /* request dequeued */ + case -ESHUTDOWN: /* disconnect from host */ + VDBG(dev, "%s gone (%d), %d/%d\n", ep->name, status, + req->actual, req->length); + if (ep == dev->out_ep) { + gmidi_handle_out_data(ep, req); + } + free_ep_req(ep, req); + return; + + case -EOVERFLOW: /* buffer overrun on read means that + * we didn't provide a big enough + * buffer. + */ + default: + DBG(dev, "%s complete --> %d, %d/%d\n", ep->name, + status, req->actual, req->length); + break; + case -EREMOTEIO: /* short read */ + break; + } + + status = usb_ep_queue(ep, req, GFP_ATOMIC); + if (status) { + ERROR(dev, "kill %s: resubmit %d bytes --> %d\n", + ep->name, req->length, status); + usb_ep_set_halt(ep); + /* FIXME recover later ... somehow */ + } +} + +static int set_gmidi_config(struct gmidi_device *dev, gfp_t gfp_flags) +{ + int err = 0; + struct usb_request *req; + struct usb_ep* ep; + unsigned i; + + err = usb_ep_enable(dev->in_ep, &bulk_in_desc); + if (err) { + ERROR(dev, "can't start %s: %d\n", dev->in_ep->name, err); + goto fail; + } + dev->in_ep->driver_data = dev; + + err = usb_ep_enable(dev->out_ep, &bulk_out_desc); + if (err) { + ERROR(dev, "can't start %s: %d\n", dev->out_ep->name, err); + goto fail; + } + dev->out_ep->driver_data = dev; + + /* allocate a bunch of read buffers and queue them all at once. */ + ep = dev->out_ep; + for (i = 0; i < qlen && err == 0; i++) { + req = alloc_ep_req(ep, buflen); + if (req) { + req->complete = gmidi_complete; + err = usb_ep_queue(ep, req, GFP_ATOMIC); + if (err) { + DBG(dev, "%s queue req: %d\n", ep->name, err); + } + } else { + err = -ENOMEM; + } + } +fail: + /* caller is responsible for cleanup on error */ + return err; +} + + +static void gmidi_reset_config(struct gmidi_device *dev) +{ + if (dev->config == 0) { + return; + } + + DBG(dev, "reset config\n"); + + /* just disable endpoints, forcing completion of pending i/o. + * all our completion handlers free their requests in this case. + */ + usb_ep_disable(dev->in_ep); + usb_ep_disable(dev->out_ep); + dev->config = 0; +} + +/* change our operational config. this code must agree with the code + * that returns config descriptors, and altsetting code. + * + * it's also responsible for power management interactions. some + * configurations might not work with our current power sources. + * + * note that some device controller hardware will constrain what this + * code can do, perhaps by disallowing more than one configuration or + * by limiting configuration choices (like the pxa2xx). + */ +static int +gmidi_set_config(struct gmidi_device *dev, unsigned number, gfp_t gfp_flags) +{ + int result = 0; + struct usb_gadget *gadget = dev->gadget; + +#if 0 + /* FIXME */ + /* Hacking this bit out fixes a bug where on receipt of two + USB_REQ_SET_CONFIGURATION messages, we end up with no + buffered OUT requests waiting for data. This is clearly + hiding a bug elsewhere, because if the config didn't + change then we really shouldn't do anything. */ + /* Having said that, when we do "change" from config 1 + to config 1, we at least gmidi_reset_config() which + clears out any requests on endpoints, so it's not like + we leak or anything. */ + if (number == dev->config) { + return 0; + } +#endif + + if (gadget_is_sa1100(gadget) && dev->config) { + /* tx fifo is full, but we can't clear it...*/ + INFO(dev, "can't change configurations\n"); + return -ESPIPE; + } + gmidi_reset_config(dev); + + switch (number) { + case GMIDI_CONFIG: + result = set_gmidi_config(dev, gfp_flags); + break; + default: + result = -EINVAL; + /* FALL THROUGH */ + case 0: + return result; + } + + if (!result && (!dev->in_ep || !dev->out_ep)) { + result = -ENODEV; + } + if (result) { + gmidi_reset_config(dev); + } else { + char *speed; + + switch (gadget->speed) { + case USB_SPEED_LOW: speed = "low"; break; + case USB_SPEED_FULL: speed = "full"; break; + case USB_SPEED_HIGH: speed = "high"; break; + default: speed = "?"; break; + } + + dev->config = number; + INFO(dev, "%s speed\n", speed); + } + return result; +} + + +static void gmidi_setup_complete(struct usb_ep *ep, struct usb_request *req) +{ + if (req->status || req->actual != req->length) { + DBG((struct gmidi_device *) ep->driver_data, + "setup complete --> %d, %d/%d\n", + req->status, req->actual, req->length); + } +} + +/* + * The setup() callback implements all the ep0 functionality that's + * not handled lower down, in hardware or the hardware driver (like + * device and endpoint feature flags, and their status). It's all + * housekeeping for the gadget function we're implementing. Most of + * the work is in config-specific setup. + */ +static int gmidi_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *ctrl) +{ + struct gmidi_device *dev = get_gadget_data(gadget); + struct usb_request *req = dev->req; + int value = -EOPNOTSUPP; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); + + /* usually this stores reply data in the pre-allocated ep0 buffer, + * but config change events will reconfigure hardware. + */ + req->zero = 0; + switch (ctrl->bRequest) { + + case USB_REQ_GET_DESCRIPTOR: + if (ctrl->bRequestType != USB_DIR_IN) { + goto unknown; + } + switch (w_value >> 8) { + + case USB_DT_DEVICE: + value = min(w_length, (u16) sizeof(device_desc)); + memcpy(req->buf, &device_desc, value); + break; + case USB_DT_CONFIG: + value = config_buf(gadget, req->buf, + w_value >> 8, + w_value & 0xff); + if (value >= 0) { + value = min(w_length, (u16)value); + } + break; + + case USB_DT_STRING: + /* wIndex == language code. + * this driver only handles one language, you can + * add string tables for other languages, using + * any UTF-8 characters + */ + value = usb_gadget_get_string(&stringtab, + w_value & 0xff, req->buf); + if (value >= 0) { + value = min(w_length, (u16)value); + } + break; + } + break; + + /* currently two configs, two speeds */ + case USB_REQ_SET_CONFIGURATION: + if (ctrl->bRequestType != 0) { + goto unknown; + } + if (gadget->a_hnp_support) { + DBG(dev, "HNP available\n"); + } else if (gadget->a_alt_hnp_support) { + DBG(dev, "HNP needs a different root port\n"); + } else { + VDBG(dev, "HNP inactive\n"); + } + spin_lock(&dev->lock); + value = gmidi_set_config(dev, w_value, GFP_ATOMIC); + spin_unlock(&dev->lock); + break; + case USB_REQ_GET_CONFIGURATION: + if (ctrl->bRequestType != USB_DIR_IN) { + goto unknown; + } + *(u8 *)req->buf = dev->config; + value = min(w_length, (u16)1); + break; + + /* until we add altsetting support, or other interfaces, + * only 0/0 are possible. pxa2xx only supports 0/0 (poorly) + * and already killed pending endpoint I/O. + */ + case USB_REQ_SET_INTERFACE: + if (ctrl->bRequestType != USB_RECIP_INTERFACE) { + goto unknown; + } + spin_lock(&dev->lock); + if (dev->config && w_index < GMIDI_NUM_INTERFACES + && w_value == 0) + { + u8 config = dev->config; + + /* resets interface configuration, forgets about + * previous transaction state (queued bufs, etc) + * and re-inits endpoint state (toggle etc) + * no response queued, just zero status == success. + * if we had more than one interface we couldn't + * use this "reset the config" shortcut. + */ + gmidi_reset_config(dev); + gmidi_set_config(dev, config, GFP_ATOMIC); + value = 0; + } + spin_unlock(&dev->lock); + break; + case USB_REQ_GET_INTERFACE: + if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) { + goto unknown; + } + if (!dev->config) { + break; + } + if (w_index >= GMIDI_NUM_INTERFACES) { + value = -EDOM; + break; + } + *(u8 *)req->buf = 0; + value = min(w_length, (u16)1); + break; + + default: +unknown: + VDBG(dev, "unknown control req%02x.%02x v%04x i%04x l%d\n", + ctrl->bRequestType, ctrl->bRequest, + w_value, w_index, w_length); + } + + /* respond with data transfer before status phase? */ + if (value >= 0) { + req->length = value; + req->zero = value < w_length; + value = usb_ep_queue(gadget->ep0, req, GFP_ATOMIC); + if (value < 0) { + DBG(dev, "ep_queue --> %d\n", value); + req->status = 0; + gmidi_setup_complete(gadget->ep0, req); + } + } + + /* device either stalls (value < 0) or reports success */ + return value; +} + +static void gmidi_disconnect(struct usb_gadget *gadget) +{ + struct gmidi_device *dev = get_gadget_data(gadget); + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + gmidi_reset_config(dev); + + /* a more significant application might have some non-usb + * activities to quiesce here, saving resources like power + * or pushing the notification up a network stack. + */ + spin_unlock_irqrestore(&dev->lock, flags); + + /* next we may get setup() calls to enumerate new connections; + * or an unbind() during shutdown (including removing module). + */ +} + +static void /* __init_or_exit */ gmidi_unbind(struct usb_gadget *gadget) +{ + struct gmidi_device *dev = get_gadget_data(gadget); + struct snd_card* card; + + DBG(dev, "unbind\n"); + + card = dev->card; + dev->card = NULL; + if (card) { + snd_card_free(card); + } + + /* we've already been disconnected ... no i/o is active */ + if (dev->req) { + dev->req->length = USB_BUFSIZ; + free_ep_req(gadget->ep0, dev->req); + } + kfree(dev); + set_gadget_data(gadget, NULL); +} + +static int gmidi_snd_free(struct snd_device *device) +{ + return 0; +} + +static void gmidi_transmit_packet(struct usb_request* req, uint8_t p0, + uint8_t p1, uint8_t p2, uint8_t p3) +{ + unsigned length = req->length; + + uint8_t* buf = (uint8_t*)req->buf + length; + buf[0] = p0; + buf[1] = p1; + buf[2] = p2; + buf[3] = p3; + req->length = length + 4; +} + +/* + * Converts MIDI commands to USB MIDI packets. + */ +static void gmidi_transmit_byte(struct usb_request* req, + struct gmidi_in_port* port, uint8_t b) +{ + uint8_t p0 = port->cable; + + if (b >= 0xf8) { + gmidi_transmit_packet(req, p0 | 0x0f, b, 0, 0); + } else if (b >= 0xf0) { + switch (b) { + case 0xf0: + port->data[0] = b; + port->state = STATE_SYSEX_1; + break; + case 0xf1: + case 0xf3: + port->data[0] = b; + port->state = STATE_1PARAM; + break; + case 0xf2: + port->data[0] = b; + port->state = STATE_2PARAM_1; + break; + case 0xf4: + case 0xf5: + port->state = STATE_UNKNOWN; + break; + case 0xf6: + gmidi_transmit_packet(req, p0 | 0x05, 0xf6, 0, 0); + port->state = STATE_UNKNOWN; + break; + case 0xf7: + switch (port->state) { + case STATE_SYSEX_0: + gmidi_transmit_packet(req, + p0 | 0x05, 0xf7, 0, 0); + break; + case STATE_SYSEX_1: + gmidi_transmit_packet(req, + p0 | 0x06, port->data[0], 0xf7, 0); + break; + case STATE_SYSEX_2: + gmidi_transmit_packet(req, + p0 | 0x07, port->data[0], + port->data[1], 0xf7); + break; + } + port->state = STATE_UNKNOWN; + break; + } + } else if (b >= 0x80) { + port->data[0] = b; + if (b >= 0xc0 && b <= 0xdf) + port->state = STATE_1PARAM; + else + port->state = STATE_2PARAM_1; + } else { /* b < 0x80 */ + switch (port->state) { + case STATE_1PARAM: + if (port->data[0] < 0xf0) { + p0 |= port->data[0] >> 4; + } else { + p0 |= 0x02; + port->state = STATE_UNKNOWN; + } + gmidi_transmit_packet(req, p0, port->data[0], b, 0); + break; + case STATE_2PARAM_1: + port->data[1] = b; + port->state = STATE_2PARAM_2; + break; + case STATE_2PARAM_2: + if (port->data[0] < 0xf0) { + p0 |= port->data[0] >> 4; + port->state = STATE_2PARAM_1; + } else { + p0 |= 0x03; + port->state = STATE_UNKNOWN; + } + gmidi_transmit_packet(req, + p0, port->data[0], port->data[1], b); + break; + case STATE_SYSEX_0: + port->data[0] = b; + port->state = STATE_SYSEX_1; + break; + case STATE_SYSEX_1: + port->data[1] = b; + port->state = STATE_SYSEX_2; + break; + case STATE_SYSEX_2: + gmidi_transmit_packet(req, + p0 | 0x04, port->data[0], port->data[1], b); + port->state = STATE_SYSEX_0; + break; + } + } +} + +static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req) +{ + struct usb_ep* ep = dev->in_ep; + struct gmidi_in_port* port = &dev->in_port; + + if (!ep) { + return; + } + if (!req) { + req = alloc_ep_req(ep, buflen); + } + if (!req) { + ERROR(dev, "gmidi_transmit: alloc_ep_request failed\n"); + return; + } + req->length = 0; + req->complete = gmidi_complete; + + if (port->active) { + while (req->length + 3 < buflen) { + uint8_t b; + if (snd_rawmidi_transmit(dev->in_substream, &b, 1) + != 1) + { + port->active = 0; + break; + } + gmidi_transmit_byte(req, port, b); + } + } + if (req->length > 0) { + usb_ep_queue(ep, req, GFP_ATOMIC); + } else { + free_ep_req(ep, req); + } +} + +static void gmidi_in_tasklet(unsigned long data) +{ + struct gmidi_device* dev = (struct gmidi_device*)data; + + gmidi_transmit(dev, NULL); +} + +static int gmidi_in_open(struct snd_rawmidi_substream *substream) +{ + struct gmidi_device* dev = substream->rmidi->private_data; + + VDBG(dev, "gmidi_in_open\n"); + dev->in_substream = substream; + dev->in_port.state = STATE_UNKNOWN; + return 0; +} + +static int gmidi_in_close(struct snd_rawmidi_substream *substream) +{ + VDBG(dev, "gmidi_in_close\n"); + return 0; +} + +static void gmidi_in_trigger(struct snd_rawmidi_substream *substream, int up) +{ + struct gmidi_device* dev = substream->rmidi->private_data; + + VDBG(dev, "gmidi_in_trigger %d\n", up); + dev->in_port.active = up; + if (up) { + tasklet_hi_schedule(&dev->tasklet); + } +} + +static int gmidi_out_open(struct snd_rawmidi_substream *substream) +{ + struct gmidi_device* dev = substream->rmidi->private_data; + + VDBG(dev, "gmidi_out_open\n"); + dev->out_substream = substream; + return 0; +} + +static int gmidi_out_close(struct snd_rawmidi_substream *substream) +{ + VDBG(dev, "gmidi_out_close\n"); + return 0; +} + +static void gmidi_out_trigger(struct snd_rawmidi_substream *substream, int up) +{ + struct gmidi_device* dev = substream->rmidi->private_data; + + VDBG(dev, "gmidi_out_trigger %d\n", up); + if (up) { + set_bit(substream->number, &dev->out_triggered); + } else { + clear_bit(substream->number, &dev->out_triggered); + } +} + +static struct snd_rawmidi_ops gmidi_in_ops = { + .open = gmidi_in_open, + .close = gmidi_in_close, + .trigger = gmidi_in_trigger, +}; + +static struct snd_rawmidi_ops gmidi_out_ops = { + .open = gmidi_out_open, + .close = gmidi_out_close, + .trigger = gmidi_out_trigger +}; + +/* register as a sound "card" */ +static int gmidi_register_card(struct gmidi_device *dev) +{ + struct snd_card *card; + struct snd_rawmidi *rmidi; + int err; + int out_ports = 1; + int in_ports = 1; + static struct snd_device_ops ops = { + .dev_free = gmidi_snd_free, + }; + + card = snd_card_new(index, id, THIS_MODULE, 0); + if (!card) { + ERROR(dev, "snd_card_new failed\n"); + err = -ENOMEM; + goto fail; + } + dev->card = card; + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, dev, &ops); + if (err < 0) { + ERROR(dev, "snd_device_new failed: error %d\n", err); + goto fail; + } + + strcpy(card->driver, longname); + strcpy(card->longname, longname); + strcpy(card->shortname, shortname); + + /* Set up rawmidi */ + dev->in_port.dev = dev; + dev->in_port.active = 0; + snd_component_add(card, "MIDI"); + err = snd_rawmidi_new(card, "USB MIDI Gadget", 0, + out_ports, in_ports, &rmidi); + if (err < 0) { + ERROR(dev, "snd_rawmidi_new failed: error %d\n", err); + goto fail; + } + dev->rmidi = rmidi; + strcpy(rmidi->name, card->shortname); + rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT | + SNDRV_RAWMIDI_INFO_INPUT | + SNDRV_RAWMIDI_INFO_DUPLEX; + rmidi->private_data = dev; + + /* Yes, rawmidi OUTPUT = USB IN, and rawmidi INPUT = USB OUT. + It's an upside-down world being a gadget. */ + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &gmidi_in_ops); + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &gmidi_out_ops); + + snd_card_set_dev(card, &dev->gadget->dev); + + /* register it - we're ready to go */ + err = snd_card_register(card); + if (err < 0) { + ERROR(dev, "snd_card_register failed\n"); + goto fail; + } + + VDBG(dev, "gmidi_register_card finished ok\n"); + return 0; + +fail: + if (dev->card) { + snd_card_free(dev->card); + dev->card = NULL; + } + return err; +} + +/* + * Creates an output endpoint, and initializes output ports. + */ +static int __devinit gmidi_bind(struct usb_gadget *gadget) +{ + struct gmidi_device *dev; + struct usb_ep *in_ep, *out_ep; + int gcnum, err = 0; + + /* support optional vendor/distro customization */ + if (idVendor) { + if (!idProduct) { + printk(KERN_ERR "idVendor needs idProduct!\n"); + return -ENODEV; + } + device_desc.idVendor = cpu_to_le16(idVendor); + device_desc.idProduct = cpu_to_le16(idProduct); + if (bcdDevice) { + device_desc.bcdDevice = cpu_to_le16(bcdDevice); + } + } + if (iManufacturer) { + strlcpy(manufacturer, iManufacturer, sizeof(manufacturer)); + } else { + snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s", + system_utsname.sysname, system_utsname.release, + gadget->name); + } + if (iProduct) { + strlcpy(product_desc, iProduct, sizeof(product_desc)); + } + if (iSerialNumber) { + device_desc.iSerialNumber = STRING_SERIAL, + strlcpy(serial_number, iSerialNumber, sizeof(serial_number)); + } + + /* Bulk-only drivers like this one SHOULD be able to + * autoconfigure on any sane usb controller driver, + * but there may also be important quirks to address. + */ + usb_ep_autoconfig_reset(gadget); + in_ep = usb_ep_autoconfig(gadget, &bulk_in_desc); + if (!in_ep) { +autoconf_fail: + printk(KERN_ERR "%s: can't autoconfigure on %s\n", + shortname, gadget->name); + return -ENODEV; + } + EP_IN_NAME = in_ep->name; + in_ep->driver_data = in_ep; /* claim */ + + out_ep = usb_ep_autoconfig(gadget, &bulk_out_desc); + if (!out_ep) { + goto autoconf_fail; + } + EP_OUT_NAME = out_ep->name; + out_ep->driver_data = out_ep; /* claim */ + + gcnum = usb_gadget_controller_number(gadget); + if (gcnum >= 0) { + device_desc.bcdDevice = cpu_to_le16(0x0200 + gcnum); + } else { + /* gmidi is so simple (no altsettings) that + * it SHOULD NOT have problems with bulk-capable hardware. + * so warn about unrecognized controllers, don't panic. + */ + printk(KERN_WARNING "%s: controller '%s' not recognized\n", + shortname, gadget->name); + device_desc.bcdDevice = __constant_cpu_to_le16(0x9999); + } + + + /* ok, we made sense of the hardware ... */ + dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + if (!dev) { + return -ENOMEM; + } + spin_lock_init(&dev->lock); + dev->gadget = gadget; + dev->in_ep = in_ep; + dev->out_ep = out_ep; + set_gadget_data(gadget, dev); + tasklet_init(&dev->tasklet, gmidi_in_tasklet, (unsigned long)dev); + + /* preallocate control response and buffer */ + dev->req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL); + if (!dev->req) { + err = -ENOMEM; + goto fail; + } + dev->req->buf = usb_ep_alloc_buffer(gadget->ep0, USB_BUFSIZ, + &dev->req->dma, GFP_KERNEL); + if (!dev->req->buf) { + err = -ENOMEM; + goto fail; + } + + dev->req->complete = gmidi_setup_complete; + + device_desc.bMaxPacketSize0 = gadget->ep0->maxpacket; + + gadget->ep0->driver_data = dev; + + INFO(dev, "%s, version: " DRIVER_VERSION "\n", longname); + INFO(dev, "using %s, OUT %s IN %s\n", gadget->name, + EP_OUT_NAME, EP_IN_NAME); + + /* register as an ALSA sound card */ + err = gmidi_register_card(dev); + if (err < 0) { + goto fail; + } + + VDBG(dev, "gmidi_bind finished ok\n"); + return 0; + +fail: + gmidi_unbind(gadget); + return err; +} + + +static void gmidi_suspend(struct usb_gadget *gadget) +{ + struct gmidi_device *dev = get_gadget_data(gadget); + + if (gadget->speed == USB_SPEED_UNKNOWN) { + return; + } + + DBG(dev, "suspend\n"); +} + +static void gmidi_resume(struct usb_gadget *gadget) +{ + struct gmidi_device *dev = get_gadget_data(gadget); + + DBG(dev, "resume\n"); +} + + +static struct usb_gadget_driver gmidi_driver = { + .speed = USB_SPEED_FULL, + .function = (char *)longname, + .bind = gmidi_bind, + .unbind = __exit_p(gmidi_unbind), + + .setup = gmidi_setup, + .disconnect = gmidi_disconnect, + + .suspend = gmidi_suspend, + .resume = gmidi_resume, + + .driver = { + .name = (char *)shortname, + .owner = THIS_MODULE, + }, +}; + +static int __init gmidi_init(void) +{ + return usb_gadget_register_driver(&gmidi_driver); +} +module_init(gmidi_init); + +static void __exit gmidi_cleanup(void) +{ + usb_gadget_unregister_driver(&gmidi_driver); +} +module_exit(gmidi_cleanup); + diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h new file mode 100644 index 000000000000..6bd235994dc2 --- /dev/null +++ b/include/linux/usb/audio.h @@ -0,0 +1,53 @@ +/* + * -- USB Audio definitions. + * + * Copyright (C) 2006 Thumtronics Pty Ltd. + * Developed for Thumtronics by Grey Innovation + * Ben Williamson + * + * This software is distributed under the terms of the GNU General Public + * License ("GPL") version 2, as published by the Free Software Foundation. + * + * This file holds USB constants and structures defined + * by the USB Device Class Definition for Audio Devices. + * Comments below reference relevant sections of that document: + * + * http://www.usb.org/developers/devclass_docs/audio10.pdf + */ + +#ifndef __LINUX_USB_AUDIO_H +#define __LINUX_USB_AUDIO_H + +#include + +/* A.2 Audio Interface Subclass Codes */ +#define USB_SUBCLASS_AUDIOCONTROL 0x01 +#define USB_SUBCLASS_AUDIOSTREAMING 0x02 +#define USB_SUBCLASS_MIDISTREAMING 0x03 + +/* 4.3.2 Class-Specific AC Interface Descriptor */ +struct usb_ac_header_descriptor { + __u8 bLength; // 8+n + __u8 bDescriptorType; // USB_DT_CS_INTERFACE + __u8 bDescriptorSubtype; // USB_MS_HEADER + __le16 bcdADC; // 0x0100 + __le16 wTotalLength; // includes Unit and Terminal desc. + __u8 bInCollection; // n + __u8 baInterfaceNr[]; // [n] +} __attribute__ ((packed)); + +#define USB_DT_AC_HEADER_SIZE(n) (8+(n)) + +/* As above, but more useful for defining your own descriptors: */ +#define DECLARE_USB_AC_HEADER_DESCRIPTOR(n) \ +struct usb_ac_header_descriptor_##n { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubtype; \ + __le16 bcdADC; \ + __le16 wTotalLength; \ + __u8 bInCollection; \ + __u8 baInterfaceNr[n]; \ +} __attribute__ ((packed)) + +#endif diff --git a/include/linux/usb/midi.h b/include/linux/usb/midi.h new file mode 100644 index 000000000000..11a97d5ffd34 --- /dev/null +++ b/include/linux/usb/midi.h @@ -0,0 +1,112 @@ +/* + * -- USB MIDI definitions. + * + * Copyright (C) 2006 Thumtronics Pty Ltd. + * Developed for Thumtronics by Grey Innovation + * Ben Williamson + * + * This software is distributed under the terms of the GNU General Public + * License ("GPL") version 2, as published by the Free Software Foundation. + * + * This file holds USB constants and structures defined + * by the USB Device Class Definition for MIDI Devices. + * Comments below reference relevant sections of that document: + * + * http://www.usb.org/developers/devclass_docs/midi10.pdf + */ + +#ifndef __LINUX_USB_MIDI_H +#define __LINUX_USB_MIDI_H + +#include + +/* A.1 MS Class-Specific Interface Descriptor Subtypes */ +#define USB_MS_HEADER 0x01 +#define USB_MS_MIDI_IN_JACK 0x02 +#define USB_MS_MIDI_OUT_JACK 0x03 +#define USB_MS_ELEMENT 0x04 + +/* A.2 MS Class-Specific Endpoint Descriptor Subtypes */ +#define USB_MS_GENERAL 0x01 + +/* A.3 MS MIDI IN and OUT Jack Types */ +#define USB_MS_EMBEDDED 0x01 +#define USB_MS_EXTERNAL 0x02 + +/* 6.1.2.1 Class-Specific MS Interface Header Descriptor */ +struct usb_ms_header_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __le16 bcdMSC; + __le16 wTotalLength; +} __attribute__ ((packed)); + +#define USB_DT_MS_HEADER_SIZE 7 + +/* 6.1.2.2 MIDI IN Jack Descriptor */ +struct usb_midi_in_jack_descriptor { + __u8 bLength; + __u8 bDescriptorType; // USB_DT_CS_INTERFACE + __u8 bDescriptorSubtype; // USB_MS_MIDI_IN_JACK + __u8 bJackType; // USB_MS_EMBEDDED/EXTERNAL + __u8 bJackID; + __u8 iJack; +} __attribute__ ((packed)); + +#define USB_DT_MIDI_IN_SIZE 6 + +struct usb_midi_source_pin { + __u8 baSourceID; + __u8 baSourcePin; +} __attribute__ ((packed)); + +/* 6.1.2.3 MIDI OUT Jack Descriptor */ +struct usb_midi_out_jack_descriptor { + __u8 bLength; + __u8 bDescriptorType; // USB_DT_CS_INTERFACE + __u8 bDescriptorSubtype; // USB_MS_MIDI_OUT_JACK + __u8 bJackType; // USB_MS_EMBEDDED/EXTERNAL + __u8 bJackID; + __u8 bNrInputPins; // p + struct usb_midi_source_pin pins[]; // [p] + /*__u8 iJack; -- ommitted due to variable-sized pins[] */ +} __attribute__ ((packed)); + +#define USB_DT_MIDI_OUT_SIZE(p) (7 + 2 * (p)) + +/* As above, but more useful for defining your own descriptors: */ +#define DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(p) \ +struct usb_midi_out_jack_descriptor_##p { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubtype; \ + __u8 bJackType; \ + __u8 bJackID; \ + __u8 bNrInputPins; \ + struct usb_midi_source_pin pins[p]; \ + __u8 iJack; \ +} __attribute__ ((packed)) + +/* 6.2.2 Class-Specific MS Bulk Data Endpoint Descriptor */ +struct usb_ms_endpoint_descriptor { + __u8 bLength; // 4+n + __u8 bDescriptorType; // USB_DT_CS_ENDPOINT + __u8 bDescriptorSubtype; // USB_MS_GENERAL + __u8 bNumEmbMIDIJack; // n + __u8 baAssocJackID[]; // [n] +} __attribute__ ((packed)); + +#define USB_DT_MS_ENDPOINT_SIZE(n) (4 + (n)) + +/* As above, but more useful for defining your own descriptors: */ +#define DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(n) \ +struct usb_ms_endpoint_descriptor_##n { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubtype; \ + __u8 bNumEmbMIDIJack; \ + __u8 baAssocJackID[n]; \ +} __attribute__ ((packed)) + +#endif -- cgit v1.2.3-71-gd317 From 3d5b2510f6e361e2203e163c03b93d0026de5629 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 30 Jul 2006 18:43:43 +0200 Subject: USB: making the kernel -Wshadow clean - USB & completion include/linux/usb.h causes a lot of -Wshadow warnings - fix them. include/linux/usb.h:901: warning: declaration of 'complete' shadows a global declaration include/linux/completion.h:52: warning: shadowed declaration is here include/linux/usb.h:932: warning: declaration of 'complete' shadows a global declaration include/linux/completion.h:52: warning: shadowed declaration is here include/linux/usb.h:967: warning: declaration of 'complete' shadows a global declaration include/linux/completion.h:52: warning: shadowed declaration is here Signed-off-by: Jesper Juhl Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e22f4b386605..3d5cfa731680 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -932,7 +932,7 @@ struct urb * @setup_packet: pointer to the setup_packet buffer * @transfer_buffer: pointer to the transfer buffer * @buffer_length: length of the transfer buffer - * @complete: pointer to the usb_complete_t function + * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a control urb with the proper information needed to submit @@ -944,7 +944,7 @@ static inline void usb_fill_control_urb (struct urb *urb, unsigned char *setup_packet, void *transfer_buffer, int buffer_length, - usb_complete_t complete, + usb_complete_t complete_fn, void *context) { spin_lock_init(&urb->lock); @@ -953,7 +953,7 @@ static inline void usb_fill_control_urb (struct urb *urb, urb->setup_packet = setup_packet; urb->transfer_buffer = transfer_buffer; urb->transfer_buffer_length = buffer_length; - urb->complete = complete; + urb->complete = complete_fn; urb->context = context; } @@ -964,7 +964,7 @@ static inline void usb_fill_control_urb (struct urb *urb, * @pipe: the endpoint pipe * @transfer_buffer: pointer to the transfer buffer * @buffer_length: length of the transfer buffer - * @complete: pointer to the usb_complete_t function + * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a bulk urb with the proper information needed to submit it @@ -975,7 +975,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb, unsigned int pipe, void *transfer_buffer, int buffer_length, - usb_complete_t complete, + usb_complete_t complete_fn, void *context) { spin_lock_init(&urb->lock); @@ -983,7 +983,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb, urb->pipe = pipe; urb->transfer_buffer = transfer_buffer; urb->transfer_buffer_length = buffer_length; - urb->complete = complete; + urb->complete = complete_fn; urb->context = context; } @@ -994,7 +994,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb, * @pipe: the endpoint pipe * @transfer_buffer: pointer to the transfer buffer * @buffer_length: length of the transfer buffer - * @complete: pointer to the usb_complete_t function + * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * @interval: what to set the urb interval to, encoded like * the endpoint descriptor's bInterval value. @@ -1010,7 +1010,7 @@ static inline void usb_fill_int_urb (struct urb *urb, unsigned int pipe, void *transfer_buffer, int buffer_length, - usb_complete_t complete, + usb_complete_t complete_fn, void *context, int interval) { @@ -1019,7 +1019,7 @@ static inline void usb_fill_int_urb (struct urb *urb, urb->pipe = pipe; urb->transfer_buffer = transfer_buffer; urb->transfer_buffer_length = buffer_length; - urb->complete = complete; + urb->complete = complete_fn; urb->context = context; if (dev->speed == USB_SPEED_HIGH) urb->interval = 1 << (interval - 1); -- cgit v1.2.3-71-gd317 From b7cfaaaf86571732c7728e95a2231a860385463c Mon Sep 17 00:00:00 2001 From: "Luiz Fernando N. Capitulino" Date: Wed, 27 Sep 2006 11:58:53 -0700 Subject: USB: New functions to check endpoints info. These functions makes USB driver's code simpler when dealing with endpoints by avoiding them from accessing the endpoint's descriptor structure directly when they only need to know the endpoint's transfer type and/or direction. Please, read each functions' documentation in order to know how to use them. Signed-off-by: Luiz Fernando N. Capitulino Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb.h | 14 +++++ 2 files changed, 158 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 9ebfc0fe819d..82837d45b484 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -482,6 +482,138 @@ int usb_get_current_frame_number(struct usb_device *dev) return dev->bus->op->get_frame_number (dev); } +/** + * usb_endpoint_dir_in - check if the endpoint has IN direction + * @epd: endpoint to be checked + * + * Returns true if the endpoint is of type IN, otherwise it returns false. + */ +int usb_endpoint_dir_in(const struct usb_endpoint_descriptor *epd) +{ + return ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN); +} + +/** + * usb_endpoint_dir_out - check if the endpoint has OUT direction + * @epd: endpoint to be checked + * + * Returns true if the endpoint is of type OUT, otherwise it returns false. + */ +int usb_endpoint_dir_out(const struct usb_endpoint_descriptor *epd) +{ + return ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT); +} + +/** + * usb_endpoint_xfer_bulk - check if the endpoint has bulk transfer type + * @epd: endpoint to be checked + * + * Returns true if the endpoint is of type bulk, otherwise it returns false. + */ +int usb_endpoint_xfer_bulk(const struct usb_endpoint_descriptor *epd) +{ + return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_BULK); +} + +/** + * usb_endpoint_xfer_int - check if the endpoint has interrupt transfer type + * @epd: endpoint to be checked + * + * Returns true if the endpoint is of type interrupt, otherwise it returns + * false. + */ +int usb_endpoint_xfer_int(const struct usb_endpoint_descriptor *epd) +{ + return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_INT); +} + +/** + * usb_endpoint_xfer_isoc - check if the endpoint has isochronous transfer type + * @epd: endpoint to be checked + * + * Returns true if the endpoint is of type isochronous, otherwise it returns + * false. + */ +int usb_endpoint_xfer_isoc(const struct usb_endpoint_descriptor *epd) +{ + return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_ISOC); +} + +/** + * usb_endpoint_is_bulk_in - check if the endpoint is bulk IN + * @epd: endpoint to be checked + * + * Returns true if the endpoint has bulk transfer type and IN direction, + * otherwise it returns false. + */ +int usb_endpoint_is_bulk_in(const struct usb_endpoint_descriptor *epd) +{ + return (usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_in(epd)); +} + +/** + * usb_endpoint_is_bulk_out - check if the endpoint is bulk OUT + * @epd: endpoint to be checked + * + * Returns true if the endpoint has bulk transfer type and OUT direction, + * otherwise it returns false. + */ +int usb_endpoint_is_bulk_out(const struct usb_endpoint_descriptor *epd) +{ + return (usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_out(epd)); +} + +/** + * usb_endpoint_is_int_in - check if the endpoint is interrupt IN + * @epd: endpoint to be checked + * + * Returns true if the endpoint has interrupt transfer type and IN direction, + * otherwise it returns false. + */ +int usb_endpoint_is_int_in(const struct usb_endpoint_descriptor *epd) +{ + return (usb_endpoint_xfer_int(epd) && usb_endpoint_dir_in(epd)); +} + +/** + * usb_endpoint_is_int_out - check if the endpoint is interrupt OUT + * @epd: endpoint to be checked + * + * Returns true if the endpoint has interrupt transfer type and OUT direction, + * otherwise it returns false. + */ +int usb_endpoint_is_int_out(const struct usb_endpoint_descriptor *epd) +{ + return (usb_endpoint_xfer_int(epd) && usb_endpoint_dir_out(epd)); +} + +/** + * usb_endpoint_is_isoc_in - check if the endpoint is isochronous IN + * @epd: endpoint to be checked + * + * Returns true if the endpoint has isochronous transfer type and IN direction, + * otherwise it returns false. + */ +int usb_endpoint_is_isoc_in(const struct usb_endpoint_descriptor *epd) +{ + return (usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_in(epd)); +} + +/** + * usb_endpoint_is_isoc_out - check if the endpoint is isochronous OUT + * @epd: endpoint to be checked + * + * Returns true if the endpoint has isochronous transfer type and OUT direction, + * otherwise it returns false. + */ +int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor *epd) +{ + return (usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_out(epd)); +} + /*-------------------------------------------------------------------*/ /* * __usb_get_extra_descriptor() finds a descriptor of specific type in the @@ -909,6 +1041,18 @@ EXPORT_SYMBOL(__usb_get_extra_descriptor); EXPORT_SYMBOL(usb_find_device); EXPORT_SYMBOL(usb_get_current_frame_number); +EXPORT_SYMBOL_GPL(usb_endpoint_dir_in); +EXPORT_SYMBOL_GPL(usb_endpoint_dir_out); +EXPORT_SYMBOL_GPL(usb_endpoint_xfer_bulk); +EXPORT_SYMBOL_GPL(usb_endpoint_xfer_int); +EXPORT_SYMBOL_GPL(usb_endpoint_xfer_isoc); +EXPORT_SYMBOL_GPL(usb_endpoint_is_bulk_in); +EXPORT_SYMBOL_GPL(usb_endpoint_is_bulk_out); +EXPORT_SYMBOL_GPL(usb_endpoint_is_int_in); +EXPORT_SYMBOL_GPL(usb_endpoint_is_int_out); +EXPORT_SYMBOL_GPL(usb_endpoint_is_isoc_in); +EXPORT_SYMBOL_GPL(usb_endpoint_is_isoc_out); + EXPORT_SYMBOL (usb_buffer_alloc); EXPORT_SYMBOL (usb_buffer_free); diff --git a/include/linux/usb.h b/include/linux/usb.h index 3d5cfa731680..f807479ef65b 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -467,6 +467,20 @@ static inline int usb_make_path (struct usb_device *dev, char *buf, /*-------------------------------------------------------------------------*/ +extern int usb_endpoint_dir_in(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_dir_out(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_xfer_bulk(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_xfer_int(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_xfer_isoc(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_is_bulk_in(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_is_bulk_out(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_is_int_in(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_is_int_out(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_is_isoc_in(const struct usb_endpoint_descriptor *epd); +extern int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor *epd); + +/*-------------------------------------------------------------------------*/ + #define USB_DEVICE_ID_MATCH_DEVICE \ (USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT) #define USB_DEVICE_ID_MATCH_DEV_RANGE \ -- cgit v1.2.3-71-gd317 From dfe0d3ba20e860d0b9a16c4c6524180b8f93be05 Mon Sep 17 00:00:00 2001 From: Matthew Dharm Date: Sun, 13 Aug 2006 17:30:14 -0700 Subject: USB Storage: add rio karma eject support This changeset from Keith Bennett (via Bob Copeland) moves the Karma initializer to its own file and adds trapping of the START_STOP command to enable eject of the device. Signed-off-by: Keith Bennett Signed-off-by: Bob Copeland Signed-off-by: Matthew Dharm Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/Kconfig | 12 +++ drivers/usb/storage/Makefile | 1 + drivers/usb/storage/initializers.c | 73 ----------------- drivers/usb/storage/initializers.h | 1 - drivers/usb/storage/karma.c | 155 +++++++++++++++++++++++++++++++++++++ drivers/usb/storage/karma.h | 7 ++ drivers/usb/storage/unusual_devs.h | 2 +- drivers/usb/storage/usb.c | 11 +++ include/linux/usb_usual.h | 3 + 9 files changed, 190 insertions(+), 75 deletions(-) create mode 100644 drivers/usb/storage/karma.c create mode 100644 drivers/usb/storage/karma.h (limited to 'include/linux') diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig index be9eec225743..86e48c42d6af 100644 --- a/drivers/usb/storage/Kconfig +++ b/drivers/usb/storage/Kconfig @@ -135,6 +135,18 @@ config USB_STORAGE_ONETOUCH this input in any keybinding software. (e.g. gnome's keyboard short- cuts) +config USB_STORAGE_KARMA + bool "Support for Rio Karma music player" + depends on USB_STORAGE + help + Say Y here to include additional code to support the Rio Karma + USB interface. + + This code places the Rio Karma into mass storage mode, enabling + it to be mounted as an ordinary filesystem. Performing an eject + on the resulting scsi device node returns the Karma to normal + operation. + config USB_LIBUSUAL bool "The shared table of common (or usual) storage devices" depends on USB diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile index 8cbba22508a4..023969b4385b 100644 --- a/drivers/usb/storage/Makefile +++ b/drivers/usb/storage/Makefile @@ -20,6 +20,7 @@ usb-storage-obj-$(CONFIG_USB_STORAGE_DATAFAB) += datafab.o usb-storage-obj-$(CONFIG_USB_STORAGE_JUMPSHOT) += jumpshot.o usb-storage-obj-$(CONFIG_USB_STORAGE_ALAUDA) += alauda.o usb-storage-obj-$(CONFIG_USB_STORAGE_ONETOUCH) += onetouch.o +usb-storage-obj-$(CONFIG_USB_STORAGE_KARMA) += karma.o usb-storage-objs := scsiglue.o protocol.o transport.o usb.o \ initializers.o $(usb-storage-obj-y) diff --git a/drivers/usb/storage/initializers.c b/drivers/usb/storage/initializers.c index ab173b30076e..5b06f9240d05 100644 --- a/drivers/usb/storage/initializers.c +++ b/drivers/usb/storage/initializers.c @@ -45,12 +45,6 @@ #include "debug.h" #include "transport.h" -#define RIO_MSC 0x08 -#define RIOP_INIT "RIOP\x00\x01\x08" -#define RIOP_INIT_LEN 7 -#define RIO_SEND_LEN 40 -#define RIO_RECV_LEN 0x200 - /* This places the Shuttle/SCM USB<->SCSI bridge devices in multi-target * mode */ int usb_stor_euscsi_init(struct us_data *us) @@ -97,70 +91,3 @@ int usb_stor_ucr61s2b_init(struct us_data *us) return (res ? -1 : 0); } - -/* Place the Rio Karma into mass storage mode. - * - * The initialization begins by sending 40 bytes starting - * RIOP\x00\x01\x08\x00, which the device will ack with a 512-byte - * packet with the high four bits set and everything else null. - * - * Next, we send RIOP\x80\x00\x08\x00. Each time, a 512 byte response - * must be read, but we must loop until byte 5 in the response is 0x08, - * indicating success. */ -int rio_karma_init(struct us_data *us) -{ - int result, partial; - char *recv; - unsigned long timeout; - - // us->iobuf is big enough to hold cmd but not receive - if (!(recv = kmalloc(RIO_RECV_LEN, GFP_KERNEL))) - goto die_nomem; - - US_DEBUGP("Initializing Karma...\n"); - - memset(us->iobuf, 0, RIO_SEND_LEN); - memcpy(us->iobuf, RIOP_INIT, RIOP_INIT_LEN); - - result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, - us->iobuf, RIO_SEND_LEN, &partial); - if (result != USB_STOR_XFER_GOOD) - goto die; - - result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, - recv, RIO_RECV_LEN, &partial); - if (result != USB_STOR_XFER_GOOD) - goto die; - - us->iobuf[4] = 0x80; - us->iobuf[5] = 0; - timeout = jiffies + msecs_to_jiffies(3000); - for (;;) { - US_DEBUGP("Sending init command\n"); - result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, - us->iobuf, RIO_SEND_LEN, &partial); - if (result != USB_STOR_XFER_GOOD) - goto die; - - result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, - recv, RIO_RECV_LEN, &partial); - if (result != USB_STOR_XFER_GOOD) - goto die; - - if (recv[5] == RIO_MSC) - break; - if (time_after(jiffies, timeout)) - goto die; - msleep(10); - } - US_DEBUGP("Karma initialized.\n"); - kfree(recv); - return 0; - -die: - kfree(recv); -die_nomem: - US_DEBUGP("Could not initialize karma.\n"); - return USB_STOR_TRANSPORT_FAILED; -} - diff --git a/drivers/usb/storage/initializers.h b/drivers/usb/storage/initializers.h index 927f7781080f..e2967a4d48a2 100644 --- a/drivers/usb/storage/initializers.h +++ b/drivers/usb/storage/initializers.h @@ -47,4 +47,3 @@ int usb_stor_euscsi_init(struct us_data *us); /* This function is required to activate all four slots on the UCR-61S2B * flash reader */ int usb_stor_ucr61s2b_init(struct us_data *us); -int rio_karma_init(struct us_data *us); diff --git a/drivers/usb/storage/karma.c b/drivers/usb/storage/karma.c new file mode 100644 index 000000000000..0d79ae5683f7 --- /dev/null +++ b/drivers/usb/storage/karma.c @@ -0,0 +1,155 @@ +/* Driver for Rio Karma + * + * (c) 2006 Bob Copeland + * (c) 2006 Keith Bennett + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include + +#include "usb.h" +#include "transport.h" +#include "debug.h" +#include "karma.h" + +#define RIO_PREFIX "RIOP\x00" +#define RIO_PREFIX_LEN 5 +#define RIO_SEND_LEN 40 +#define RIO_RECV_LEN 0x200 + +#define RIO_ENTER_STORAGE 0x1 +#define RIO_LEAVE_STORAGE 0x2 +#define RIO_RESET 0xC + +extern int usb_stor_Bulk_transport(struct scsi_cmnd *, struct us_data *); + +struct karma_data { + int in_storage; + char *recv; +}; + +/* + * Send commands to Rio Karma. + * + * For each command we send 40 bytes starting 'RIOP\0' followed by + * the command number and a sequence number, which the device will ack + * with a 512-byte packet with the high four bits set and everything + * else null. Then we send 'RIOP\x80' followed by a zero and the + * sequence number, until byte 5 in the response repeats the sequence + * number. + */ +static int rio_karma_send_command(char cmd, struct us_data *us) +{ + int result, partial; + unsigned long timeout; + static unsigned char seq = 1; + struct karma_data *data = (struct karma_data *) us->extra; + + US_DEBUGP("karma: sending command %04x\n", cmd); + memset(us->iobuf, 0, RIO_SEND_LEN); + memcpy(us->iobuf, RIO_PREFIX, RIO_PREFIX_LEN); + us->iobuf[5] = cmd; + us->iobuf[6] = seq; + + timeout = jiffies + msecs_to_jiffies(6000); + for (;;) { + result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, + us->iobuf, RIO_SEND_LEN, &partial); + if (result != USB_STOR_XFER_GOOD) + goto err; + + result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, + data->recv, RIO_RECV_LEN, &partial); + if (result != USB_STOR_XFER_GOOD) + goto err; + + if (data->recv[5] == seq) + break; + + if (time_after(jiffies, timeout)) + goto err; + + us->iobuf[4] = 0x80; + us->iobuf[5] = 0; + msleep(50); + } + + seq++; + if (seq == 0) + seq = 1; + + US_DEBUGP("karma: sent command %04x\n", cmd); + return 0; +err: + US_DEBUGP("karma: command %04x failed\n", cmd); + return USB_STOR_TRANSPORT_FAILED; +} + +/* + * Trap START_STOP and READ_10 to leave/re-enter storage mode. + * Everything else is propagated to the normal bulk layer. + */ +int rio_karma_transport(struct scsi_cmnd *srb, struct us_data *us) +{ + int ret; + struct karma_data *data = (struct karma_data *) us->extra; + + if (srb->cmnd[0] == READ_10 && !data->in_storage) { + ret = rio_karma_send_command(RIO_ENTER_STORAGE, us); + if (ret) + return ret; + + data->in_storage = 1; + return usb_stor_Bulk_transport(srb, us); + } else if (srb->cmnd[0] == START_STOP) { + ret = rio_karma_send_command(RIO_LEAVE_STORAGE, us); + if (ret) + return ret; + + data->in_storage = 0; + return rio_karma_send_command(RIO_RESET, us); + } + return usb_stor_Bulk_transport(srb, us); +} + +static void rio_karma_destructor(void *extra) +{ + struct karma_data *data = (struct karma_data *) extra; + kfree(data->recv); +} + +int rio_karma_init(struct us_data *us) +{ + int ret = 0; + struct karma_data *data = kzalloc(sizeof(struct karma_data), GFP_NOIO); + if (!data) + goto out; + + data->recv = kmalloc(RIO_RECV_LEN, GFP_NOIO); + if (!data->recv) { + kfree(data); + goto out; + } + + us->extra = data; + us->extra_destructor = rio_karma_destructor; + ret = rio_karma_send_command(RIO_ENTER_STORAGE, us); + data->in_storage = (ret == 0); +out: + return ret; +} diff --git a/drivers/usb/storage/karma.h b/drivers/usb/storage/karma.h new file mode 100644 index 000000000000..8a60972af8c5 --- /dev/null +++ b/drivers/usb/storage/karma.h @@ -0,0 +1,7 @@ +#ifndef _KARMA_USB_H +#define _KARMA_USB_H + +extern int rio_karma_init(struct us_data *us); +extern int rio_karma_transport(struct scsi_cmnd *srb, struct us_data *us); + +#endif diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index fa49357289c4..1f11c9d44eaa 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -221,7 +221,7 @@ UNUSUAL_DEV( 0x0457, 0x0151, 0x0100, 0x0100, UNUSUAL_DEV( 0x045a, 0x5210, 0x0101, 0x0101, "Rio", "Rio Karma", - US_SC_SCSI, US_PR_BULK, rio_karma_init, 0), + US_SC_SCSI, US_PR_KARMA, rio_karma_init, 0), /* Patch submitted by Philipp Friedrich */ UNUSUAL_DEV( 0x0482, 0x0100, 0x0100, 0x0100, diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 8d7bdcb5924d..b8d6031b0975 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -98,6 +98,9 @@ #ifdef CONFIG_USB_STORAGE_ALAUDA #include "alauda.h" #endif +#ifdef CONFIG_USB_STORAGE_KARMA +#include "karma.h" +#endif /* Some informational data */ MODULE_AUTHOR("Matthew Dharm "); @@ -646,6 +649,14 @@ static int get_transport(struct us_data *us) break; #endif +#ifdef CONFIG_USB_STORAGE_KARMA + case US_PR_KARMA: + us->transport_name = "Rio Karma/Bulk"; + us->transport = rio_karma_transport; + us->transport_reset = usb_stor_Bulk_reset; + break; +#endif + default: return -EIO; } diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index e7fc5fed5b98..2ae76fe52ff7 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -108,6 +108,9 @@ enum { US_DO_ALL_FLAGS }; #ifdef CONFIG_USB_STORAGE_ALAUDA #define US_PR_ALAUDA 0xf4 /* Alauda chipsets */ #endif +#ifdef CONFIG_USB_STORAGE_KARMA +#define US_PR_KARMA 0xf5 /* Rio Karma */ +#endif #define US_PR_DEVICE 0xff /* Use device's value */ -- cgit v1.2.3-71-gd317 From 095bc335360a51623dd8571839bbf465851a7f4b Mon Sep 17 00:00:00 2001 From: "Luiz Fernando N. Capitulino" Date: Sat, 26 Aug 2006 23:48:11 -0300 Subject: USB core: Use const where possible. This patch marks some USB core's functions parameters as const. This improves the design (we're saying to the caller that its parameter is not going to be modified) and may help in compiler's optimisation work. Signed-off-by: Luiz Fernando N. Capitulino Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb.c | 21 +++++++++++---------- drivers/usb/core/usb.h | 4 ++-- include/linux/usb.h | 18 +++++++++--------- 3 files changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 82837d45b484..4eb98eb3804f 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -67,7 +67,8 @@ static int nousb; /* Disable USB when built into kernel image */ * Don't call this function unless you are bound to one of the interfaces * on this device or you have locked the device! */ -struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, unsigned ifnum) +struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev, + unsigned ifnum) { struct usb_host_config *config = dev->actconfig; int i; @@ -100,8 +101,8 @@ struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, unsigned ifnum) * Don't call this function unless you are bound to the intf interface * or you have locked the device! */ -struct usb_host_interface *usb_altnum_to_altsetting(struct usb_interface *intf, - unsigned int altnum) +struct usb_host_interface *usb_altnum_to_altsetting(const struct usb_interface *intf, + unsigned int altnum) { int i; @@ -356,7 +357,7 @@ void usb_put_intf(struct usb_interface *intf) * case the driver already owns the device lock.) */ int usb_lock_device_for_reset(struct usb_device *udev, - struct usb_interface *iface) + const struct usb_interface *iface) { unsigned long jiffies_expire = jiffies + HZ; @@ -852,8 +853,8 @@ void usb_buffer_unmap (struct urb *urb) * * Reverse the effect of this call with usb_buffer_unmap_sg(). */ -int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int nents) +int usb_buffer_map_sg(const struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int nents) { struct usb_bus *bus; struct device *controller; @@ -887,8 +888,8 @@ int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe, * Use this when you are re-using a scatterlist's data buffers for * another USB request. */ -void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int n_hw_ents) +void usb_buffer_dmasync_sg(const struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents) { struct usb_bus *bus; struct device *controller; @@ -913,8 +914,8 @@ void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe, * * Reverses the effect of usb_buffer_map_sg(). */ -void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int n_hw_ents) +void usb_buffer_unmap_sg(const struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents) { struct usb_bus *bus; struct device *controller; diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 6096ead2758c..67da6d0b316f 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -53,7 +53,7 @@ extern struct usb_device_driver usb_generic_driver; * no such thing as a platform USB device, so we can steal the use * of the platform_data field. */ -static inline int is_usb_device(struct device *dev) +static inline int is_usb_device(const struct device *dev) { return dev->platform_data == &usb_generic_driver; } @@ -78,7 +78,7 @@ static inline void mark_quiesced(struct usb_interface *f) f->is_active = 0; } -static inline int is_active(struct usb_interface *f) +static inline int is_active(const struct usb_interface *f) { return f->is_active; } diff --git a/include/linux/usb.h b/include/linux/usb.h index f807479ef65b..26d8a5f36896 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -387,7 +387,7 @@ extern void usb_put_dev(struct usb_device *dev); #define usb_unlock_device(udev) up(&(udev)->dev.sem) #define usb_trylock_device(udev) down_trylock(&(udev)->dev.sem) extern int usb_lock_device_for_reset(struct usb_device *udev, - struct usb_interface *iface); + const struct usb_interface *iface); /* USB port reset for device reinitialization */ extern int usb_reset_device(struct usb_device *dev); @@ -426,10 +426,10 @@ const struct usb_device_id *usb_match_id(struct usb_interface *interface, extern struct usb_interface *usb_find_interface(struct usb_driver *drv, int minor); -extern struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, +extern struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev, unsigned ifnum); extern struct usb_host_interface *usb_altnum_to_altsetting( - struct usb_interface *intf, unsigned int altnum); + const struct usb_interface *intf, unsigned int altnum); /** @@ -1064,14 +1064,14 @@ void usb_buffer_unmap (struct urb *urb); #endif struct scatterlist; -int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int nents); +int usb_buffer_map_sg(const struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int nents); #if 0 -void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int n_hw_ents); +void usb_buffer_dmasync_sg(const struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents); #endif -void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int n_hw_ents); +void usb_buffer_unmap_sg(const struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents); /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * -- cgit v1.2.3-71-gd317 From 088dc270e1da03744d977cbd9edd4311af142348 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Aug 2006 12:08:19 -0400 Subject: usbcore: help drivers to change device configs It's generally a bad idea for USB interface drivers to try to change a device's configuration, and usbcore doesn't provide any way for them to do it. However in a few exceptional circumstances it can make sense. This patch (as767) adds a roundabout mechanism to help drivers that may need it. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb.h | 3 +++ 2 files changed, 62 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 49cfd7928a1c..333b22c68aa4 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1493,6 +1493,65 @@ free_interfaces: return 0; } +struct set_config_request { + struct usb_device *udev; + int config; + struct work_struct work; +}; + +/* Worker routine for usb_driver_set_configuration() */ +static void driver_set_config_work(void *_req) +{ + struct set_config_request *req = _req; + + usb_lock_device(req->udev); + usb_set_configuration(req->udev, req->config); + usb_unlock_device(req->udev); + usb_put_dev(req->udev); + kfree(req); +} + +/** + * usb_driver_set_configuration - Provide a way for drivers to change device configurations + * @udev: the device whose configuration is being updated + * @config: the configuration being chosen. + * Context: In process context, must be able to sleep + * + * Device interface drivers are not allowed to change device configurations. + * This is because changing configurations will destroy the interface the + * driver is bound to and create new ones; it would be like a floppy-disk + * driver telling the computer to replace the floppy-disk drive with a + * tape drive! + * + * Still, in certain specialized circumstances the need may arise. This + * routine gets around the normal restrictions by using a work thread to + * submit the change-config request. + * + * Returns 0 if the request was succesfully queued, error code otherwise. + * The caller has no way to know whether the queued request will eventually + * succeed. + */ +int usb_driver_set_configuration(struct usb_device *udev, int config) +{ + struct set_config_request *req; + + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + req->udev = udev; + req->config = config; + INIT_WORK(&req->work, driver_set_config_work, req); + + usb_get_dev(udev); + if (!schedule_work(&req->work)) { + usb_put_dev(udev); + kfree(req); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(usb_driver_set_configuration); + // synchronous request completion model EXPORT_SYMBOL(usb_control_msg); EXPORT_SYMBOL(usb_bulk_msg); diff --git a/include/linux/usb.h b/include/linux/usb.h index 26d8a5f36896..f104efa04d79 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1099,6 +1099,9 @@ extern int usb_clear_halt(struct usb_device *dev, int pipe); extern int usb_reset_configuration(struct usb_device *dev); extern int usb_set_interface(struct usb_device *dev, int ifnum, int alternate); +/* this request isn't really synchronous, but it belongs with the others */ +extern int usb_driver_set_configuration(struct usb_device *udev, int config); + /* * timeouts, in milliseconds, used for sending/receiving control messages * they typically complete within a few frames (msec) after they're issued -- cgit v1.2.3-71-gd317 From a6d2bb9ff919b4685bd684620ec7a1ffa8bf2349 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Aug 2006 11:27:36 -0400 Subject: USB: remove struct usb_operations All of the currently-supported USB host controller drivers use the HCD bus-glue framework. As part of the program for flattening out the glue layer, this patch (as769) removes the usb_operations structure. All function calls now go directly to the HCD routines (slightly renamed to remain within the "usb_" namespace). The patch also removes usb_alloc_bus(), because it's not useful in the HCD framework and it wasn't referenced anywhere. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 49 +++++----------------------------------------- drivers/usb/core/hcd.h | 32 +++++++----------------------- drivers/usb/core/message.c | 4 ++-- drivers/usb/core/urb.c | 13 +++++------- drivers/usb/core/usb.c | 10 +++++----- include/linux/usb.h | 4 ---- 6 files changed, 24 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index ea20a3a5a9b9..2102c4deec1e 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -731,30 +731,6 @@ static void usb_bus_init (struct usb_bus *bus) kref_init(&bus->kref); } -/** - * usb_alloc_bus - creates a new USB host controller structure - * @op: pointer to a struct usb_operations that this bus structure should use - * Context: !in_interrupt() - * - * Creates a USB host controller bus structure with the specified - * usb_operations and initializes all the necessary internal objects. - * - * If no memory is available, NULL is returned. - * - * The caller should call usb_put_bus() when it is finished with the structure. - */ -struct usb_bus *usb_alloc_bus (struct usb_operations *op) -{ - struct usb_bus *bus; - - bus = kzalloc (sizeof *bus, GFP_KERNEL); - if (!bus) - return NULL; - usb_bus_init (bus); - bus->op = op; - return bus; -} - /*-------------------------------------------------------------------------*/ /** @@ -1102,7 +1078,7 @@ static void urb_unlink (struct urb *urb) * expects usb_submit_urb() to have sanity checked and conditioned all * inputs in the urb */ -static int hcd_submit_urb (struct urb *urb, gfp_t mem_flags) +int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) { int status; struct usb_hcd *hcd = urb->dev->bus->hcpriv; @@ -1211,7 +1187,7 @@ done: /*-------------------------------------------------------------------------*/ /* called in any context */ -static int hcd_get_frame_number (struct usb_device *udev) +int usb_hcd_get_frame_number (struct usb_device *udev) { struct usb_hcd *hcd = (struct usb_hcd *)udev->bus->hcpriv; if (!HC_IS_RUNNING (hcd->state)) @@ -1253,7 +1229,7 @@ unlink1 (struct usb_hcd *hcd, struct urb *urb) * caller guarantees urb won't be recycled till both unlink() * and the urb's completion function return */ -static int hcd_unlink_urb (struct urb *urb, int status) +int usb_hcd_unlink_urb (struct urb *urb, int status) { struct usb_host_endpoint *ep; struct usb_hcd *hcd = NULL; @@ -1351,8 +1327,8 @@ done: * example: a qh stored in ep->hcpriv, holding state related to endpoint * type, maxpacket size, toggle, halt status, and scheduling. */ -static void -hcd_endpoint_disable (struct usb_device *udev, struct usb_host_endpoint *ep) +void usb_hcd_endpoint_disable (struct usb_device *udev, + struct usb_host_endpoint *ep) { struct usb_hcd *hcd; struct urb *urb; @@ -1589,20 +1565,6 @@ EXPORT_SYMBOL (usb_bus_start_enum); /*-------------------------------------------------------------------------*/ -/* - * usb_hcd_operations - adapts usb_bus framework to HCD framework (bus glue) - */ -static struct usb_operations usb_hcd_operations = { - .get_frame_number = hcd_get_frame_number, - .submit_urb = hcd_submit_urb, - .unlink_urb = hcd_unlink_urb, - .buffer_alloc = hcd_buffer_alloc, - .buffer_free = hcd_buffer_free, - .disable = hcd_endpoint_disable, -}; - -/*-------------------------------------------------------------------------*/ - /** * usb_hcd_giveback_urb - return URB from HCD to device driver * @hcd: host controller returning the URB @@ -1744,7 +1706,6 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, dev_set_drvdata(dev, hcd); usb_bus_init(&hcd->self); - hcd->self.op = &usb_hcd_operations; hcd->self.hcpriv = hcd; hcd->self.release = &hcd_release; hcd->self.controller = dev; diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index fc71a08a1af4..83e229914797 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -139,28 +139,6 @@ struct hcd_timeout { /* timeouts we allocate */ /*-------------------------------------------------------------------------*/ -/* - * FIXME usb_operations should vanish or become hc_driver, - * when usb_bus and usb_hcd become the same thing. - */ - -struct usb_operations { - int (*get_frame_number) (struct usb_device *usb_dev); - int (*submit_urb) (struct urb *urb, gfp_t mem_flags); - int (*unlink_urb) (struct urb *urb, int status); - - /* allocate dma-consistent buffer for URB_DMA_NOMAPPING */ - void *(*buffer_alloc)(struct usb_bus *bus, size_t size, - gfp_t mem_flags, - dma_addr_t *dma); - void (*buffer_free)(struct usb_bus *bus, size_t size, - void *addr, dma_addr_t dma); - - void (*disable)(struct usb_device *udev, - struct usb_host_endpoint *ep); -}; - -/* each driver provides one of these, and hardware init support */ struct pt_regs; @@ -222,7 +200,13 @@ struct hc_driver { /* Needed only if port-change IRQs are level-triggered */ }; -extern void usb_hcd_giveback_urb (struct usb_hcd *hcd, struct urb *urb, struct pt_regs *regs); +extern int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags); +extern int usb_hcd_unlink_urb (struct urb *urb, int status); +extern void usb_hcd_giveback_urb (struct usb_hcd *hcd, struct urb *urb, + struct pt_regs *regs); +extern void usb_hcd_endpoint_disable (struct usb_device *udev, + struct usb_host_endpoint *ep); +extern int usb_hcd_get_frame_number (struct usb_device *udev); extern struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, struct device *dev, char *bus_name); @@ -361,8 +345,6 @@ extern long usb_calc_bus_time (int speed, int is_input, /*-------------------------------------------------------------------------*/ -extern struct usb_bus *usb_alloc_bus (struct usb_operations *); - extern void usb_set_device_state(struct usb_device *udev, enum usb_device_state new_state); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 333b22c68aa4..1580c81a0db7 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -984,8 +984,8 @@ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr) ep = dev->ep_in[epnum]; dev->ep_in[epnum] = NULL; } - if (ep && dev->bus && dev->bus->op && dev->bus->op->disable) - dev->bus->op->disable(dev, ep); + if (ep && dev->bus) + usb_hcd_endpoint_disable(dev, ep); } /** diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 9864988377c7..576919927f53 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -221,7 +221,6 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) { int pipe, temp, max; struct usb_device *dev; - struct usb_operations *op; int is_out; if (!urb || urb->hcpriv || !urb->complete) @@ -233,8 +232,6 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) if (dev->bus->controller->power.power_state.event != PM_EVENT_ON || dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; - if (!(op = dev->bus->op) || !op->submit_urb) - return -ENODEV; urb->status = -EINPROGRESS; urb->actual_length = 0; @@ -376,7 +373,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) urb->interval = temp; } - return op->submit_urb (urb, mem_flags); + return usb_hcd_submit_urb (urb, mem_flags); } /*-------------------------------------------------------------------*/ @@ -440,9 +437,9 @@ int usb_unlink_urb(struct urb *urb) { if (!urb) return -EINVAL; - if (!(urb->dev && urb->dev->bus && urb->dev->bus->op)) + if (!(urb->dev && urb->dev->bus)) return -ENODEV; - return urb->dev->bus->op->unlink_urb(urb, -ECONNRESET); + return usb_hcd_unlink_urb(urb, -ECONNRESET); } /** @@ -468,13 +465,13 @@ int usb_unlink_urb(struct urb *urb) void usb_kill_urb(struct urb *urb) { might_sleep(); - if (!(urb && urb->dev && urb->dev->bus && urb->dev->bus->op)) + if (!(urb && urb->dev && urb->dev->bus)) return; spin_lock_irq(&urb->lock); ++urb->reject; spin_unlock_irq(&urb->lock); - urb->dev->bus->op->unlink_urb(urb, -ENOENT); + usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); spin_lock_irq(&urb->lock); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 4eb98eb3804f..7ab9d29215f8 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -480,7 +480,7 @@ exit: */ int usb_get_current_frame_number(struct usb_device *dev) { - return dev->bus->op->get_frame_number (dev); + return usb_hcd_get_frame_number (dev); } /** @@ -677,9 +677,9 @@ void *usb_buffer_alloc ( dma_addr_t *dma ) { - if (!dev || !dev->bus || !dev->bus->op || !dev->bus->op->buffer_alloc) + if (!dev || !dev->bus) return NULL; - return dev->bus->op->buffer_alloc (dev->bus, size, mem_flags, dma); + return hcd_buffer_alloc (dev->bus, size, mem_flags, dma); } /** @@ -700,11 +700,11 @@ void usb_buffer_free ( dma_addr_t dma ) { - if (!dev || !dev->bus || !dev->bus->op || !dev->bus->op->buffer_free) + if (!dev || !dev->bus) return; if (!addr) return; - dev->bus->op->buffer_free (dev->bus, size, addr, dma); + hcd_buffer_free (dev->bus, size, addr, dma); } /** diff --git a/include/linux/usb.h b/include/linux/usb.h index f104efa04d79..4709033f8fa7 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -257,8 +257,6 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size, /* ----------------------------------------------------------------------- */ -struct usb_operations; - /* USB device number allocation bitmap */ struct usb_devmap { unsigned long devicemap[128 / (8*sizeof(unsigned long))]; @@ -279,7 +277,6 @@ struct usb_bus { * round-robin allocation */ struct usb_devmap devmap; /* device address allocation map */ - struct usb_operations *op; /* Operations (specific to the HC) */ struct usb_device *root_hub; /* Root hub */ struct list_head bus_list; /* list of busses */ void *hcpriv; /* Host Controller private data */ @@ -1051,7 +1048,6 @@ extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_unlink_urb(struct urb *urb); extern void usb_kill_urb(struct urb *urb); -#define HAVE_USB_BUFFERS void *usb_buffer_alloc (struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma); void usb_buffer_free (struct usb_device *dev, size_t size, -- cgit v1.2.3-71-gd317 From dd990f16a39d4e615c0b70a0ab50b79b32bfb16d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Aug 2006 11:29:56 -0400 Subject: usbcore: Add flag for whether a host controller uses DMA This patch (as770b) introduces a new field to usb_bus: a flag indicating whether or not the host controller uses DMA. This serves to encapsulate the computation. It also means we will have only one spot to update if the DMA API changes. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++++--- include/linux/usb.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 2102c4deec1e..0cc14206920a 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1152,7 +1152,7 @@ doit: /* lower level hcd code should use *_dma exclusively, * unless it uses pio or talks to another transport. */ - if (hcd->self.controller->dma_mask) { + if (hcd->self.uses_dma) { if (usb_pipecontrol (urb->pipe) && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) urb->setup_dma = dma_map_single ( @@ -1585,8 +1585,9 @@ void usb_hcd_giveback_urb (struct usb_hcd *hcd, struct urb *urb, struct pt_regs at_root_hub = (urb->dev == hcd->self.root_hub); urb_unlink (urb); - /* lower level hcd code should use *_dma exclusively */ - if (hcd->self.controller->dma_mask && !at_root_hub) { + /* lower level hcd code should use *_dma exclusively if the + * host controller does DMA */ + if (hcd->self.uses_dma && !at_root_hub) { if (usb_pipecontrol (urb->pipe) && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) dma_unmap_single (hcd->self.controller, urb->setup_dma, @@ -1710,6 +1711,7 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, hcd->self.release = &hcd_release; hcd->self.controller = dev; hcd->self.bus_name = bus_name; + hcd->self.uses_dma = (dev->dma_mask != NULL); init_timer(&hcd->rh_timer); hcd->rh_timer.function = rh_timer_func; diff --git a/include/linux/usb.h b/include/linux/usb.h index 4709033f8fa7..09661759621f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -269,6 +269,7 @@ struct usb_bus { struct device *controller; /* host/master side hardware */ int busnum; /* Bus number (in order of reg) */ char *bus_name; /* stable id (PCI slot_name etc) */ + u8 uses_dma; /* Does the host controller use DMA? */ u8 otg_port; /* 0, or number of OTG/HNP port */ unsigned is_b_host:1; /* true during some HNP roleswitches */ unsigned b_hnp_enable:1; /* OTG: did A-Host enable HNP? */ -- cgit v1.2.3-71-gd317 From 1720058343fa43a1a25bfad9e62ea06e7e9743b6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Aug 2006 11:32:52 -0400 Subject: usbcore: trim down usb_bus structure As part of the ongoing program to flatten out the HCD bus-glue layer, this patch (as771b) eliminates the hcpriv, release, and kref fields from struct usb_bus. hcpriv and release were not being used for anything worthwhile, and kref has been moved into the enclosing usb_hcd structure. Along with those changes, the patch gets rid of usb_bus_get and usb_bus_put, replacing them with usb_get_hcd and usb_put_hcd. The one interesting aspect is that the dev_set_drvdata call was removed from usb_put_hcd, where it clearly doesn't belong. This means the driver private data won't get reset to NULL. It shouldn't cause any problems, since the private data is undefined when no driver is bound. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/buffer.c | 4 +-- drivers/usb/core/hcd.c | 67 +++++++++++++++--------------------------- drivers/usb/core/hcd.h | 13 ++++---- drivers/usb/core/usb.c | 5 ++-- drivers/usb/gadget/dummy_hcd.c | 8 ++--- drivers/usb/host/ehci-dbg.c | 6 ++-- drivers/usb/host/ohci-dbg.c | 6 ++-- drivers/usb/mon/mon_main.c | 6 ++-- include/linux/usb.h | 3 -- 9 files changed, 44 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index f4f4ef0f377a..840442a25b61 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -104,7 +104,7 @@ void *hcd_buffer_alloc ( dma_addr_t *dma ) { - struct usb_hcd *hcd = bus->hcpriv; + struct usb_hcd *hcd = bus_to_hcd(bus); int i; /* some USB hosts just use PIO */ @@ -127,7 +127,7 @@ void hcd_buffer_free ( dma_addr_t dma ) { - struct usb_hcd *hcd = bus->hcpriv; + struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (!addr) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 0cc14206920a..9dfc812de034 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -664,31 +664,6 @@ static int usb_rh_urb_dequeue (struct usb_hcd *hcd, struct urb *urb) /*-------------------------------------------------------------------------*/ -/* exported only within usbcore */ -struct usb_bus *usb_bus_get(struct usb_bus *bus) -{ - if (bus) - kref_get(&bus->kref); - return bus; -} - -static void usb_host_release(struct kref *kref) -{ - struct usb_bus *bus = container_of(kref, struct usb_bus, kref); - - if (bus->release) - bus->release(bus); -} - -/* exported only within usbcore */ -void usb_bus_put(struct usb_bus *bus) -{ - if (bus) - kref_put(&bus->kref, usb_host_release); -} - -/*-------------------------------------------------------------------------*/ - static struct class *usb_host_class; int usb_host_init(void) @@ -720,15 +695,12 @@ static void usb_bus_init (struct usb_bus *bus) bus->devnum_next = 1; bus->root_hub = NULL; - bus->hcpriv = NULL; bus->busnum = -1; bus->bandwidth_allocated = 0; bus->bandwidth_int_reqs = 0; bus->bandwidth_isoc_reqs = 0; INIT_LIST_HEAD (&bus->bus_list); - - kref_init(&bus->kref); } /*-------------------------------------------------------------------------*/ @@ -1081,7 +1053,7 @@ static void urb_unlink (struct urb *urb) int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) { int status; - struct usb_hcd *hcd = urb->dev->bus->hcpriv; + struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_host_endpoint *ep; unsigned long flags; @@ -1189,7 +1161,8 @@ done: /* called in any context */ int usb_hcd_get_frame_number (struct usb_device *udev) { - struct usb_hcd *hcd = (struct usb_hcd *)udev->bus->hcpriv; + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + if (!HC_IS_RUNNING (hcd->state)) return -ESHUTDOWN; return hcd->driver->get_frame_number (hcd); @@ -1262,7 +1235,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status) spin_lock (&hcd_data_lock); sys = &urb->dev->dev; - hcd = urb->dev->bus->hcpriv; + hcd = bus_to_hcd(urb->dev->bus); if (hcd == NULL) { retval = -ENODEV; goto done; @@ -1333,7 +1306,7 @@ void usb_hcd_endpoint_disable (struct usb_device *udev, struct usb_hcd *hcd; struct urb *urb; - hcd = udev->bus->hcpriv; + hcd = bus_to_hcd(udev->bus); WARN_ON (!HC_IS_RUNNING (hcd->state) && hcd->state != HC_STATE_HALT && udev->state != USB_STATE_NOTATTACHED); @@ -1673,14 +1646,6 @@ EXPORT_SYMBOL_GPL (usb_hc_died); /*-------------------------------------------------------------------------*/ -static void hcd_release (struct usb_bus *bus) -{ - struct usb_hcd *hcd; - - hcd = container_of(bus, struct usb_hcd, self); - kfree(hcd); -} - /** * usb_create_hcd - create and initialize an HCD structure * @driver: HC driver that will use this hcd @@ -1705,10 +1670,9 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, return NULL; } dev_set_drvdata(dev, hcd); + kref_init(&hcd->kref); usb_bus_init(&hcd->self); - hcd->self.hcpriv = hcd; - hcd->self.release = &hcd_release; hcd->self.controller = dev; hcd->self.bus_name = bus_name; hcd->self.uses_dma = (dev->dma_mask != NULL); @@ -1725,10 +1689,25 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, } EXPORT_SYMBOL (usb_create_hcd); +static void hcd_release (struct kref *kref) +{ + struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref); + + kfree(hcd); +} + +struct usb_hcd *usb_get_hcd (struct usb_hcd *hcd) +{ + if (hcd) + kref_get (&hcd->kref); + return hcd; +} +EXPORT_SYMBOL (usb_get_hcd); + void usb_put_hcd (struct usb_hcd *hcd) { - dev_set_drvdata(hcd->self.controller, NULL); - usb_bus_put(&hcd->self); + if (hcd) + kref_put (&hcd->kref, hcd_release); } EXPORT_SYMBOL (usb_put_hcd); diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index 83e229914797..7a2bcba2ae61 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -55,12 +55,13 @@ /*-------------------------------------------------------------------------*/ -struct usb_hcd { /* usb_bus.hcpriv points to this */ +struct usb_hcd { /* * housekeeping */ struct usb_bus self; /* hcd is-a bus */ + struct kref kref; /* reference counter */ const char *product_desc; /* product/vendor string */ char irq_descr[24]; /* driver + bus # */ @@ -129,8 +130,10 @@ static inline struct usb_bus *hcd_to_bus (struct usb_hcd *hcd) return &hcd->self; } - -// urb.hcpriv is really hardware-specific +static inline struct usb_hcd *bus_to_hcd (struct usb_bus *bus) +{ + return container_of(bus, struct usb_hcd, self); +} struct hcd_timeout { /* timeouts we allocate */ struct list_head timeout_list; @@ -210,6 +213,7 @@ extern int usb_hcd_get_frame_number (struct usb_device *udev); extern struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, struct device *dev, char *bus_name); +extern struct usb_hcd *usb_get_hcd (struct usb_hcd *hcd); extern void usb_put_hcd (struct usb_hcd *hcd); extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); @@ -356,9 +360,6 @@ extern struct list_head usb_bus_list; extern struct mutex usb_bus_list_lock; extern wait_queue_head_t usb_kill_urb_queue; -extern struct usb_bus *usb_bus_get (struct usb_bus *bus); -extern void usb_bus_put (struct usb_bus *bus); - extern void usb_enable_root_hub_irq (struct usb_bus *bus); extern int usb_find_interface_driver (struct usb_device *dev, diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 7ab9d29215f8..b0c0a993338f 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -169,7 +169,7 @@ static void usb_release_dev(struct device *dev) udev = to_usb_device(dev); usb_destroy_configuration(udev); - usb_bus_put(udev->bus); + usb_put_hcd(bus_to_hcd(udev->bus)); kfree(udev->product); kfree(udev->manufacturer); kfree(udev->serial); @@ -197,8 +197,7 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1) if (!dev) return NULL; - bus = usb_bus_get(bus); - if (!bus) { + if (!usb_get_hcd(bus_to_hcd(bus))) { kfree(dev); return NULL; } diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index 7d1c22c34957..fdab97a27c08 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -889,11 +889,9 @@ EXPORT_SYMBOL (net2280_set_fifo_mode); static void dummy_gadget_release (struct device *dev) { -#if 0 /* usb_bus_put isn't EXPORTed! */ struct dummy *dum = gadget_dev_to_dummy (dev); - usb_bus_put (&dummy_to_hcd (dum)->self); -#endif + usb_put_hcd (dummy_to_hcd (dum)); } static int dummy_udc_probe (struct platform_device *pdev) @@ -915,9 +913,7 @@ static int dummy_udc_probe (struct platform_device *pdev) if (rc < 0) return rc; -#if 0 /* usb_bus_get isn't EXPORTed! */ - usb_bus_get (&dummy_to_hcd (dum)->self); -#endif + usb_get_hcd (dummy_to_hcd (dum)); platform_set_drvdata (pdev, dum); device_create_file (&dum->gadget.dev, &dev_attr_function); diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 65ac9fef3a7c..215ce6d06394 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -451,7 +451,7 @@ show_async (struct class_device *class_dev, char *buf) *buf = 0; bus = class_get_devdata(class_dev); - hcd = bus->hcpriv; + hcd = bus_to_hcd(bus); ehci = hcd_to_ehci (hcd); next = buf; size = PAGE_SIZE; @@ -497,7 +497,7 @@ show_periodic (struct class_device *class_dev, char *buf) seen_count = 0; bus = class_get_devdata(class_dev); - hcd = bus->hcpriv; + hcd = bus_to_hcd(bus); ehci = hcd_to_ehci (hcd); next = buf; size = PAGE_SIZE; @@ -634,7 +634,7 @@ show_registers (struct class_device *class_dev, char *buf) static char label [] = ""; bus = class_get_devdata(class_dev); - hcd = bus->hcpriv; + hcd = bus_to_hcd(bus); ehci = hcd_to_ehci (hcd); next = buf; size = PAGE_SIZE; diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c index da52609a9290..534d07dcb824 100644 --- a/drivers/usb/host/ohci-dbg.c +++ b/drivers/usb/host/ohci-dbg.c @@ -477,7 +477,7 @@ show_async (struct class_device *class_dev, char *buf) unsigned long flags; bus = class_get_devdata(class_dev); - hcd = bus->hcpriv; + hcd = bus_to_hcd(bus); ohci = hcd_to_ohci(hcd); /* display control and bulk lists together, for simplicity */ @@ -510,7 +510,7 @@ show_periodic (struct class_device *class_dev, char *buf) seen_count = 0; bus = class_get_devdata(class_dev); - hcd = bus->hcpriv; + hcd = bus_to_hcd(bus); ohci = hcd_to_ohci(hcd); next = buf; size = PAGE_SIZE; @@ -607,7 +607,7 @@ show_registers (struct class_device *class_dev, char *buf) u32 rdata; bus = class_get_devdata(class_dev); - hcd = bus->hcpriv; + hcd = bus_to_hcd(bus); ohci = hcd_to_ohci(hcd); regs = ohci->regs; next = buf; diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c index 275a66f83058..e0ed36cdfd8b 100644 --- a/drivers/usb/mon/mon_main.c +++ b/drivers/usb/mon/mon_main.c @@ -265,7 +265,6 @@ static void mon_dissolve(struct mon_bus *mbus, struct usb_bus *ubus) ubus->mon_bus = NULL; mbus->u_bus = NULL; mb(); - // usb_bus_put(ubus); } /* @@ -297,10 +296,9 @@ static void mon_bus_init(struct dentry *mondir, struct usb_bus *ubus) INIT_LIST_HEAD(&mbus->r_list); /* - * This usb_bus_get here is superfluous, because we receive - * a notification if usb_bus is about to be removed. + * We don't need to take a reference to ubus, because we receive + * a notification if the bus is about to be removed. */ - // usb_bus_get(ubus); mbus->u_bus = ubus; ubus->mon_bus = mbus; diff --git a/include/linux/usb.h b/include/linux/usb.h index 09661759621f..c66303285a45 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -280,7 +280,6 @@ struct usb_bus { struct usb_devmap devmap; /* device address allocation map */ struct usb_device *root_hub; /* Root hub */ struct list_head bus_list; /* list of busses */ - void *hcpriv; /* Host Controller private data */ int bandwidth_allocated; /* on this bus: how much of the time * reserved for periodic (intr/iso) @@ -295,8 +294,6 @@ struct usb_bus { struct dentry *usbfs_dentry; /* usbfs dentry entry for the bus */ struct class_device *class_dev; /* class device for this bus */ - struct kref kref; /* reference counting for this bus */ - void (*release)(struct usb_bus *bus); #if defined(CONFIG_USB_MON) struct mon_bus *mon_bus; /* non-null when associated */ -- cgit v1.2.3-71-gd317 From b6956ffa595db97656d5901ca8fec77ef272d41a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Aug 2006 15:46:48 -0400 Subject: usbcore: store each usb_device's level in the tree This patch (as778) adds a field to struct usb_device to store the device's level in the USB tree. In itself this number isn't really important. But the overhead is very low, and in a later patch it will be used for preventing bogus warnings from the lockdep checker. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 1 + include/linux/usb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f5adce049b35..78e910b2046c 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2414,6 +2414,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, usb_set_device_state(udev, USB_STATE_POWERED); udev->speed = USB_SPEED_UNKNOWN; udev->bus_mA = hub->mA_per_port; + udev->level = hdev->level + 1; /* set the address */ choose_address(udev); diff --git a/include/linux/usb.h b/include/linux/usb.h index c66303285a45..df5c93eb3ce9 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -348,6 +348,7 @@ struct usb_device { unsigned short bus_mA; /* Current available from the bus */ u8 portnum; /* Parent port number (origin 1) */ + u8 level; /* Number of USB hub ancestors */ int have_langid; /* whether string_langid is valid */ int string_langid; /* language ID for strings */ -- cgit v1.2.3-71-gd317 From 645daaab0b6adc35c1838df2a82f9d729fdb1767 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Aug 2006 15:47:02 -0400 Subject: usbcore: add autosuspend/autoresume infrastructure This patch (as739) adds the basic infrastructure for USB autosuspend and autoresume. The main features are: PM usage counters added to struct usb_device and struct usb_interface, indicating whether it's okay to autosuspend them or they are currently in use. Flag added to usb_device indicating whether the current suspend/resume operation originated from outside or as an autosuspend/autoresume. Flag added to usb_driver indicating whether the driver supports autosuspend. If not, no device bound to the driver will be autosuspended. Mutex added to usb_device for protecting PM operations. Unlike the device semaphore, the locking rule for the pm_mutex is that you must acquire the locks going _up_ the device tree. New routines handling autosuspend/autoresume requests for interfaces and devices. Suspend and resume requests are propagated up the device tree (but not outside the USB subsystem). work_struct added to usb_device, for carrying out delayed autosuspend requests. Autoresume added (and autosuspend prevented) during probe and disconnect. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 419 ++++++++++++++++++++++++++++++++++++++++++---- drivers/usb/core/hub.c | 49 ++++-- drivers/usb/core/usb.c | 23 +++ drivers/usb/core/usb.h | 14 ++ include/linux/usb.h | 33 ++++ 5 files changed, 493 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index a5d11461f5a9..2b2000ac05ab 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -157,12 +157,13 @@ static int usb_probe_device(struct device *dev) udev = to_usb_device(dev); - /* FIXME: resume a suspended device */ - if (udev->state == USB_STATE_SUSPENDED) - return -EHOSTUNREACH; - /* TODO: Add real matching code */ + /* The device should always appear to be in use + * unless the driver suports autosuspend. + */ + udev->pm_usage_cnt = !(udriver->supports_autosuspend); + error = udriver->probe(udev); return error; } @@ -182,6 +183,7 @@ static int usb_probe_interface(struct device *dev) { struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf; + struct usb_device *udev; const struct usb_device_id *id; int error = -ENODEV; @@ -191,10 +193,7 @@ static int usb_probe_interface(struct device *dev) return error; intf = to_usb_interface(dev); - - /* FIXME we'd much prefer to just resume it ... */ - if (interface_to_usbdev(intf)->state == USB_STATE_SUSPENDED) - return -EHOSTUNREACH; + udev = interface_to_usbdev(intf); id = usb_match_id(intf, driver->id_table); if (!id) @@ -202,18 +201,31 @@ static int usb_probe_interface(struct device *dev) if (id) { dev_dbg(dev, "%s - got id\n", __FUNCTION__); + error = usb_autoresume_device(udev, 1); + if (error) + return error; + /* Interface "power state" doesn't correspond to any hardware * state whatsoever. We use it to record when it's bound to * a driver that may start I/0: it's not frozen/quiesced. */ mark_active(intf); intf->condition = USB_INTERFACE_BINDING; + + /* The interface should always appear to be in use + * unless the driver suports autosuspend. + */ + intf->pm_usage_cnt = !(driver->supports_autosuspend); + error = driver->probe(intf, id); if (error) { mark_quiesced(intf); + intf->needs_remote_wakeup = 0; intf->condition = USB_INTERFACE_UNBOUND; } else intf->condition = USB_INTERFACE_BOUND; + + usb_autosuspend_device(udev, 1); } return error; @@ -224,9 +236,15 @@ static int usb_unbind_interface(struct device *dev) { struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf = to_usb_interface(dev); + struct usb_device *udev; + int error; intf->condition = USB_INTERFACE_UNBINDING; + /* Autoresume for set_interface call below */ + udev = interface_to_usbdev(intf); + error = usb_autoresume_device(udev, 1); + /* release all urbs for this interface */ usb_disable_interface(interface_to_usbdev(intf), intf); @@ -237,8 +255,13 @@ static int usb_unbind_interface(struct device *dev) intf->altsetting[0].desc.bInterfaceNumber, 0); usb_set_intfdata(intf, NULL); + intf->condition = USB_INTERFACE_UNBOUND; mark_quiesced(intf); + intf->needs_remote_wakeup = 0; + + if (!error) + usb_autosuspend_device(udev, 1); return 0; } @@ -267,14 +290,19 @@ int usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void* priv) { struct device *dev = &iface->dev; + struct usb_device *udev = interface_to_usbdev(iface); if (dev->driver) return -EBUSY; dev->driver = &driver->drvwrap.driver; usb_set_intfdata(iface, priv); + + mutex_lock_nested(&udev->pm_mutex, udev->level); iface->condition = USB_INTERFACE_BOUND; mark_active(iface); + iface->pm_usage_cnt = !(driver->supports_autosuspend); + mutex_unlock(&udev->pm_mutex); /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() @@ -304,6 +332,7 @@ void usb_driver_release_interface(struct usb_driver *driver, struct usb_interface *iface) { struct device *dev = &iface->dev; + struct usb_device *udev = interface_to_usbdev(iface); /* this should never happen, don't release something that's not ours */ if (!dev->driver || dev->driver != &driver->drvwrap.driver) @@ -321,8 +350,12 @@ void usb_driver_release_interface(struct usb_driver *driver, dev->driver = NULL; usb_set_intfdata(iface, NULL); + + mutex_lock_nested(&udev->pm_mutex, udev->level); iface->condition = USB_INTERFACE_UNBOUND; mark_quiesced(iface); + iface->needs_remote_wakeup = 0; + mutex_unlock(&udev->pm_mutex); } EXPORT_SYMBOL(usb_driver_release_interface); @@ -751,7 +784,7 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_deregister); #ifdef CONFIG_PM -/* Caller has locked udev */ +/* Caller has locked udev->pm_mutex */ static int suspend_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; @@ -763,6 +796,7 @@ static int suspend_device(struct usb_device *udev, pm_message_t msg) /* For devices that don't have a driver, we do a standard suspend. */ if (udev->dev.driver == NULL) { + udev->do_remote_wakeup = 0; status = usb_port_suspend(udev); goto done; } @@ -771,12 +805,13 @@ static int suspend_device(struct usb_device *udev, pm_message_t msg) status = udriver->suspend(udev, msg); done: + // dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status); if (status == 0) udev->dev.power.power_state.event = msg.event; return status; } -/* Caller has locked udev */ +/* Caller has locked udev->pm_mutex */ static int resume_device(struct usb_device *udev) { struct usb_device_driver *udriver; @@ -796,12 +831,13 @@ static int resume_device(struct usb_device *udev) status = udriver->resume(udev); done: + // dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status); if (status == 0) udev->dev.power.power_state.event = PM_EVENT_ON; return status; } -/* Caller has locked intf's usb_device */ +/* Caller has locked intf's usb_device's pm_mutex */ static int suspend_interface(struct usb_interface *intf, pm_message_t msg) { struct usb_driver *driver; @@ -812,31 +848,33 @@ static int suspend_interface(struct usb_interface *intf, pm_message_t msg) !is_active(intf)) goto done; - if (intf->dev.driver == NULL) /* This can't happen */ + if (intf->condition == USB_INTERFACE_UNBOUND) /* This can't happen */ goto done; driver = to_usb_driver(intf->dev.driver); if (driver->suspend && driver->resume) { status = driver->suspend(intf, msg); - if (status) + if (status == 0) + mark_quiesced(intf); + else if (!interface_to_usbdev(intf)->auto_pm) dev_err(&intf->dev, "%s error %d\n", "suspend", status); - else - mark_quiesced(intf); } else { // FIXME else if there's no suspend method, disconnect... + // Not possible if auto_pm is set... dev_warn(&intf->dev, "no suspend for driver %s?\n", driver->name); mark_quiesced(intf); } done: + // dev_dbg(&intf->dev, "%s: status %d\n", __FUNCTION__, status); if (status == 0) intf->dev.power.power_state.event = msg.event; return status; } -/* Caller has locked intf's usb_device */ +/* Caller has locked intf's usb_device's pm_mutex */ static int resume_interface(struct usb_interface *intf) { struct usb_driver *driver; @@ -846,8 +884,12 @@ static int resume_interface(struct usb_interface *intf) is_active(intf)) goto done; + /* Don't let autoresume interfere with unbinding */ + if (intf->condition == USB_INTERFACE_UNBINDING) + goto done; + /* Can't resume it if it doesn't have a driver. */ - if (intf->dev.driver == NULL) { + if (intf->condition == USB_INTERFACE_UNBOUND) { status = -ENOTCONN; goto done; } @@ -867,18 +909,88 @@ static int resume_interface(struct usb_interface *intf) } done: + // dev_dbg(&intf->dev, "%s: status %d\n", __FUNCTION__, status); if (status == 0) intf->dev.power.power_state.event = PM_EVENT_ON; return status; } -/* Caller has locked udev */ +/** + * usb_suspend_both - suspend a USB device and its interfaces + * @udev: the usb_device to suspend + * @msg: Power Management message describing this state transition + * + * This is the central routine for suspending USB devices. It calls the + * suspend methods for all the interface drivers in @udev and then calls + * the suspend method for @udev itself. If an error occurs at any stage, + * all the interfaces which were suspended are resumed so that they remain + * in the same state as the device. + * + * If an autosuspend is in progress (@udev->auto_pm is set), the routine + * checks first to make sure that neither the device itself or any of its + * active interfaces is in use (pm_usage_cnt is greater than 0). If they + * are, the autosuspend fails. + * + * If the suspend succeeds, the routine recursively queues an autosuspend + * request for @udev's parent device, thereby propagating the change up + * the device tree. If all of the parent's children are now suspended, + * the parent will autosuspend in turn. + * + * The suspend method calls are subject to mutual exclusion under control + * of @udev's pm_mutex. Many of these calls are also under the protection + * of @udev's device lock (including all requests originating outside the + * USB subsystem), but autosuspend requests generated by a child device or + * interface driver may not be. Usbcore will insure that the method calls + * do not arrive during bind, unbind, or reset operations. However, drivers + * must be prepared to handle suspend calls arriving at unpredictable times. + * The only way to block such calls is to do an autoresume (preventing + * autosuspends) while holding @udev's device lock (preventing outside + * suspends). + * + * The caller must hold @udev->pm_mutex. + * + * This routine can run only in process context. + */ int usb_suspend_both(struct usb_device *udev, pm_message_t msg) { int status = 0; int i = 0; struct usb_interface *intf; + struct usb_device *parent = udev->parent; + + cancel_delayed_work(&udev->autosuspend); + if (udev->state == USB_STATE_NOTATTACHED) + return 0; + if (udev->state == USB_STATE_SUSPENDED) + return 0; + udev->do_remote_wakeup = device_may_wakeup(&udev->dev); + + /* For autosuspend, fail fast if anything is in use. + * Also fail if any interfaces require remote wakeup but it + * isn't available. */ + if (udev->auto_pm) { + if (udev->pm_usage_cnt > 0) + return -EBUSY; + if (udev->actconfig) { + for (; i < udev->actconfig->desc.bNumInterfaces; i++) { + intf = udev->actconfig->interface[i]; + if (!is_active(intf)) + continue; + if (intf->pm_usage_cnt > 0) + return -EBUSY; + if (intf->needs_remote_wakeup && + !udev->do_remote_wakeup) { + dev_dbg(&udev->dev, + "remote wakeup needed for autosuspend\n"); + return -EOPNOTSUPP; + } + } + i = 0; + } + } + + /* Suspend all the interfaces and then udev itself */ if (udev->actconfig) { for (; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; @@ -896,40 +1008,282 @@ int usb_suspend_both(struct usb_device *udev, pm_message_t msg) intf = udev->actconfig->interface[i]; resume_interface(intf); } - } + + /* If the suspend succeeded, propagate it up the tree */ + } else if (parent) + usb_autosuspend_device(parent, 0); + + // dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status); return status; } -/* Caller has locked udev */ +/** + * usb_resume_both - resume a USB device and its interfaces + * @udev: the usb_device to resume + * + * This is the central routine for resuming USB devices. It calls the + * the resume method for @udev and then calls the resume methods for all + * the interface drivers in @udev. + * + * Before starting the resume, the routine calls itself recursively for + * the parent device of @udev, thereby propagating the change up the device + * tree and assuring that @udev will be able to resume. If the parent is + * unable to resume successfully, the routine fails. + * + * The resume method calls are subject to mutual exclusion under control + * of @udev's pm_mutex. Many of these calls are also under the protection + * of @udev's device lock (including all requests originating outside the + * USB subsystem), but autoresume requests generated by a child device or + * interface driver may not be. Usbcore will insure that the method calls + * do not arrive during bind, unbind, or reset operations. However, drivers + * must be prepared to handle resume calls arriving at unpredictable times. + * The only way to block such calls is to do an autoresume (preventing + * other autoresumes) while holding @udev's device lock (preventing outside + * resumes). + * + * The caller must hold @udev->pm_mutex. + * + * This routine can run only in process context. + */ int usb_resume_both(struct usb_device *udev) { - int status; + int status = 0; int i; struct usb_interface *intf; + struct usb_device *parent = udev->parent; + + cancel_delayed_work(&udev->autosuspend); + if (udev->state == USB_STATE_NOTATTACHED) + return -ENODEV; - /* Can't resume if the parent is suspended */ - if (udev->parent && udev->parent->state == USB_STATE_SUSPENDED) { - dev_warn(&udev->dev, "can't resume; parent is suspended\n"); - return -EHOSTUNREACH; + /* Propagate the resume up the tree, if necessary */ + if (udev->state == USB_STATE_SUSPENDED) { + if (parent) { + mutex_lock_nested(&parent->pm_mutex, parent->level); + parent->auto_pm = 1; + status = usb_resume_both(parent); + } else { + + /* We can't progagate beyond the USB subsystem, + * so if a root hub's controller is suspended + * then we're stuck. */ + if (udev->dev.parent->power.power_state.event != + PM_EVENT_ON) + status = -EHOSTUNREACH; + } + if (status == 0 && udev->state == USB_STATE_SUSPENDED) + status = resume_device(udev); + if (parent) + mutex_unlock(&parent->pm_mutex); } - status = resume_device(udev); + /* Now the parent won't suspend until we are finished */ + if (status == 0 && udev->actconfig) { for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; resume_interface(intf); } } + + // dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status); + return status; +} + +#ifdef CONFIG_USB_SUSPEND + +/** + * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces + * @udev - the usb_device to autosuspend + * @dec_usage_cnt - flag to decrement @udev's PM-usage counter + * + * This routine should be called when a core subsystem is finished using + * @udev and wants to allow it to autosuspend. Examples would be when + * @udev's device file in usbfs is closed or after a configuration change. + * + * @dec_usage_cnt should be 1 if the subsystem previously incremented + * @udev's usage counter (such as by passing 1 to usb_autoresume_device); + * otherwise it should be 0. + * + * If the usage counter for @udev or any of its active interfaces is greater + * than 0, the autosuspend request will not be queued. (If an interface + * driver does not support autosuspend then its usage counter is permanently + * positive.) Likewise, if an interface driver requires remote-wakeup + * capability during autosuspend but remote wakeup is disabled, the + * autosuspend will fail. + * + * Often the caller will hold @udev's device lock, but this is not + * necessary. + * + * This routine can run only in process context. + */ +void usb_autosuspend_device(struct usb_device *udev, int dec_usage_cnt) +{ + mutex_lock_nested(&udev->pm_mutex, udev->level); + udev->pm_usage_cnt -= dec_usage_cnt; + if (udev->pm_usage_cnt <= 0) + schedule_delayed_work(&udev->autosuspend, + USB_AUTOSUSPEND_DELAY); + mutex_unlock(&udev->pm_mutex); + // dev_dbg(&udev->dev, "%s: cnt %d\n", + // __FUNCTION__, udev->pm_usage_cnt); +} + +/** + * usb_autoresume_device - immediately autoresume a USB device and its interfaces + * @udev - the usb_device to autoresume + * @inc_usage_cnt - flag to increment @udev's PM-usage counter + * + * This routine should be called when a core subsystem wants to use @udev + * and needs to guarantee that it is not suspended. In addition, the + * caller can prevent @udev from being autosuspended subsequently. (Note + * that this will not prevent suspend events originating in the PM core.) + * Examples would be when @udev's device file in usbfs is opened (autosuspend + * should be prevented until the file is closed) or when a remote-wakeup + * request is received (later autosuspends should not be prevented). + * + * @inc_usage_cnt should be 1 to increment @udev's usage counter and prevent + * autosuspends. This prevention will persist until the usage counter is + * decremented again (such as by passing 1 to usb_autosuspend_device). + * Otherwise @inc_usage_cnt should be 0 to leave the usage counter unchanged. + * Regardless, if the autoresume fails then the usage counter is not + * incremented. + * + * Often the caller will hold @udev's device lock, but this is not + * necessary (and attempting it might cause deadlock). + * + * This routine can run only in process context. + */ +int usb_autoresume_device(struct usb_device *udev, int inc_usage_cnt) +{ + int status; + + mutex_lock_nested(&udev->pm_mutex, udev->level); + udev->pm_usage_cnt += inc_usage_cnt; + udev->auto_pm = 1; + status = usb_resume_both(udev); + if (status != 0) + udev->pm_usage_cnt -= inc_usage_cnt; + mutex_unlock(&udev->pm_mutex); + // dev_dbg(&udev->dev, "%s: status %d cnt %d\n", + // __FUNCTION__, status, udev->pm_usage_cnt); + return status; +} + +/** + * usb_autopm_put_interface - decrement a USB interface's PM-usage counter + * @intf - the usb_interface whose counter should be decremented + * + * This routine should be called by an interface driver when it is + * finished using @intf and wants to allow it to autosuspend. A typical + * example would be a character-device driver when its device file is + * closed. + * + * The routine decrements @intf's usage counter. When the counter reaches + * 0, a delayed autosuspend request for @intf's device is queued. When + * the delay expires, if @intf->pm_usage_cnt is still <= 0 along with all + * the other usage counters for the sibling interfaces and @intf's + * usb_device, the device and all its interfaces will be autosuspended. + * + * Note that @intf->pm_usage_cnt is owned by the interface driver. The + * core will not change its value other than the increment and decrement + * in usb_autopm_get_interface and usb_autopm_put_interface. The driver + * may use this simple counter-oriented discipline or may set the value + * any way it likes. + * + * If the driver has set @intf->needs_remote_wakeup then autosuspend will + * take place only if the device's remote-wakeup facility is enabled. + * + * Suspend method calls queued by this routine can arrive at any time + * while @intf is resumed and its usage counter is equal to 0. They are + * not protected by the usb_device's lock but only by its pm_mutex. + * Drivers must provide their own synchronization. + * + * This routine can run only in process context. + */ +void usb_autopm_put_interface(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + + mutex_lock_nested(&udev->pm_mutex, udev->level); + if (intf->condition != USB_INTERFACE_UNBOUND) { + if (--intf->pm_usage_cnt <= 0) + schedule_delayed_work(&udev->autosuspend, + USB_AUTOSUSPEND_DELAY); + } + mutex_unlock(&udev->pm_mutex); + // dev_dbg(&intf->dev, "%s: cnt %d\n", + // __FUNCTION__, intf->pm_usage_cnt); +} +EXPORT_SYMBOL_GPL(usb_autopm_put_interface); + +/** + * usb_autopm_get_interface - increment a USB interface's PM-usage counter + * @intf - the usb_interface whose counter should be incremented + * + * This routine should be called by an interface driver when it wants to + * use @intf and needs to guarantee that it is not suspended. In addition, + * the routine prevents @intf from being autosuspended subsequently. (Note + * that this will not prevent suspend events originating in the PM core.) + * This prevention will persist until usb_autopm_put_interface() is called + * or @intf is unbound. A typical example would be a character-device + * driver when its device file is opened. + * + * The routine increments @intf's usage counter. So long as the counter + * is greater than 0, autosuspend will not be allowed for @intf or its + * usb_device. When the driver is finished using @intf it should call + * usb_autopm_put_interface() to decrement the usage counter and queue + * a delayed autosuspend request (if the counter is <= 0). + * + * Note that @intf->pm_usage_cnt is owned by the interface driver. The + * core will not change its value other than the increment and decrement + * in usb_autopm_get_interface and usb_autopm_put_interface. The driver + * may use this simple counter-oriented discipline or may set the value + * any way it likes. + * + * Resume method calls generated by this routine can arrive at any time + * while @intf is suspended. They are not protected by the usb_device's + * lock but only by its pm_mutex. Drivers must provide their own + * synchronization. + * + * This routine can run only in process context. + */ +int usb_autopm_get_interface(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + int status; + + mutex_lock_nested(&udev->pm_mutex, udev->level); + if (intf->condition == USB_INTERFACE_UNBOUND) + status = -ENODEV; + else { + ++intf->pm_usage_cnt; + udev->auto_pm = 1; + status = usb_resume_both(udev); + if (status != 0) + --intf->pm_usage_cnt; + } + mutex_unlock(&udev->pm_mutex); + // dev_dbg(&intf->dev, "%s: status %d cnt %d\n", + // __FUNCTION__, status, intf->pm_usage_cnt); return status; } +EXPORT_SYMBOL_GPL(usb_autopm_get_interface); + +#endif /* CONFIG_USB_SUSPEND */ static int usb_suspend(struct device *dev, pm_message_t message) { int status; - if (is_usb_device(dev)) - status = usb_suspend_both(to_usb_device(dev), message); - else + if (is_usb_device(dev)) { + struct usb_device *udev = to_usb_device(dev); + + mutex_lock_nested(&udev->pm_mutex, udev->level); + udev->auto_pm = 0; + status = usb_suspend_both(udev, message); + mutex_unlock(&udev->pm_mutex); + } else status = 0; return status; } @@ -939,7 +1293,12 @@ static int usb_resume(struct device *dev) int status; if (is_usb_device(dev)) { - status = usb_resume_both(to_usb_device(dev)); + struct usb_device *udev = to_usb_device(dev); + + mutex_lock_nested(&udev->pm_mutex, udev->level); + udev->auto_pm = 0; + status = usb_resume_both(udev); + mutex_unlock(&udev->pm_mutex); /* Rebind drivers that had no suspend method? */ } else diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 78e910b2046c..dee812bc6c43 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1017,19 +1017,22 @@ void usb_set_device_state(struct usb_device *udev, if (udev->state == USB_STATE_NOTATTACHED) ; /* do nothing */ else if (new_state != USB_STATE_NOTATTACHED) { - udev->state = new_state; /* root hub wakeup capabilities are managed out-of-band * and may involve silicon errata ... ignore them here. */ if (udev->parent) { - if (new_state == USB_STATE_CONFIGURED) + if (udev->state == USB_STATE_SUSPENDED + || new_state == USB_STATE_SUSPENDED) + ; /* No change to wakeup settings */ + else if (new_state == USB_STATE_CONFIGURED) device_init_wakeup(&udev->dev, (udev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_WAKEUP)); - else if (new_state != USB_STATE_SUSPENDED) + else device_init_wakeup(&udev->dev, 0); } + udev->state = new_state; } else recursively_mark_NOTATTACHED(udev); spin_unlock_irqrestore(&device_state_lock, flags); @@ -1507,7 +1510,7 @@ static int hub_port_suspend(struct usb_hub *hub, int port1, * NOTE: OTG devices may issue remote wakeup (or SRP) even when * we don't explicitly enable it here. */ - if (device_may_wakeup(&udev->dev)) { + if (udev->do_remote_wakeup) { status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_REMOTE_WAKEUP, 0, @@ -1533,7 +1536,8 @@ static int hub_port_suspend(struct usb_hub *hub, int port1, USB_CTRL_SET_TIMEOUT); } else { /* device has up to 10 msec to fully suspend */ - dev_dbg(&udev->dev, "usb suspend\n"); + dev_dbg(&udev->dev, "usb %ssuspend\n", + udev->auto_pm ? "auto-" : ""); usb_set_device_state(udev, USB_STATE_SUSPENDED); msleep(10); } @@ -1573,7 +1577,8 @@ static int __usb_port_suspend (struct usb_device *udev, int port1) status = hub_port_suspend(hdev_to_hub(udev->parent), port1, udev); else { - dev_dbg(&udev->dev, "usb suspend\n"); + dev_dbg(&udev->dev, "usb %ssuspend\n", + udev->auto_pm ? "auto-" : ""); usb_set_device_state(udev, USB_STATE_SUSPENDED); } return status; @@ -1687,7 +1692,8 @@ hub_port_resume(struct usb_hub *hub, int port1, struct usb_device *udev) /* drive resume for at least 20 msec */ if (udev) - dev_dbg(&udev->dev, "RESUME\n"); + dev_dbg(&udev->dev, "usb %sresume\n", + udev->auto_pm ? "auto-" : ""); msleep(25); #define LIVE_FLAGS ( USB_PORT_STAT_POWER \ @@ -1754,8 +1760,11 @@ int usb_port_resume(struct usb_device *udev) // NOTE this fails if parent is also suspended... status = hub_port_resume(hdev_to_hub(udev->parent), udev->portnum, udev); - } else + } else { + dev_dbg(&udev->dev, "usb %sresume\n", + udev->auto_pm ? "auto-" : ""); status = finish_port_resume(udev); + } if (status < 0) dev_dbg(&udev->dev, "can't resume, status %d\n", status); return status; @@ -1765,19 +1774,23 @@ static int remote_wakeup(struct usb_device *udev) { int status = 0; - /* don't repeat RESUME sequence if this device - * was already woken up by some other task - */ + /* All this just to avoid sending a port-resume message + * to the parent hub! */ + usb_lock_device(udev); + mutex_lock_nested(&udev->pm_mutex, udev->level); if (udev->state == USB_STATE_SUSPENDED) { - dev_dbg(&udev->dev, "RESUME (wakeup)\n"); + dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-"); /* TRSMRCY = 10 msec */ msleep(10); status = finish_port_resume(udev); + if (status == 0) + udev->dev.power.power_state.event = PM_EVENT_ON; } + mutex_unlock(&udev->pm_mutex); if (status == 0) - usb_resume_both(udev); + usb_autoresume_device(udev, 0); usb_unlock_device(udev); return status; } @@ -1834,7 +1847,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) == PM_EVENT_ON #endif ) { - dev_dbg(&intf->dev, "port %d nyet suspended\n", port1); + if (!hdev->auto_pm) + dev_dbg(&intf->dev, "port %d nyet suspended\n", + port1); return -EBUSY; } } @@ -2587,7 +2602,7 @@ static void hub_events(void) * stub "device" node was never suspended. */ if (i) - usb_resume_both(hdev); + usb_autoresume_device(hdev, 0); /* If this is an inactive or suspended hub, do nothing */ if (hub->quiescing) @@ -2993,6 +3008,9 @@ int usb_reset_composite_device(struct usb_device *udev, return -EINVAL; } + /* Prevent autosuspend during the reset */ + usb_autoresume_device(udev, 1); + if (iface && iface->condition != USB_INTERFACE_BINDING) iface = NULL; @@ -3034,6 +3052,7 @@ int usb_reset_composite_device(struct usb_device *udev, } } + usb_autosuspend_device(udev, 1); return ret; } EXPORT_SYMBOL(usb_reset_composite_device); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index b0c0a993338f..6b029cdb8671 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -168,6 +168,10 @@ static void usb_release_dev(struct device *dev) udev = to_usb_device(dev); +#ifdef CONFIG_PM + cancel_delayed_work(&udev->autosuspend); + flush_scheduled_work(); +#endif usb_destroy_configuration(udev); usb_put_hcd(bus_to_hcd(udev->bus)); kfree(udev->product); @@ -176,6 +180,21 @@ static void usb_release_dev(struct device *dev) kfree(udev); } +#ifdef CONFIG_PM + +/* usb_autosuspend_work - callback routine to autosuspend a USB device */ +static void usb_autosuspend_work(void *_udev) +{ + struct usb_device *udev = _udev; + + mutex_lock_nested(&udev->pm_mutex, udev->level); + udev->auto_pm = 1; + usb_suspend_both(udev, PMSG_SUSPEND); + mutex_unlock(&udev->pm_mutex); +} + +#endif + /** * usb_alloc_dev - usb device constructor (usbcore-internal) * @parent: hub to which device is connected; null to allocate a root hub @@ -251,6 +270,10 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1) dev->parent = parent; INIT_LIST_HEAD(&dev->filelist); +#ifdef CONFIG_PM + mutex_init(&dev->pm_mutex); + INIT_WORK(&dev->autosuspend, usb_autosuspend_work, dev); +#endif return dev; } diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 5162cb370215..10688ad73c6d 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -49,6 +49,20 @@ static inline int usb_resume_both(struct usb_device *udev) #endif +#ifdef CONFIG_USB_SUSPEND + +#define USB_AUTOSUSPEND_DELAY (HZ*2) + +extern void usb_autosuspend_device(struct usb_device *udev, int dec_busy_cnt); +extern int usb_autoresume_device(struct usb_device *udev, int inc_busy_cnt); + +#else + +#define usb_autosuspend_device(udev, dec_busy_cnt) do {} while (0) +#define usb_autoresume_device(udev, inc_busy_cnt) 0 + +#endif + extern struct bus_type usb_bus_type; extern struct usb_device_driver usb_generic_driver; diff --git a/include/linux/usb.h b/include/linux/usb.h index df5c93eb3ce9..0da15b0b02be 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -19,6 +19,7 @@ #include /* for struct file_operations */ #include /* for struct completion */ #include /* for current && schedule_timeout */ +#include /* for struct mutex */ struct usb_device; struct usb_driver; @@ -103,8 +104,12 @@ enum usb_interface_condition { * @condition: binding state of the interface: not bound, binding * (in probe()), bound to a driver, or unbinding (in disconnect()) * @is_active: flag set when the interface is bound and not suspended. + * @needs_remote_wakeup: flag set when the driver requires remote-wakeup + * capability during autosuspend. * @dev: driver model's view of this device * @class_dev: driver model's class view of this device. + * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not + * allowed unless the counter is 0. * * USB device drivers attach to interfaces on a physical device. Each * interface encapsulates a single high level function, such as feeding @@ -144,9 +149,11 @@ struct usb_interface { * bound to */ enum usb_interface_condition condition; /* state of binding */ unsigned is_active:1; /* the interface is not suspended */ + unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ struct device dev; /* interface specific device info */ struct class_device *class_dev; + int pm_usage_cnt; /* usage counter for autosuspend */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) #define interface_to_usbdev(intf) \ @@ -372,6 +379,15 @@ struct usb_device { int maxchild; /* Number of ports if hub */ struct usb_device *children[USB_MAXCHILDREN]; + +#ifdef CONFIG_PM + struct work_struct autosuspend; /* for delayed autosuspends */ + struct mutex pm_mutex; /* protects PM operations */ + int pm_usage_cnt; /* usage counter for autosuspend */ + + unsigned auto_pm:1; /* autosuspend/resume in progress */ + unsigned do_remote_wakeup:1; /* remote wakeup should be enabled */ +#endif }; #define to_usb_device(d) container_of(d, struct usb_device, dev) @@ -392,6 +408,17 @@ extern int usb_reset_composite_device(struct usb_device *dev, extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); +/* USB autosuspend and autoresume */ +#ifdef CONFIG_USB_SUSPEND +extern int usb_autopm_get_interface(struct usb_interface *intf); +extern void usb_autopm_put_interface(struct usb_interface *intf); + +#else +#define usb_autopm_get_interface(intf) 0 +#define usb_autopm_put_interface(intf) do {} while (0) +#endif + + /*-------------------------------------------------------------------------*/ /* for drivers using iso endpoints */ @@ -593,6 +620,8 @@ struct usbdrv_wrap { * @drvwrap: Driver-model core structure wrapper. * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be * added to this driver by preventing the sysfs file from being created. + * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend + * for interfaces bound to this driver. * * USB interface drivers must provide a name, probe() and disconnect() * methods, and an id_table. Other driver fields are optional. @@ -631,6 +660,7 @@ struct usb_driver { struct usb_dynids dynids; struct usbdrv_wrap drvwrap; unsigned int no_dynamic_id:1; + unsigned int supports_autosuspend:1; }; #define to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver) @@ -648,6 +678,8 @@ struct usb_driver { * @suspend: Called when the device is going to be suspended by the system. * @resume: Called when the device is being resumed by the system. * @drvwrap: Driver-model core structure wrapper. + * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend + * for devices bound to this driver. * * USB drivers must provide all the fields listed above except drvwrap. */ @@ -660,6 +692,7 @@ struct usb_device_driver { int (*suspend) (struct usb_device *udev, pm_message_t message); int (*resume) (struct usb_device *udev); struct usbdrv_wrap drvwrap; + unsigned int supports_autosuspend:1; }; #define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ drvwrap.driver) -- cgit v1.2.3-71-gd317 From 29fa06c1292f473ae51a84f55c8fe22179bc1080 Mon Sep 17 00:00:00 2001 From: Rudolf Marek Date: Mon, 28 Aug 2006 14:40:17 +0200 Subject: hwmon: New driver k8temp Add support for the temperature sensor(s) found in AMD K8 CPUs. Signed-off-by: Rudolf Marek Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/Kconfig | 10 ++ drivers/hwmon/Makefile | 1 + drivers/hwmon/k8temp.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 1 + 4 files changed, 304 insertions(+) create mode 100644 drivers/hwmon/k8temp.c (limited to 'include/linux') diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 78c237f8fa0a..d9f86e9d405b 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -94,6 +94,16 @@ config SENSORS_ADM9240 This driver can also be built as a module. If so, the module will be called adm9240. +config SENSORS_K8TEMP + tristate "AMD K8 processor sensor" + depends on HWMON && X86 && PCI && EXPERIMENTAL + help + If you say yes here you get support for the temperature + sensor(s) inside your AMD K8 CPU. + + This driver can also be built as a module. If so, the module + will be called k8temp. + config SENSORS_ASB100 tristate "Asus ASB100 Bach" depends on HWMON && I2C && EXPERIMENTAL diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 31415843a91a..aab4c1063059 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o obj-$(CONFIG_SENSORS_GL520SM) += gl520sm.o obj-$(CONFIG_SENSORS_HDAPS) += hdaps.o obj-$(CONFIG_SENSORS_IT87) += it87.o +obj-$(CONFIG_SENSORS_K8TEMP) += k8temp.o obj-$(CONFIG_SENSORS_LM63) += lm63.o obj-$(CONFIG_SENSORS_LM70) += lm70.o obj-$(CONFIG_SENSORS_LM75) += lm75.o diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c new file mode 100644 index 000000000000..50162ffa8832 --- /dev/null +++ b/drivers/hwmon/k8temp.c @@ -0,0 +1,292 @@ +/* + * k8temp.c - Linux kernel module for hardware monitoring + * + * Copyright (C) 2006 Rudolf Marek + * + * Inspired from the w83785 and amd756 drivers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TEMP_FROM_REG(val) (((((val) >> 16) & 0xff) - 49) * 1000) +#define REG_TEMP 0xe4 +#define SEL_PLACE 0x40 +#define SEL_CORE 0x04 + +struct k8temp_data { + struct class_device *class_dev; + struct mutex update_lock; + const char *name; + char valid; /* zero until following fields are valid */ + unsigned long last_updated; /* in jiffies */ + + /* registers values */ + u8 sensorsp; /* sensor presence bits - SEL_CORE & SEL_PLACE */ + u32 temp[2][2]; /* core, place */ +}; + +static struct k8temp_data *k8temp_update_device(struct device *dev) +{ + struct k8temp_data *data = dev_get_drvdata(dev); + struct pci_dev *pdev = to_pci_dev(dev); + u8 tmp; + + mutex_lock(&data->update_lock); + + if (!data->valid + || time_after(jiffies, data->last_updated + HZ)) { + pci_read_config_byte(pdev, REG_TEMP, &tmp); + tmp &= ~(SEL_PLACE | SEL_CORE); /* Select sensor 0, core0 */ + pci_write_config_byte(pdev, REG_TEMP, tmp); + pci_read_config_dword(pdev, REG_TEMP, &data->temp[0][0]); + + if (data->sensorsp & SEL_PLACE) { + tmp |= SEL_PLACE; /* Select sensor 1, core0 */ + pci_write_config_byte(pdev, REG_TEMP, tmp); + pci_read_config_dword(pdev, REG_TEMP, + &data->temp[0][1]); + } + + if (data->sensorsp & SEL_CORE) { + tmp &= ~SEL_PLACE; /* Select sensor 0, core1 */ + tmp |= SEL_CORE; + pci_write_config_byte(pdev, REG_TEMP, tmp); + pci_read_config_dword(pdev, REG_TEMP, + &data->temp[1][0]); + + if (data->sensorsp & SEL_PLACE) { + tmp |= SEL_PLACE; /* Select sensor 1, core1 */ + pci_write_config_byte(pdev, REG_TEMP, tmp); + pci_read_config_dword(pdev, REG_TEMP, + &data->temp[1][1]); + } + } + + data->last_updated = jiffies; + data->valid = 1; + } + + mutex_unlock(&data->update_lock); + return data; +} + +/* + * Sysfs stuff + */ + +static ssize_t show_name(struct device *dev, struct device_attribute + *devattr, char *buf) +{ + struct k8temp_data *data = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", data->name); +} + + +static ssize_t show_temp(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct sensor_device_attribute_2 *attr = + to_sensor_dev_attr_2(devattr); + int core = attr->nr; + int place = attr->index; + struct k8temp_data *data = k8temp_update_device(dev); + + return sprintf(buf, "%d\n", + TEMP_FROM_REG(data->temp[core][place])); +} + +/* core, place */ + +static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp, NULL, 0, 1); +static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(temp4_input, S_IRUGO, show_temp, NULL, 1, 1); +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); + +static struct pci_device_id k8temp_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, + { 0 }, +}; + +static int __devinit k8temp_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int err; + u8 scfg; + u32 temp; + struct k8temp_data *data; + u32 cpuid = cpuid_eax(1); + + /* this feature should be available since SH-C0 core */ + if ((cpuid == 0xf40) || (cpuid == 0xf50) || (cpuid == 0xf51)) { + err = -ENODEV; + goto exit; + } + + if (!(data = kzalloc(sizeof(struct k8temp_data), GFP_KERNEL))) { + err = -ENOMEM; + goto exit; + } + + pci_read_config_byte(pdev, REG_TEMP, &scfg); + scfg &= ~(SEL_PLACE | SEL_CORE); /* Select sensor 0, core0 */ + pci_write_config_byte(pdev, REG_TEMP, scfg); + pci_read_config_byte(pdev, REG_TEMP, &scfg); + + if (scfg & (SEL_PLACE | SEL_CORE)) { + dev_err(&pdev->dev, "Configuration bit(s) stuck at 1!\n"); + err = -ENODEV; + goto exit_free; + } + + scfg |= (SEL_PLACE | SEL_CORE); + pci_write_config_byte(pdev, REG_TEMP, scfg); + + /* now we know if we can change core and/or sensor */ + pci_read_config_byte(pdev, REG_TEMP, &data->sensorsp); + + if (data->sensorsp & SEL_PLACE) { + scfg &= ~SEL_CORE; /* Select sensor 1, core0 */ + pci_write_config_byte(pdev, REG_TEMP, scfg); + pci_read_config_dword(pdev, REG_TEMP, &temp); + scfg |= SEL_CORE; /* prepare for next selection */ + if (!((temp >> 16) & 0xff)) /* if temp is 0 -49C is not likely */ + data->sensorsp &= ~SEL_PLACE; + } + + if (data->sensorsp & SEL_CORE) { + scfg &= ~SEL_PLACE; /* Select sensor 0, core1 */ + pci_write_config_byte(pdev, REG_TEMP, scfg); + pci_read_config_dword(pdev, REG_TEMP, &temp); + if (!((temp >> 16) & 0xff)) /* if temp is 0 -49C is not likely */ + data->sensorsp &= ~SEL_CORE; + } + + data->name = "k8temp"; + mutex_init(&data->update_lock); + dev_set_drvdata(&pdev->dev, data); + + /* Register sysfs hooks */ + err = device_create_file(&pdev->dev, + &sensor_dev_attr_temp1_input.dev_attr); + if (err) + goto exit_remove; + + /* sensor can be changed and reports something */ + if (data->sensorsp & SEL_PLACE) { + err = device_create_file(&pdev->dev, + &sensor_dev_attr_temp2_input.dev_attr); + if (err) + goto exit_remove; + } + + /* core can be changed and reports something */ + if (data->sensorsp & SEL_CORE) { + err = device_create_file(&pdev->dev, + &sensor_dev_attr_temp3_input.dev_attr); + if (err) + goto exit_remove; + if (data->sensorsp & SEL_PLACE) + err = device_create_file(&pdev->dev, + &sensor_dev_attr_temp4_input. + dev_attr); + if (err) + goto exit_remove; + } + + err = device_create_file(&pdev->dev, &dev_attr_name); + if (err) + goto exit_remove; + + data->class_dev = hwmon_device_register(&pdev->dev); + + if (IS_ERR(data->class_dev)) { + err = PTR_ERR(data->class_dev); + goto exit_remove; + } + + return 0; + +exit_remove: + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp1_input.dev_attr); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp2_input.dev_attr); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp3_input.dev_attr); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp4_input.dev_attr); + device_remove_file(&pdev->dev, &dev_attr_name); +exit_free: + dev_set_drvdata(&pdev->dev, NULL); + kfree(data); +exit: + return err; +} + +static void __devexit k8temp_remove(struct pci_dev *pdev) +{ + struct k8temp_data *data = dev_get_drvdata(&pdev->dev); + + hwmon_device_unregister(data->class_dev); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp1_input.dev_attr); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp2_input.dev_attr); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp3_input.dev_attr); + device_remove_file(&pdev->dev, + &sensor_dev_attr_temp4_input.dev_attr); + device_remove_file(&pdev->dev, &dev_attr_name); + dev_set_drvdata(&pdev->dev, NULL); + kfree(data); +} + +static struct pci_driver k8temp_driver = { + .name = "k8temp", + .id_table = k8temp_ids, + .probe = k8temp_probe, + .remove = __devexit_p(k8temp_remove), +}; + +static int __init k8temp_init(void) +{ + return pci_register_driver(&k8temp_driver); +} + +static void __exit k8temp_exit(void) +{ + pci_unregister_driver(&k8temp_driver); +} + +MODULE_AUTHOR("Rudolf Marek "); +MODULE_DESCRIPTION("AMD K8 core temperature monitor"); +MODULE_LICENSE("GPL"); + +module_init(k8temp_init) +module_exit(k8temp_exit) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ab032ceafa84..61db1907f06f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -479,6 +479,7 @@ #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD_K8_NB 0x1100 +#define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v1.2.3-71-gd317 From e0318ebff4d96131bb3524308b845f642e64df81 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 26 Sep 2006 14:50:20 -0400 Subject: USB: fix autosuspend when CONFIG_PM isn't set This patch (as791b) fixes things up to avoid compiler warnings or errors when CONFIG_USB_SUSPEND or CONFIG_PM isn't set. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 42 +++++++++++++++++++++--------------------- drivers/usb/core/hub.c | 4 ++-- drivers/usb/core/usb.c | 4 ++-- drivers/usb/core/usb.h | 18 +++++++++++++++++- include/linux/usb.h | 2 +- 5 files changed, 43 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index ee18d187ca17..113e484c763e 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -303,11 +303,11 @@ int usb_driver_claim_interface(struct usb_driver *driver, dev->driver = &driver->drvwrap.driver; usb_set_intfdata(iface, priv); - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); iface->condition = USB_INTERFACE_BOUND; mark_active(iface); iface->pm_usage_cnt = !(driver->supports_autosuspend); - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() @@ -356,11 +356,11 @@ void usb_driver_release_interface(struct usb_driver *driver, dev->driver = NULL; usb_set_intfdata(iface, NULL); - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); iface->condition = USB_INTERFACE_UNBOUND; mark_quiesced(iface); iface->needs_remote_wakeup = 0; - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); } EXPORT_SYMBOL(usb_driver_release_interface); @@ -789,7 +789,7 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_deregister); #ifdef CONFIG_PM -/* Caller has locked udev->pm_mutex */ +/* Caller has locked udev's pm_mutex */ static int suspend_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; @@ -816,7 +816,7 @@ done: return status; } -/* Caller has locked udev->pm_mutex */ +/* Caller has locked udev's pm_mutex */ static int resume_device(struct usb_device *udev) { struct usb_device_driver *udriver; @@ -842,7 +842,7 @@ done: return status; } -/* Caller has locked intf's usb_device's pm_mutex */ +/* Caller has locked intf's usb_device's pm mutex */ static int suspend_interface(struct usb_interface *intf, pm_message_t msg) { struct usb_driver *driver; @@ -1064,7 +1064,7 @@ int usb_resume_both(struct usb_device *udev) /* Propagate the resume up the tree, if necessary */ if (udev->state == USB_STATE_SUSPENDED) { if (parent) { - mutex_lock_nested(&parent->pm_mutex, parent->level); + usb_pm_lock(parent); parent->auto_pm = 1; status = usb_resume_both(parent); } else { @@ -1079,7 +1079,7 @@ int usb_resume_both(struct usb_device *udev) if (status == 0) status = resume_device(udev); if (parent) - mutex_unlock(&parent->pm_mutex); + usb_pm_unlock(parent); } else { /* Needed only for setting udev->dev.power.power_state.event @@ -1129,12 +1129,12 @@ int usb_resume_both(struct usb_device *udev) */ void usb_autosuspend_device(struct usb_device *udev, int dec_usage_cnt) { - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); udev->pm_usage_cnt -= dec_usage_cnt; if (udev->pm_usage_cnt <= 0) queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend, USB_AUTOSUSPEND_DELAY); - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); // dev_dbg(&udev->dev, "%s: cnt %d\n", // __FUNCTION__, udev->pm_usage_cnt); } @@ -1168,13 +1168,13 @@ int usb_autoresume_device(struct usb_device *udev, int inc_usage_cnt) { int status; - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); udev->pm_usage_cnt += inc_usage_cnt; udev->auto_pm = 1; status = usb_resume_both(udev); if (status != 0) udev->pm_usage_cnt -= inc_usage_cnt; - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); // dev_dbg(&udev->dev, "%s: status %d cnt %d\n", // __FUNCTION__, status, udev->pm_usage_cnt); return status; @@ -1215,13 +1215,13 @@ void usb_autopm_put_interface(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); if (intf->condition != USB_INTERFACE_UNBOUND && --intf->pm_usage_cnt <= 0) { queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend, USB_AUTOSUSPEND_DELAY); } - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); // dev_dbg(&intf->dev, "%s: cnt %d\n", // __FUNCTION__, intf->pm_usage_cnt); } @@ -1263,7 +1263,7 @@ int usb_autopm_get_interface(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); int status; - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); if (intf->condition == USB_INTERFACE_UNBOUND) status = -ENODEV; else { @@ -1273,7 +1273,7 @@ int usb_autopm_get_interface(struct usb_interface *intf) if (status != 0) --intf->pm_usage_cnt; } - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); // dev_dbg(&intf->dev, "%s: status %d cnt %d\n", // __FUNCTION__, status, intf->pm_usage_cnt); return status; @@ -1289,10 +1289,10 @@ static int usb_suspend(struct device *dev, pm_message_t message) if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); udev->auto_pm = 0; status = usb_suspend_both(udev, message); - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); } else status = 0; return status; @@ -1305,10 +1305,10 @@ static int usb_resume(struct device *dev) if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); udev->auto_pm = 0; status = usb_resume_both(udev); - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); /* Rebind drivers that had no suspend method? */ } else diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2a8cb3c2b19c..7676690a0386 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1779,7 +1779,7 @@ static int remote_wakeup(struct usb_device *udev) * to the parent hub! */ usb_lock_device(udev); - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); if (udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-"); /* TRSMRCY = 10 msec */ @@ -1788,7 +1788,7 @@ static int remote_wakeup(struct usb_device *udev) if (status == 0) udev->dev.power.power_state.event = PM_EVENT_ON; } - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); if (status == 0) usb_autoresume_device(udev, 0); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 239f8e5d247f..e4df9edf1bc0 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -214,10 +214,10 @@ static void usb_autosuspend_work(void *_udev) { struct usb_device *udev = _udev; - mutex_lock_nested(&udev->pm_mutex, udev->level); + usb_pm_lock(udev); udev->auto_pm = 1; usb_suspend_both(udev, PMSG_SUSPEND); - mutex_unlock(&udev->pm_mutex); + usb_pm_unlock(udev); } #else diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index fb6eb41c374f..f69df137ec0e 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -36,6 +36,16 @@ extern int usb_resume_both(struct usb_device *udev); extern int usb_port_suspend(struct usb_device *dev); extern int usb_port_resume(struct usb_device *dev); +static inline void usb_pm_lock(struct usb_device *udev) +{ + mutex_lock_nested(&udev->pm_mutex, udev->level); +} + +static inline void usb_pm_unlock(struct usb_device *udev) +{ + mutex_unlock(&udev->pm_mutex); +} + #else #define usb_suspend_both(udev, msg) 0 @@ -45,6 +55,8 @@ static inline int usb_resume_both(struct usb_device *udev) } #define usb_port_suspend(dev) 0 #define usb_port_resume(dev) 0 +static inline void usb_pm_lock(struct usb_device *udev) {} +static inline void usb_pm_unlock(struct usb_device *udev) {} #endif @@ -58,7 +70,11 @@ extern int usb_autoresume_device(struct usb_device *udev, int inc_busy_cnt); #else #define usb_autosuspend_device(udev, dec_busy_cnt) do {} while (0) -#define usb_autoresume_device(udev, inc_busy_cnt) 0 +static inline int usb_autoresume_device(struct usb_device *udev, + int inc_busy_cnt) +{ + return 0; +} #endif diff --git a/include/linux/usb.h b/include/linux/usb.h index 0da15b0b02be..190cc1b78fe2 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -380,10 +380,10 @@ struct usb_device { int maxchild; /* Number of ports if hub */ struct usb_device *children[USB_MAXCHILDREN]; + int pm_usage_cnt; /* usage counter for autosuspend */ #ifdef CONFIG_PM struct work_struct autosuspend; /* for delayed autosuspends */ struct mutex pm_mutex; /* protects PM operations */ - int pm_usage_cnt; /* usage counter for autosuspend */ unsigned auto_pm:1; /* autosuspend/resume in progress */ unsigned do_remote_wakeup:1; /* remote wakeup should be enabled */ -- cgit v1.2.3-71-gd317 From 2a50f28c326d20ab4556be1b867ecddf6aefbb88 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:22:08 -0700 Subject: [ATALK]: endianness annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- drivers/net/appletalk/ipddp.c | 5 +-- include/linux/atalk.h | 40 +--------------------- net/appletalk/ddp.c | 79 +++++++++++++++---------------------------- 3 files changed, 30 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index 7f7dd450226a..b98592a8bac8 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -145,9 +145,7 @@ static int ipddp_xmit(struct sk_buff *skb, struct net_device *dev) /* Create the Extended DDP header */ ddp = (struct ddpehdr *)skb->data; - ddp->deh_len = skb->len; - ddp->deh_hops = 1; - ddp->deh_pad = 0; + ddp->deh_len_hops = htons(skb->len + (1<<10)); ddp->deh_sum = 0; /* @@ -170,7 +168,6 @@ static int ipddp_xmit(struct sk_buff *skb, struct net_device *dev) ddp->deh_sport = 72; *((__u8 *)(ddp+1)) = 22; /* ddp type = IP */ - *((__u16 *)ddp)=ntohs(*((__u16 *)ddp)); /* fix up length field */ skb->protocol = htons(ETH_P_ATALK); /* Protocol has changed */ diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 6ba3aa8a81f4..75b8baca08f3 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -88,15 +88,7 @@ static inline struct atalk_sock *at_sk(struct sock *sk) #include struct ddpehdr { -#ifdef __LITTLE_ENDIAN_BITFIELD - __u16 deh_len:10, - deh_hops:4, - deh_pad:2; -#else - __u16 deh_pad:2, - deh_hops:4, - deh_len:10; -#endif + __be16 deh_len_hops; /* lower 10 bits are length, next 4 - hops */ __be16 deh_sum; __be16 deh_dnet; __be16 deh_snet; @@ -112,36 +104,6 @@ static __inline__ struct ddpehdr *ddp_hdr(struct sk_buff *skb) return (struct ddpehdr *)skb->h.raw; } -/* - * Don't drop the struct into the struct above. You'll get some - * surprise padding. - */ -struct ddpebits { -#ifdef __LITTLE_ENDIAN_BITFIELD - __u16 deh_len:10, - deh_hops:4, - deh_pad:2; -#else - __u16 deh_pad:2, - deh_hops:4, - deh_len:10; -#endif -}; - -/* Short form header */ -struct ddpshdr { -#ifdef __LITTLE_ENDIAN_BITFIELD - __u16 dsh_len:10, - dsh_pad:6; -#else - __u16 dsh_pad:6, - dsh_len:10; -#endif - __u8 dsh_dport; - __u8 dsh_sport; - /* And netatalk apps expect to stick the type in themselves */ -}; - /* AppleTalk AARP headers */ struct elapaarp { __be16 hw_type; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 96dc6bb52d14..708e2e0371af 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1002,7 +1002,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, return sum; } -static unsigned short atalk_checksum(const struct sk_buff *skb, int len) +static __be16 atalk_checksum(const struct sk_buff *skb, int len) { unsigned long sum; @@ -1010,7 +1010,7 @@ static unsigned short atalk_checksum(const struct sk_buff *skb, int len) sum = atalk_sum_skb(skb, 4, len-4, 0); /* Use 0xFFFF for 0. 0 itself means none */ - return sum ? htons((unsigned short)sum) : 0xFFFF; + return sum ? htons((unsigned short)sum) : htons(0xFFFF); } static struct proto ddp_proto = { @@ -1289,7 +1289,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb) #endif static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev, - struct ddpehdr *ddp, struct ddpebits *ddphv, + struct ddpehdr *ddp, __u16 len_hops, int origlen) { struct atalk_route *rt; @@ -1317,10 +1317,12 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev, /* Route the packet */ rt = atrtr_find(&ta); - if (!rt || ddphv->deh_hops == DDP_MAXHOPS) + /* increment hops count */ + len_hops += 1 << 10; + if (!rt || !(len_hops & (15 << 10))) goto free_it; + /* FIXME: use skb->cb to be able to use shared skbs */ - ddphv->deh_hops++; /* * Route goes through another gateway, so set the target to the @@ -1335,11 +1337,10 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev, /* Fix up skb->len field */ skb_trim(skb, min_t(unsigned int, origlen, (rt->dev->hard_header_len + - ddp_dl->header_length + ddphv->deh_len))); + ddp_dl->header_length + (len_hops & 1023)))); - /* Mend the byte order */ /* FIXME: use skb->cb to be able to use shared skbs */ - *((__u16 *)ddp) = ntohs(*((__u16 *)ddphv)); + ddp->deh_len_hops = htons(len_hops); /* * Send the buffer onwards @@ -1394,7 +1395,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_iface *atif; struct sockaddr_at tosat; int origlen; - struct ddpebits ddphv; + __u16 len_hops; /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) @@ -1406,16 +1407,11 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, ddp = ddp_hdr(skb); - /* - * Fix up the length field [Ok this is horrible but otherwise - * I end up with unions of bit fields and messy bit field order - * compiler/endian dependencies..] - */ - *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp)); + len_hops = ntohs(ddp->deh_len_hops); /* Trim buffer in case of stray trailing data */ origlen = skb->len; - skb_trim(skb, min_t(unsigned int, skb->len, ddphv.deh_len)); + skb_trim(skb, min_t(unsigned int, skb->len, len_hops & 1023)); /* * Size check to see if ddp->deh_len was crap @@ -1430,7 +1426,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, * valid for net byte orders all over the networking code... */ if (ddp->deh_sum && - atalk_checksum(skb, ddphv.deh_len) != ddp->deh_sum) + atalk_checksum(skb, len_hops & 1023) != ddp->deh_sum) /* Not a valid AppleTalk frame - dustbin time */ goto freeit; @@ -1444,7 +1440,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, /* Not ours, so we route the packet via the correct * AppleTalk iface */ - atalk_route_packet(skb, dev, ddp, &ddphv, origlen); + atalk_route_packet(skb, dev, ddp, len_hops, origlen); goto out; } @@ -1489,7 +1485,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, /* Find our address */ struct atalk_addr *ap = atalk_find_dev_addr(dev); - if (!ap || skb->len < sizeof(struct ddpshdr)) + if (!ap || skb->len < sizeof(__be16) || skb->len > 1023) goto freeit; /* Don't mangle buffer if shared */ @@ -1519,11 +1515,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, /* * Not sure about this bit... */ - ddp->deh_len = skb->len; - ddp->deh_hops = DDP_MAXHOPS; /* Non routable, so force a drop - if we slip up later */ - /* Mend the byte order */ - *((__u16 *)ddp) = htons(*((__u16 *)ddp)); + /* Non routable, so force a drop if we slip up later */ + ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10)); } skb->h.raw = skb->data; @@ -1622,16 +1615,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr)); - ddp->deh_pad = 0; - ddp->deh_hops = 0; - ddp->deh_len = len + sizeof(*ddp); - /* - * Fix up the length field [Ok this is horrible but otherwise - * I end up with unions of bit fields and messy bit field order - * compiler/endian dependencies.. - */ - *((__u16 *)ddp) = ntohs(*((__u16 *)ddp)); - + ddp->deh_len_hops = htons(len + sizeof(*ddp)); ddp->deh_dnet = usat->sat_addr.s_net; ddp->deh_snet = at->src_net; ddp->deh_dnode = usat->sat_addr.s_node; @@ -1712,8 +1696,8 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; + int offset = 0; int err = 0; - struct ddpebits ddphv; struct sk_buff *skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1721,25 +1705,18 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr /* FIXME: use skb->cb to be able to use shared skbs */ ddp = ddp_hdr(skb); - *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp)); + copied = ntohs(ddp->deh_len_hops) & 1023; - if (sk->sk_type == SOCK_RAW) { - copied = ddphv.deh_len; - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } + if (sk->sk_type != SOCK_RAW) { + offset = sizeof(*ddp); + copied -= offset; + } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - } else { - copied = ddphv.deh_len - sizeof(*ddp); - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } - err = skb_copy_datagram_iovec(skb, sizeof(*ddp), - msg->msg_iov, copied); + if (copied > size) { + copied = size; + msg->msg_flags |= MSG_TRUNC; } + err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); if (!err) { if (sat) { -- cgit v1.2.3-71-gd317 From 0ac0760a57a6b1eb75c21a590e578be5dfc2f88b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:23:16 -0700 Subject: [TR]: endiannness annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/trdevice.h | 2 +- net/802/tr.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trdevice.h b/include/linux/trdevice.h index 99e02ef54c47..bfc84a7aecc5 100644 --- a/include/linux/trdevice.h +++ b/include/linux/trdevice.h @@ -28,7 +28,7 @@ #include #ifdef __KERNEL__ -extern unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev); +extern __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev); extern void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct net_device *dev); extern struct net_device *alloc_trdev(int sizeof_priv); diff --git a/net/802/tr.c b/net/802/tr.c index d7d8f40c4fed..829deb41ce81 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -164,7 +164,7 @@ static int tr_rebuild_header(struct sk_buff *skb) */ if(trllc->ethertype != htons(ETH_P_IP)) { - printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(trllc->ethertype)); + printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype)); return 0; } @@ -186,7 +186,7 @@ static int tr_rebuild_header(struct sk_buff *skb) * it via SNAP. */ -unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev) { struct trh_hdr *trh=(struct trh_hdr *)skb->data; @@ -229,15 +229,15 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) */ if (trllc->dsap == EXTENDED_SAP && - (trllc->ethertype == ntohs(ETH_P_IP) || - trllc->ethertype == ntohs(ETH_P_IPV6) || - trllc->ethertype == ntohs(ETH_P_ARP))) + (trllc->ethertype == htons(ETH_P_IP) || + trllc->ethertype == htons(ETH_P_IPV6) || + trllc->ethertype == htons(ETH_P_ARP))) { skb_pull(skb, sizeof(struct trllc)); return trllc->ethertype; } - return ntohs(ETH_P_TR_802_2); + return htons(ETH_P_TR_802_2); } /* -- cgit v1.2.3-71-gd317 From 046d033148e6936ee2466d38214cf0743a210f39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:24:24 -0700 Subject: [IPV4]: headers endianness Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/ip.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 2f4600146f83..6b25d36fc54c 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -96,7 +96,7 @@ struct iphdr { __be16 frag_off; __u8 ttl; __u8 protocol; - __u16 check; + __be16 check; __be32 saddr; __be32 daddr; /*The options start here. */ @@ -105,22 +105,22 @@ struct iphdr { struct ip_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ - __u16 reserved; - __u32 spi; - __u32 seq_no; /* Sequence number */ + __be16 reserved; + __be32 spi; + __be32 seq_no; /* Sequence number */ __u8 auth_data[0]; /* Variable len but >=4. Mind the 64 bit alignment! */ }; struct ip_esp_hdr { - __u32 spi; - __u32 seq_no; /* Sequence number */ + __be32 spi; + __be32 seq_no; /* Sequence number */ __u8 enc_data[0]; /* Variable len but >=8. Mind the 64 bit alignment! */ }; struct ip_comp_hdr { __u8 nexthdr; __u8 flags; - __u16 cpi; + __be16 cpi; }; #endif /* _LINUX_IP_H */ -- cgit v1.2.3-71-gd317 From a61ced5d1c2e773620d7855ea2009d770c10a6e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:27:54 -0700 Subject: [IPV4]: inet_select_addr() annotations argument and return value are net-endian. Annotated function and inferred net-endian variables in callers. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- net/ipv4/arp.c | 4 ++-- net/ipv4/devinet.c | 4 ++-- net/ipv4/icmp.c | 2 +- net/ipv4/ipvs/ip_vs_sync.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/route.c | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 92297ff24e85..40d07d0b896b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -110,7 +110,7 @@ extern int devinet_ioctl(unsigned int cmd, void __user *); extern void devinet_init(void); extern struct in_device *inetdev_init(struct net_device *dev); extern struct in_device *inetdev_by_index(int); -extern u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope); +extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope); extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask); extern void inet_forward_change(void); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 48e1ccb2ff55..db72339c21e1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -330,10 +330,10 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { - u32 saddr = 0; + __be32 saddr = 0; u8 *dst_ha = NULL; struct net_device *dev = neigh->dev; - u32 target = *(u32*)neigh->primary_key; + __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); struct in_device *in_dev = in_dev_get(dev); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8e8d1f17d77a..bffbbecef455 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -876,9 +876,9 @@ out: return done; } -u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) +__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { - u32 addr = 0; + __be32 addr = 0; struct in_device *in_dev; rcu_read_lock(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c2ad07e48ab4..fd39685241c1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -437,7 +437,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable *)skb_in->dst; struct ipcm_cookie ipc; - u32 saddr; + __be32 saddr; u8 tos; if (!rt) diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 1bca714bda3d..0c8d20da6139 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -464,7 +464,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname) { struct net_device *dev; - u32 addr; + __be32 addr; struct sockaddr_in sin; if ((dev = __dev_get_by_name(ifname)) == NULL) diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index bc65168a3437..3dbfcfac8a84 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -70,7 +70,7 @@ masquerade_target(struct sk_buff **pskb, const struct ip_nat_multi_range_compat *mr; struct ip_nat_range newrange; struct rtable *rt; - u_int32_t newsrc; + __be32 newsrc; IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 32fcb77295f0..9d8fbf1e5bc3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1532,7 +1532,7 @@ static int ip_rt_bug(struct sk_buff *skb) void ip_rt_get_source(u8 *addr, struct rtable *rt) { - u32 src; + __be32 src; struct fib_result res; if (rt->fl.iif == 0) @@ -1603,7 +1603,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, { unsigned hash; struct rtable *rth; - u32 spec_dst; + __be32 spec_dst; struct in_device *in_dev = in_dev_get(dev); u32 itag = 0; -- cgit v1.2.3-71-gd317 From a60c4923da795c74db9ff61a60e2f1df5754e4ce Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:28:34 -0700 Subject: [IPV4]: ip_check_mc() annotations annotated arguments Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 899c3d4776f3..8e7eedb3a574 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -197,7 +197,7 @@ struct ip_mc_list #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) -extern int ip_check_mc(struct in_device *dev, u32 mc_addr, u32 src_addr, u16 proto); +extern int ip_check_mc(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 58be8227b0cb..d8068c483781 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2216,7 +2216,7 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) +int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; struct ip_sf_list *psf; -- cgit v1.2.3-71-gd317 From ff428d72c59b35e4ba34bc1b487e707648010fe3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:13:35 -0700 Subject: [IPV4]: inet_addr_onlink() annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- net/ipv4/devinet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 40d07d0b896b..5ae09372c144 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -105,7 +105,7 @@ extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); extern struct net_device *ip_dev_find(u32 addr); -extern int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b); +extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(unsigned int cmd, void __user *); extern void devinet_init(void); extern struct in_device *inetdev_init(struct net_device *dev); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bffbbecef455..5988584f6a6d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -224,7 +224,7 @@ static void inetdev_destroy(struct in_device *in_dev) call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } -int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) +int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { rcu_read_lock(); for_primary_ifa(in_dev) { -- cgit v1.2.3-71-gd317 From a144ea4b7a13087081ab5402fa9ad0bcfd249e67 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 18:00:55 -0700 Subject: [IPV4]: annotate struct in_ifaddr ifa_local, ifa_address, ifa_mask, ifa_broadcast and ifa_anycast are net-endian. Annotated them and variables that are inferred to be net-endian. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- arch/ia64/hp/sim/simeth.c | 4 ++-- arch/um/drivers/net_kern.c | 2 +- arch/xtensa/platform-iss/network.c | 2 +- drivers/isdn/i4l/isdn_net.c | 4 ++-- drivers/net/bonding/bond_main.c | 2 +- drivers/net/wan/hdlc_cisco.c | 2 +- drivers/net/wan/syncppp.c | 2 +- drivers/net/wireless/strip.c | 4 ++-- include/linux/inetdevice.h | 10 +++++----- net/ipv4/devinet.c | 4 ++-- net/ipv4/fib_frontend.c | 12 ++++++------ net/ipv4/icmp.c | 2 +- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- 14 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index b5195be62818..e1a1b11473e2 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -320,7 +320,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) } printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n", - dev->name, htonl(ifa->ifa_local)); + dev->name, ntohl(ifa->ifa_local)); /* * XXX Fix me @@ -331,7 +331,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) local = dev->priv; /* now do it for real */ r = event == NETDEV_UP ? - netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)): + netdev_attach(local->simfd, dev->irq, ntohl(ifa->ifa_local)): netdev_detach(local->simfd); printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n", diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 664c2e2fb820..bd1178fa4e9a 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -825,7 +825,7 @@ int dev_netmask(void *d, void *m) struct net_device *dev = d; struct in_device *ip = dev->ip_ptr; struct in_ifaddr *in; - __u32 *mask_out = m; + __be32 *mask_out = m; if(ip == NULL) return(1); diff --git a/arch/xtensa/platform-iss/network.c b/arch/xtensa/platform-iss/network.c index d96164e602fe..15d64414bd60 100644 --- a/arch/xtensa/platform-iss/network.c +++ b/arch/xtensa/platform-iss/network.c @@ -201,7 +201,7 @@ static void dev_ip_addr(void *d, char *buf, char *bin_buf) struct net_device *dev = d; struct in_device *ip = dev->ip_ptr; struct in_ifaddr *in; - u32 addr; + __be32 addr; if ((ip == NULL) || ((in = ip->ifa_list) == NULL)) { printk(KERN_WARNING "Device not assigned an IP address!\n"); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 43da8ae1b2ad..1f8d6ae66b41 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1614,8 +1614,8 @@ isdn_net_ciscohdlck_slarp_send_reply(isdn_net_local *lp) struct sk_buff *skb; unsigned char *p; struct in_device *in_dev = NULL; - u32 addr = 0; /* local ipv4 address */ - u32 mask = 0; /* local netmask */ + __be32 addr = 0; /* local ipv4 address */ + __be32 mask = 0; /* local netmask */ if ((in_dev = lp->netdev->dev.ip_ptr) != NULL) { /* take primary(first) address of interface */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0fb5f653d3ce..c0bbddae4ec4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2252,7 +2252,7 @@ static u32 bond_glean_dev_ip(struct net_device *dev) { struct in_device *idev; struct in_ifaddr *ifa; - u32 addr = 0; + __be32 addr = 0; if (!dev) return 0; diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 7ec2b2f9b7ee..b0bc5ddcf1b1 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -161,7 +161,7 @@ static int cisco_rx(struct sk_buff *skb) struct hdlc_header *data = (struct hdlc_header*)skb->data; struct cisco_packet *cisco_data; struct in_device *in_dev; - u32 addr, mask; + __be32 addr, mask; if (skb->len < sizeof(struct hdlc_header)) goto rx_error; diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index c13b459a0137..d1173089f334 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -763,7 +763,7 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) { struct in_device *in_dev; struct in_ifaddr *ifa; - u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */ + __be32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */ #ifdef CONFIG_INET rcu_read_lock(); if ((in_dev = __in_dev_get_rcu(dev)) != NULL) diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index ccaf28e8db0a..337c692f6fd6 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -1342,7 +1342,7 @@ static unsigned char *strip_make_packet(unsigned char *buffer, * 'broadcast hub' radio (First byte of address being 0xFF means broadcast) */ if (haddr.c[0] == 0xFF) { - u32 brd = 0; + __be32 brd = 0; struct in_device *in_dev; rcu_read_lock(); @@ -1406,7 +1406,7 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb) int doreset = (long) jiffies - strip_info->watchdog_doreset >= 0; int doprobe = (long) jiffies - strip_info->watchdog_doprobe >= 0 && !doreset; - u32 addr, brd; + __be32 addr, brd; /* * 1. If we have a packet, encapsulate it and put it in the buffer diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 5ae09372c144..54b32e8b8f65 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -90,11 +90,11 @@ struct in_ifaddr struct in_ifaddr *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; - u32 ifa_local; - u32 ifa_address; - u32 ifa_mask; - u32 ifa_broadcast; - u32 ifa_anycast; + __be32 ifa_local; + __be32 ifa_address; + __be32 ifa_mask; + __be32 ifa_broadcast; + __be32 ifa_anycast; unsigned char ifa_scope; unsigned char ifa_flags; unsigned char ifa_prefixlen; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5988584f6a6d..a0a7780e7515 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -805,7 +805,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { - u32 old_mask = ifa->ifa_mask; + __be32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); @@ -931,7 +931,7 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, u32 local, int scope) { int same = 0; - u32 addr = 0; + __be32 addr = 0; for_ifa(in_dev) { if (!addr && diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7f5217907e5a..62ee71ee6bc9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -667,9 +667,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *prim = ifa; - u32 mask = ifa->ifa_mask; - u32 addr = ifa->ifa_local; - u32 prefix = ifa->ifa_address&mask; + __be32 mask = ifa->ifa_mask; + __be32 addr = ifa->ifa_local; + __be32 prefix = ifa->ifa_address&mask; if (ifa->ifa_flags&IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); @@ -685,7 +685,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && @@ -707,8 +707,8 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa; - u32 brd = ifa->ifa_address|~ifa->ifa_mask; - u32 any = ifa->ifa_address&ifa->ifa_mask; + __be32 brd = ifa->ifa_address|~ifa->ifa_mask; + __be32 any = ifa->ifa_address&ifa->ifa_mask; #define LOCAL_OK 1 #define BRD_OK 2 #define BRD0_OK 4 diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fd39685241c1..428f1c91ec45 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -895,7 +895,7 @@ static void icmp_address_reply(struct sk_buff *skb) if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { - u32 _mask, *mp; + __be32 _mask, *mp; mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); BUG_ON(mp == NULL); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 3d0b438783db..4adec47aae32 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -48,7 +48,7 @@ static int help(struct sk_buff **pskb, struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt = (struct rtable *)(*pskb)->dst; struct in_device *in_dev; - u_int32_t mask = 0; + __be32 mask = 0; /* we're only interested in locally generated packets */ if ((*pskb)->sk == NULL) diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f03d43671c6d..c0dcfe9d610c 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -61,7 +61,7 @@ redirect_target(struct sk_buff **pskb, { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t newdst; + __be32 newdst; const struct ip_nat_multi_range_compat *mr = targinfo; struct ip_nat_range newrange; -- cgit v1.2.3-71-gd317 From 60cad5da5791ceb0beefe9a79b570cca45791f50 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:17:09 -0700 Subject: [IPV4]: annotate inetdev.h helpers inet_confirm_addr(), inet_ifa_byprefix(), ip_dev_find(), inet_make_mask() and inet_ifa_match() annotated, along with inferred net-endian variables Signed-off-by: Al Viro Signed-off-by: David S. Miller --- drivers/infiniband/core/addr.c | 4 ++-- include/linux/inetdevice.h | 10 +++++----- net/ipv4/devinet.c | 12 ++++++------ net/ipv4/fib_frontend.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 9cbf09e2052f..60d3fbdd216c 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; - u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; int ret; dev = ip_dev_find(ip); @@ -239,7 +239,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, { struct net_device *dev; u32 src_ip = src_in->sin_addr.s_addr; - u32 dst_ip = dst_in->sin_addr.s_addr; + __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; dev = ip_dev_find(dst_ip); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 54b32e8b8f65..5a0ab04627bc 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -104,18 +104,18 @@ struct in_ifaddr extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); -extern struct net_device *ip_dev_find(u32 addr); +extern struct net_device *ip_dev_find(__be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(unsigned int cmd, void __user *); extern void devinet_init(void); extern struct in_device *inetdev_init(struct net_device *dev); extern struct in_device *inetdev_by_index(int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); -extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope); -extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask); +extern __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope); +extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); extern void inet_forward_change(void); -static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa) +static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } @@ -183,7 +183,7 @@ static inline void in_dev_put(struct in_device *idev) #endif /* __KERNEL__ */ -static __inline__ __u32 inet_make_mask(int logmask) +static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) return htonl(~((1<<(32-logmask))-1)); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index afba56222fb5..7602c79a389b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -429,8 +429,8 @@ struct in_device *inetdev_by_index(int ifindex) /* Called only from RTNL semaphored context. No locks. */ -struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, - u32 mask) +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, + __be32 mask) { ASSERT_RTNL(); @@ -927,8 +927,8 @@ out: return addr; } -static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, - u32 local, int scope) +static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, + __be32 local, int scope) { int same = 0; __be32 addr = 0; @@ -971,9 +971,9 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope) +__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) { - u32 addr = 0; + __be32 addr = 0; struct in_device *in_dev; if (dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 62ee71ee6bc9..c0bd2f122da2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -122,7 +122,7 @@ static void fib_flush(void) * Find the first device with a given source address. */ -struct net_device * ip_dev_find(u32 addr) +struct net_device * ip_dev_find(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; -- cgit v1.2.3-71-gd317 From 7699431301b189fca7ccbb64fe54e5a5170f8497 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 26 Sep 2006 22:28:46 -0700 Subject: [SUNRPC]: svc_{get,put}nl() * add svc_getnl(): Take network-endian value from buffer, convert to host-endian and return it. * add svc_putnl(): Take host-endian value, convert to network-endian and put it into a buffer. * annotate svc_getu32()/svc_putu32() as dealing with network-endian. * convert to svc_getnl(), svc_putnl(). [AV: in large part it's a carved-up Alexey's patch] Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/sunrpc/svc.h | 31 +++++++++++++++++------ net/sunrpc/auth_gss/svcauth_gss.c | 52 +++++++++++++++++++-------------------- net/sunrpc/svc.c | 44 ++++++++++++++++----------------- net/sunrpc/svcauth.c | 2 +- net/sunrpc/svcauth_unix.c | 22 ++++++++--------- 5 files changed, 84 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7b27c09b5604..5df1d319f5d5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -78,28 +78,45 @@ struct svc_serv { */ #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 2) -static inline u32 svc_getu32(struct kvec *iov) +static inline u32 svc_getnl(struct kvec *iov) { - u32 val, *vp; + __be32 val, *vp; vp = iov->iov_base; val = *vp++; iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(u32); + iov->iov_len -= sizeof(__be32); + return ntohl(val); +} + +static inline void svc_putnl(struct kvec *iov, u32 val) +{ + __be32 *vp = iov->iov_base + iov->iov_len; + *vp = htonl(val); + iov->iov_len += sizeof(__be32); +} + +static inline __be32 svc_getu32(struct kvec *iov) +{ + __be32 val, *vp; + vp = iov->iov_base; + val = *vp++; + iov->iov_base = (void*)vp; + iov->iov_len -= sizeof(__be32); return val; } static inline void svc_ungetu32(struct kvec *iov) { - u32 *vp = (u32 *)iov->iov_base; + __be32 *vp = (__be32 *)iov->iov_base; iov->iov_base = (void *)(vp - 1); iov->iov_len += sizeof(*vp); } -static inline void svc_putu32(struct kvec *iov, u32 val) +static inline void svc_putu32(struct kvec *iov, __be32 val) { - u32 *vp = iov->iov_base + iov->iov_len; + __be32 *vp = iov->iov_base + iov->iov_len; *vp = val; - iov->iov_len += sizeof(u32); + iov->iov_len += sizeof(__be32); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 94217ec9e2dd..cd3d77ca3a31 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -607,7 +607,7 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o) if (argv->iov_len < 4) return -1; - o->len = ntohl(svc_getu32(argv)); + o->len = svc_getnl(argv); l = round_up_to_quad(o->len); if (argv->iov_len < l) return -1; @@ -624,7 +624,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) if (resv->iov_len + 4 > PAGE_SIZE) return -1; - svc_putu32(resv, htonl(o->len)); + svc_putnl(resv, o->len); p = resv->iov_base + resv->iov_len; resv->iov_len += round_up_to_quad(o->len); if (resv->iov_len > PAGE_SIZE) @@ -657,7 +657,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, *authp = rpc_autherr_badverf; if (argv->iov_len < 4) return SVC_DENIED; - flavor = ntohl(svc_getu32(argv)); + flavor = svc_getnl(argv); if (flavor != RPC_AUTH_GSS) return SVC_DENIED; if (svc_safe_getnetobj(argv, &checksum)) @@ -689,7 +689,7 @@ gss_write_null_verf(struct svc_rqst *rqstp) { u32 *p; - svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_NULL)); + svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; /* don't really need to check if head->iov_len > PAGE_SIZE ... */ *p++ = 0; @@ -708,7 +708,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) u32 *p; struct kvec iov; - svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS)); + svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); xdr_seq = htonl(seq); iov.iov_base = &xdr_seq; @@ -805,7 +805,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) struct xdr_netobj mic; struct xdr_buf integ_buf; - integ_len = ntohl(svc_getu32(&buf->head[0])); + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) goto out; if (integ_len > buf->len) @@ -825,7 +825,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); if (maj_stat != GSS_S_COMPLETE) goto out; - if (ntohl(svc_getu32(&buf->head[0])) != seq) + if (svc_getnl(&buf->head[0]) != seq) goto out; stat = 0; out: @@ -857,7 +857,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs rqstp->rq_sendfile_ok = 0; - priv_len = ntohl(svc_getu32(&buf->head[0])); + priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { /* Already decrypted last time through! The sequence number * check at out_seq is unnecessary but harmless: */ @@ -895,7 +895,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs if (maj_stat != GSS_S_COMPLETE) return -EINVAL; out_seq: - if (ntohl(svc_getu32(&buf->head[0])) != seq) + if (svc_getnl(&buf->head[0]) != seq) return -EINVAL; return 0; } @@ -985,12 +985,12 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) if (argv->iov_len < 5 * 4) goto auth_err; - crlen = ntohl(svc_getu32(argv)); - if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION) + crlen = svc_getnl(argv); + if (svc_getnl(argv) != RPC_GSS_VERSION) goto auth_err; - gc->gc_proc = ntohl(svc_getu32(argv)); - gc->gc_seq = ntohl(svc_getu32(argv)); - gc->gc_svc = ntohl(svc_getu32(argv)); + gc->gc_proc = svc_getnl(argv); + gc->gc_seq = svc_getnl(argv); + gc->gc_svc = svc_getnl(argv); if (svc_safe_getnetobj(argv, &gc->gc_ctx)) goto auth_err; if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) @@ -1016,9 +1016,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) case RPC_GSS_PROC_CONTINUE_INIT: if (argv->iov_len < 2 * 4) goto auth_err; - if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL) + if (svc_getnl(argv) != RPC_AUTH_NULL) goto auth_err; - if (ntohl(svc_getu32(argv)) != 0) + if (svc_getnl(argv) != 0) goto auth_err; break; case RPC_GSS_PROC_DATA: @@ -1076,14 +1076,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto drop; if (resv->iov_len + 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, rpc_success); + svc_putnl(resv, RPC_SUCCESS); if (svc_safe_putnetobj(resv, &rsip->out_handle)) goto drop; if (resv->iov_len + 3 * 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, htonl(rsip->major_status)); - svc_putu32(resv, htonl(rsip->minor_status)); - svc_putu32(resv, htonl(GSS_SEQ_WIN)); + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); if (svc_safe_putnetobj(resv, &rsip->out_token)) goto drop; rqstp->rq_client = NULL; @@ -1093,7 +1093,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, rpc_success); + svc_putnl(resv, RPC_SUCCESS); goto complete; case RPC_GSS_PROC_DATA: *authp = rpcsec_gsserr_ctxproblem; @@ -1111,8 +1111,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto auth_err; /* placeholders for length and seq. number: */ svcdata->body_start = resv->iov_base + resv->iov_len; - svc_putu32(resv, 0); - svc_putu32(resv, 0); + svc_putnl(resv, 0); + svc_putnl(resv, 0); break; case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, @@ -1120,8 +1120,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto auth_err; /* placeholders for length and seq. number: */ svcdata->body_start = resv->iov_base + resv->iov_len; - svc_putu32(resv, 0); - svc_putu32(resv, 0); + svc_putnl(resv, 0); + svc_putnl(resv, 0); break; default: goto auth_err; @@ -1199,7 +1199,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) goto out_err; - svc_putu32(resv, htonl(mic.len)); + svc_putnl(resv, mic.len); memset(mic.data + mic.len, 0, round_up_to_quad(mic.len) - mic.len); resv->iov_len += XDR_QUADLEN(mic.len) << 2; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b76a227dd3ad..6589e1ad47fa 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -284,16 +284,16 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) rqstp->rq_sendfile_ok = 1; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) - svc_putu32(resv, 0); + svc_putnl(resv, 0); rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = ntohl(svc_getu32(argv)); - vers = ntohl(svc_getu32(argv)); + dir = svc_getnl(argv); + vers = svc_getnl(argv); /* First words of reply: */ - svc_putu32(resv, xdr_one); /* REPLY */ + svc_putnl(resv, 1); /* REPLY */ if (dir != 0) /* direction != CALL */ goto err_bad_dir; @@ -303,11 +303,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) /* Save position in case we later decide to reject: */ accept_statp = resv->iov_base + resv->iov_len; - svc_putu32(resv, xdr_zero); /* ACCEPT */ + svc_putnl(resv, 0); /* ACCEPT */ - rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */ - rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */ - rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ + rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ + rqstp->rq_vers = vers = svc_getnl(argv); /* version number */ + rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */ progp = serv->sv_program; @@ -361,7 +361,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) /* Build the reply header. */ statp = resv->iov_base +resv->iov_len; - svc_putu32(resv, rpc_success); /* RPC_SUCCESS */ + svc_putnl(resv, RPC_SUCCESS); /* Bump per-procedure stats counter */ procp->pc_count++; @@ -439,10 +439,10 @@ err_bad_dir: err_bad_rpc: serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, xdr_one); /* REJECT */ - svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */ - svc_putu32(resv, xdr_two); /* Only RPCv2 supported */ - svc_putu32(resv, xdr_two); + svc_putnl(resv, 1); /* REJECT */ + svc_putnl(resv, 0); /* RPC_MISMATCH */ + svc_putnl(resv, 2); /* Only RPCv2 supported */ + svc_putnl(resv, 2); goto sendit; err_bad_auth: @@ -450,15 +450,15 @@ err_bad_auth: serv->sv_stats->rpcbadauth++; /* Restore write pointer to location of accept status: */ xdr_ressize_check(rqstp, accept_statp); - svc_putu32(resv, xdr_one); /* REJECT */ - svc_putu32(resv, xdr_one); /* AUTH_ERROR */ - svc_putu32(resv, auth_stat); /* status */ + svc_putnl(resv, 1); /* REJECT */ + svc_putnl(resv, 1); /* AUTH_ERROR */ + svc_putnl(resv, ntohl(auth_stat)); /* status */ goto sendit; err_bad_prog: dprintk("svc: unknown program %d\n", prog); serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_prog_unavail); + svc_putnl(resv, RPC_PROG_UNAVAIL); goto sendit; err_bad_vers: @@ -466,9 +466,9 @@ err_bad_vers: printk("svc: unknown version (%d)\n", vers); #endif serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_prog_mismatch); - svc_putu32(resv, htonl(progp->pg_lovers)); - svc_putu32(resv, htonl(progp->pg_hivers)); + svc_putnl(resv, RPC_PROG_MISMATCH); + svc_putnl(resv, progp->pg_lovers); + svc_putnl(resv, progp->pg_hivers); goto sendit; err_bad_proc: @@ -476,7 +476,7 @@ err_bad_proc: printk("svc: unknown procedure (%d)\n", proc); #endif serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_proc_unavail); + svc_putnl(resv, RPC_PROC_UNAVAIL); goto sendit; err_garbage: @@ -486,6 +486,6 @@ err_garbage: rpc_stat = rpc_garbage_args; err_bad: serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_stat); + svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 5b28c6176806..3f4d1f622de8 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -42,7 +42,7 @@ svc_authenticate(struct svc_rqst *rqstp, u32 *authp) *authp = rpc_auth_ok; - flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0])); + flavor = svc_getnl(&rqstp->rq_arg.head[0]); dprintk("svc: svc_authenticate (%d)\n", flavor); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7e5707e2d6b6..27f443b44af8 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -427,7 +427,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) *authp = rpc_autherr_badcred; return SVC_DENIED; } - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { dprintk("svc: bad null verf\n"); *authp = rpc_autherr_badverf; return SVC_DENIED; @@ -441,8 +441,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) return SVC_DROP; /* kmalloc failure - client must retry */ /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); + svc_putnl(resv, RPC_AUTH_NULL); + svc_putnl(resv, 0); return SVC_OK; } @@ -488,31 +488,31 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) svc_getu32(argv); /* length */ svc_getu32(argv); /* time stamp */ - slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */ + slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */ if (slen > 64 || (len -= (slen + 3)*4) < 0) goto badcred; argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */ - cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */ - slen = ntohl(svc_getu32(argv)); /* gids length */ + cred->cr_uid = svc_getnl(argv); /* uid */ + cred->cr_gid = svc_getnl(argv); /* gid */ + slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) return SVC_DROP; for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); + GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; } /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); + svc_putnl(resv, RPC_AUTH_NULL); + svc_putnl(resv, 0); return SVC_OK; -- cgit v1.2.3-71-gd317 From d8ed029d6000ba2e2908d9286409e4833c091b4c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 26 Sep 2006 22:29:38 -0700 Subject: [SUNRPC]: trivial endianness annotations pure s/u32/__be32/ [AV: large part based on Alexey's patches] Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/sunrpc/auth.h | 16 +++++------ include/linux/sunrpc/msg_prot.h | 2 +- include/linux/sunrpc/svc.h | 14 +++++----- include/linux/sunrpc/svcauth.h | 4 +-- include/linux/sunrpc/xdr.h | 38 +++++++++++++------------- include/linux/sunrpc/xprt.h | 12 ++++----- net/sunrpc/auth.c | 12 ++++----- net/sunrpc/auth_gss/auth_gss.c | 33 ++++++++++++----------- net/sunrpc/auth_gss/gss_krb5_seal.c | 2 +- net/sunrpc/auth_gss/gss_krb5_wrap.c | 4 +-- net/sunrpc/auth_gss/svcauth_gss.c | 24 ++++++++--------- net/sunrpc/auth_null.c | 8 +++--- net/sunrpc/auth_unix.c | 10 +++---- net/sunrpc/clnt.c | 23 ++++++++-------- net/sunrpc/pmap_clnt.c | 6 ++--- net/sunrpc/svc.c | 8 +++--- net/sunrpc/svcauth.c | 2 +- net/sunrpc/svcauth_unix.c | 8 +++--- net/sunrpc/svcsock.c | 2 +- net/sunrpc/xdr.c | 54 ++++++++++++++++++------------------- net/sunrpc/xprt.c | 4 +-- net/sunrpc/xprtsock.c | 3 ++- 22 files changed, 146 insertions(+), 143 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a6de332e57d4..862c0d8c8381 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -109,13 +109,13 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); - u32 * (*crmarshal)(struct rpc_task *, u32 *); + __be32 * (*crmarshal)(struct rpc_task *, __be32 *); int (*crrefresh)(struct rpc_task *); - u32 * (*crvalidate)(struct rpc_task *, u32 *); + __be32 * (*crvalidate)(struct rpc_task *, __be32 *); int (*crwrap_req)(struct rpc_task *, kxdrproc_t, - void *, u32 *, void *); + void *, __be32 *, void *); int (*crunwrap_resp)(struct rpc_task *, kxdrproc_t, - void *, u32 *, void *); + void *, __be32 *, void *); }; extern struct rpc_authops authunix_ops; @@ -134,10 +134,10 @@ struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); -u32 * rpcauth_marshcred(struct rpc_task *, u32 *); -u32 * rpcauth_checkverf(struct rpc_task *, u32 *); -int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *data, void *obj); -int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *data, void *obj); +__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); +__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); +int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj); +int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index f43f237360ae..d9f5934ac9fe 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -95,7 +95,7 @@ enum rpc_auth_stat { * 2GB. */ -typedef u32 rpc_fraghdr; +typedef __be32 rpc_fraghdr; #define RPC_LAST_STREAM_FRAGMENT (1U << 31) #define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5df1d319f5d5..73140ee5c638 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -147,7 +147,7 @@ struct svc_rqst { short rq_arghi; /* pages available in argument page list */ short rq_resused; /* pages used for result */ - u32 rq_xid; /* transmission id */ + __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ u32 rq_vers; /* program version */ u32 rq_proc; /* procedure number */ @@ -156,7 +156,7 @@ struct svc_rqst { rq_secure : 1; /* secure port */ - __u32 rq_daddr; /* dest addr of request - reply from here */ + __be32 rq_daddr; /* dest addr of request - reply from here */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ @@ -186,7 +186,7 @@ struct svc_rqst { * Check buffer bounds after decoding arguments */ static inline int -xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) +xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) { char *cp = (char *)p; struct kvec *vec = &rqstp->rq_arg.head[0]; @@ -195,7 +195,7 @@ xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) } static inline int -xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) +xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) { struct kvec *vec = &rqstp->rq_res.head[0]; char *cp = (char*)p; @@ -266,10 +266,10 @@ struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct sockaddr_in addr; struct svc_sock *svsk; /* where reply must go */ - u32 daddr; /* where reply must come from */ + __be32 daddr; /* where reply must come from */ struct cache_deferred_req handle; int argslen; - u32 args[0]; + __be32 args[0]; }; /* @@ -301,7 +301,7 @@ struct svc_version { * A return value of 0 means drop the request. * vs_dispatch == NULL means use default dispatcher. */ - int (*vs_dispatch)(struct svc_rqst *, u32 *); + int (*vs_dispatch)(struct svc_rqst *, __be32 *); }; /* diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 2fe2087edd66..a6601650deeb 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -95,7 +95,7 @@ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq, u32 *authp); + int (*accept)(struct svc_rqst *rq, __be32 *authp); int (*release)(struct svc_rqst *rq); void (*domain_release)(struct auth_domain *); int (*set_client)(struct svc_rqst *rq); @@ -112,7 +112,7 @@ struct auth_ops { #define SVC_COMPLETE 9 -extern int svc_authenticate(struct svc_rqst *rqstp, u32 *authp); +extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); extern int svc_authorise(struct svc_rqst *rqstp); extern int svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index e6d3d349506c..953723b09bc6 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -32,7 +32,7 @@ struct xdr_netobj { * side) or svc_rqst pointer (server side). * Encode functions always assume there's enough room in the buffer. */ -typedef int (*kxdrproc_t)(void *rqstp, u32 *data, void *obj); +typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); /* * Basic structure for transmission/reception of a client XDR message. @@ -88,19 +88,19 @@ struct xdr_buf { /* * Miscellaneous XDR helper functions */ -u32 * xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int len); -u32 * xdr_encode_opaque(u32 *p, const void *ptr, unsigned int len); -u32 * xdr_encode_string(u32 *p, const char *s); -u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen); -u32 * xdr_encode_netobj(u32 *p, const struct xdr_netobj *); -u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *); +__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); +__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); +__be32 *xdr_encode_string(__be32 *p, const char *s); +__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen); +__be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); +__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int, unsigned int); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); -static inline u32 *xdr_encode_array(u32 *p, const void *s, unsigned int len) +static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { return xdr_encode_opaque(p, s, len); } @@ -108,16 +108,16 @@ static inline u32 *xdr_encode_array(u32 *p, const void *s, unsigned int len) /* * Decode 64bit quantities (NFSv3 support) */ -static inline u32 * -xdr_encode_hyper(u32 *p, __u64 val) +static inline __be32 * +xdr_encode_hyper(__be32 *p, __u64 val) { *p++ = htonl(val >> 32); *p++ = htonl(val & 0xFFFFFFFF); return p; } -static inline u32 * -xdr_decode_hyper(u32 *p, __u64 *valp) +static inline __be32 * +xdr_decode_hyper(__be32 *p, __u64 *valp) { *valp = ((__u64) ntohl(*p++)) << 32; *valp |= ntohl(*p++); @@ -128,7 +128,7 @@ xdr_decode_hyper(u32 *p, __u64 *valp) * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) */ static inline int -xdr_adjust_iovec(struct kvec *iov, u32 *p) +xdr_adjust_iovec(struct kvec *iov, __be32 *p) { return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base); } @@ -180,19 +180,19 @@ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, * Provide some simple tools for XDR buffer overflow-checking etc. */ struct xdr_stream { - uint32_t *p; /* start of available buffer */ + __be32 *p; /* start of available buffer */ struct xdr_buf *buf; /* XDR buffer to read/write */ - uint32_t *end; /* end of available buffer space */ + __be32 *end; /* end of available buffer space */ struct kvec *iov; /* pointer to the current kvec */ }; -extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); -extern uint32_t *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); -extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); -extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bdeba8538c71..6cf626580752 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -79,7 +79,7 @@ struct rpc_rqst { * This is the private part */ struct rpc_task * rq_task; /* RPC task data */ - __u32 rq_xid; /* request XID */ + __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ u32 rq_seqno; /* gss seq no. used on req. */ @@ -171,9 +171,9 @@ struct rpc_xprt { /* * State of TCP reply receive stuff */ - u32 tcp_recm, /* Fragment header */ - tcp_xid, /* Current XID */ - tcp_reclen, /* fragment length */ + __be32 tcp_recm, /* Fragment header */ + tcp_xid; /* Current XID */ + u32 tcp_reclen, /* fragment length */ tcp_offset; /* fragment offset */ unsigned long tcp_copied, /* copied to request */ tcp_flags; @@ -253,7 +253,7 @@ void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); -static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) +static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { return p + xprt->tsh_size; } @@ -268,7 +268,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); void xprt_update_rtt(struct rpc_task *task); void xprt_adjust_cwnd(struct rpc_task *task, int result); -struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); +struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect(struct rpc_xprt *xprt); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 55163af3dcaf..993ff1a5d945 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -331,8 +331,8 @@ rpcauth_unbindcred(struct rpc_task *task) task->tk_msg.rpc_cred = NULL; } -u32 * -rpcauth_marshcred(struct rpc_task *task, u32 *p) +__be32 * +rpcauth_marshcred(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -342,8 +342,8 @@ rpcauth_marshcred(struct rpc_task *task, u32 *p) return cred->cr_ops->crmarshal(task, p); } -u32 * -rpcauth_checkverf(struct rpc_task *task, u32 *p) +__be32 * +rpcauth_checkverf(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -355,7 +355,7 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p) int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, - u32 *data, void *obj) + __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -369,7 +369,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, - u32 *data, void *obj) + __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6eed3e166ba3..a6ed2d22a6e6 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -826,14 +826,14 @@ out: * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static u32 * -gss_marshal(struct rpc_task *task, u32 *p) +static __be32 * +gss_marshal(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 *cred_len; + __be32 *cred_len; struct rpc_rqst *req = task->tk_rqstp; u32 maj_stat = 0; struct xdr_netobj mic; @@ -894,12 +894,12 @@ gss_refresh(struct rpc_task *task) return 0; } -static u32 * -gss_validate(struct rpc_task *task, u32 *p) +static __be32 * +gss_validate(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 seq; + __be32 seq; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -940,13 +940,14 @@ out_bad: static inline int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) + kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf integ_buf; - u32 *integ_len = NULL; + __be32 *integ_len = NULL; struct xdr_netobj mic; - u32 offset, *q; + u32 offset; + __be32 *q; struct kvec *iov; u32 maj_stat = 0; int status = -EIO; @@ -1032,13 +1033,13 @@ out: static inline int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) + kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; u32 offset; u32 maj_stat; int status; - u32 *opaque_len; + __be32 *opaque_len; struct page **inpages; int first; int pad; @@ -1095,7 +1096,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_wrap_req(struct rpc_task *task, - kxdrproc_t encode, void *rqstp, u32 *p, void *obj) + kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, @@ -1132,7 +1133,7 @@ out: static inline int gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, u32 **p) + struct rpc_rqst *rqstp, __be32 **p) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct xdr_buf integ_buf; @@ -1169,7 +1170,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static inline int gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, u32 **p) + struct rpc_rqst *rqstp, __be32 **p) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; u32 offset; @@ -1198,13 +1199,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_unwrap_resp(struct rpc_task *task, - kxdrproc_t decode, void *rqstp, u32 *p, void *obj) + kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 *savedp = p; + __be32 *savedp = p; struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; int savedlen = head->iov_len; int status = -EIO; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 2f312164d6d5..08601ee4cd73 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -115,7 +115,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; - *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index f179415d0c38..cc45c1605f80 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -177,9 +177,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, msg_start = krb5_hdr + 24; /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); - *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg); make_confounder(msg_start, blocksize); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index cd3d77ca3a31..a2587e8eef7b 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -640,7 +640,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) */ static int gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, - u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp) + __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp) { struct gss_ctx *ctx_id = rsci->mechctx; struct xdr_buf rpchdr; @@ -687,7 +687,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, static int gss_write_null_verf(struct svc_rqst *rqstp) { - u32 *p; + __be32 *p; svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; @@ -701,11 +701,11 @@ gss_write_null_verf(struct svc_rqst *rqstp) static int gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) { - u32 xdr_seq; + __be32 xdr_seq; u32 maj_stat; struct xdr_buf verf_data; struct xdr_netobj mic; - u32 *p; + __be32 *p; struct kvec iov; svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); @@ -782,7 +782,7 @@ EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); static inline int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) { - u32 raw; + __be32 raw; int status; status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); @@ -905,7 +905,7 @@ struct gss_svc_data { struct rpc_gss_wire_cred clcred; /* pointer to the beginning of the procedure-specific results, * which may be encrypted/checksummed in svcauth_gss_release: */ - u32 *body_start; + __be32 *body_start; struct rsc *rsci; }; @@ -946,7 +946,7 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) * response here and return SVC_COMPLETE. */ static int -svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -956,8 +956,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; struct rsi *rsip, rsikey; - u32 *rpcstart; - u32 *reject_stat = resv->iov_base + resv->iov_len; + __be32 *rpcstart; + __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",argv->iov_len); @@ -1156,7 +1156,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) struct xdr_buf integ_buf; struct xdr_netobj mic; struct kvec *resv; - u32 *p; + __be32 *p; int integ_offset, integ_len; int stat = -EINVAL; @@ -1219,7 +1219,7 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; struct page **inpages = NULL; - u32 *p; + __be32 *p; int offset, *len; int pad; @@ -1264,7 +1264,7 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) return -ENOMEM; *len = htonl(resbuf->len - offset); pad = 3 - ((resbuf->len - offset - 1)&3); - p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); + p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); memset(p, 0, pad); resbuf->tail[0].iov_len += pad; resbuf->len += pad; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 2eccffa96ba1..3be257dc32b2 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -60,8 +60,8 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) /* * Marshal credential. */ -static u32 * -nul_marshal(struct rpc_task *task, u32 *p) +static __be32 * +nul_marshal(struct rpc_task *task, __be32 *p) { *p++ = htonl(RPC_AUTH_NULL); *p++ = 0; @@ -81,8 +81,8 @@ nul_refresh(struct rpc_task *task) return 0; } -static u32 * -nul_validate(struct rpc_task *task, u32 *p) +static __be32 * +nul_validate(struct rpc_task *task, __be32 *p) { rpc_authflavor_t flavor; u32 size; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 74c7406a1054..f7f990c9afe2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -137,12 +137,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static u32 * -unx_marshal(struct rpc_task *task, u32 *p) +static __be32 * +unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; - u32 *base, *hold; + __be32 *base, *hold; int i; *p++ = htonl(RPC_AUTH_UNIX); @@ -178,8 +178,8 @@ unx_refresh(struct rpc_task *task) return 0; } -static u32 * -unx_validate(struct rpc_task *task, u32 *p) +static __be32 * +unx_validate(struct rpc_task *task, __be32 *p) { rpc_authflavor_t flavor; u32 size; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 084a0ad5c64e..124ff0ceb55b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -60,8 +60,8 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static u32 * call_header(struct rpc_task *task); -static u32 * call_verify(struct rpc_task *task); +static __be32 * call_header(struct rpc_task *task); +static __be32 * call_verify(struct rpc_task *task); static int @@ -782,7 +782,7 @@ call_encode(struct rpc_task *task) struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; - u32 *p; + __be32 *p; dprintk("RPC: %4d call_encode (status %d)\n", task->tk_pid, task->tk_status); @@ -1100,7 +1100,7 @@ call_decode(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode; - u32 *p; + __be32 *p; dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); @@ -1197,12 +1197,12 @@ call_refreshresult(struct rpc_task *task) /* * Call header serialization */ -static u32 * +static __be32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - u32 *p = req->rq_svec[0].iov_base; + __be32 *p = req->rq_svec[0].iov_base; /* FIXME: check buffer size? */ @@ -1221,12 +1221,13 @@ call_header(struct rpc_task *task) /* * Reply header verification */ -static u32 * +static __be32 * call_verify(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; - u32 *p = iov->iov_base, n; + __be32 *p = iov->iov_base; + u32 n; int error = -EACCES; if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { @@ -1303,7 +1304,7 @@ call_verify(struct rpc_task *task) printk(KERN_WARNING "call_verify: auth check failed\n"); goto out_garbage; /* bad verifier, retry */ } - len = p - (u32 *)iov->iov_base - 1; + len = p - (__be32 *)iov->iov_base - 1; if (len < 0) goto out_overflow; switch ((n = ntohl(*p++))) { @@ -1358,12 +1359,12 @@ out_overflow: goto out_garbage; } -static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) +static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj) { return 0; } -static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj) +static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj) { return 0; } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index c04609d3476a..919d5ba7ca0a 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -300,7 +300,7 @@ static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, /* * XDR encode/decode functions for PMAP */ -static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) +static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map) { dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); @@ -313,13 +313,13 @@ static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args return 0; } -static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) +static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); return 0; } -static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) +static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); return 0; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6589e1ad47fa..44b8d9d4c18a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -256,11 +256,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) struct kvec * argv = &rqstp->rq_arg.head[0]; struct kvec * resv = &rqstp->rq_res.head[0]; kxdrproc_t xdr; - u32 *statp; - u32 dir, prog, vers, proc, - auth_stat, rpc_stat; + __be32 *statp; + u32 dir, prog, vers, proc; + __be32 auth_stat, rpc_stat; int auth_res; - u32 *accept_statp; + __be32 *accept_statp; rpc_stat = rpc_success; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 3f4d1f622de8..8f2320aded5c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -35,7 +35,7 @@ static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = { }; int -svc_authenticate(struct svc_rqst *rqstp, u32 *authp) +svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) { rpc_authflavor_t flavor; struct auth_ops *aops; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 27f443b44af8..f98229fb5e10 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -145,7 +145,7 @@ static void ip_map_request(struct cache_detail *cd, { char text_addr[20]; struct ip_map *im = container_of(h, struct ip_map, h); - __u32 addr = im->m_addr.s_addr; + __be32 addr = im->m_addr.s_addr; snprintf(text_addr, 20, "%u.%u.%u.%u", ntohl(addr) >> 24 & 0xff, @@ -410,7 +410,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) } static int -svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -472,7 +472,7 @@ struct auth_ops svcauth_null = { static int -svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -491,7 +491,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */ if (slen > 64 || (len -= (slen + 3)*4) < 0) goto badcred; - argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */ + argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; cred->cr_uid = svc_getnl(argv); /* uid */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 953aff89bcac..f09f7487051f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1030,7 +1030,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp) { struct xdr_buf *xbufp = &rqstp->rq_res; int sent; - u32 reclen; + __be32 reclen; /* Set up the first element of the reply kvec. * Any other kvecs that may be in use have been taken diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6ac45103a272..9022eb8b37ed 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -18,8 +18,8 @@ /* * XDR functions for basic NFS types */ -u32 * -xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) +__be32 * +xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) { unsigned int quadlen = XDR_QUADLEN(obj->len); @@ -29,8 +29,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) return p + XDR_QUADLEN(obj->len); } -u32 * -xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) +__be32 * +xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; @@ -55,7 +55,7 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) * Returns the updated current XDR buffer position * */ -u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes) +__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int nbytes) { if (likely(nbytes != 0)) { unsigned int quadlen = XDR_QUADLEN(nbytes); @@ -79,21 +79,21 @@ EXPORT_SYMBOL(xdr_encode_opaque_fixed); * * Returns the updated current XDR buffer position */ -u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes) +__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { *p++ = htonl(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL(xdr_encode_opaque); -u32 * -xdr_encode_string(u32 *p, const char *string) +__be32 * +xdr_encode_string(__be32 *p, const char *string) { return xdr_encode_array(p, string, strlen(string)); } -u32 * -xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) +__be32 * +xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) { unsigned int len; @@ -432,7 +432,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) * of the buffer length, and takes care of adjusting the kvec * length for us. */ -void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) +void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; @@ -440,8 +440,8 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; - xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len); - xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len); + xdr->p = (__be32 *)((char *)iov->iov_base + iov->iov_len); + xdr->end = (__be32 *)((char *)iov->iov_base + scratch_len); BUG_ON(iov->iov_len > scratch_len); if (p != xdr->p && p != NULL) { @@ -465,10 +465,10 @@ EXPORT_SYMBOL(xdr_init_encode); * bytes of data. If so, update the total xdr_buf length, and * adjust the length of the current kvec. */ -uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) +__be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) { - uint32_t *p = xdr->p; - uint32_t *q; + __be32 *p = xdr->p; + __be32 *q; /* align nbytes on the next 32-bit boundary */ nbytes += 3; @@ -524,7 +524,7 @@ EXPORT_SYMBOL(xdr_write_pages); * @buf: pointer to XDR buffer from which to decode data * @p: current pointer inside XDR buffer */ -void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) +void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { struct kvec *iov = buf->head; unsigned int len = iov->iov_len; @@ -534,7 +534,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) xdr->buf = buf; xdr->iov = iov; xdr->p = p; - xdr->end = (uint32_t *)((char *)iov->iov_base + len); + xdr->end = (__be32 *)((char *)iov->iov_base + len); } EXPORT_SYMBOL(xdr_init_decode); @@ -548,10 +548,10 @@ EXPORT_SYMBOL(xdr_init_decode); * If so return the current pointer, then update the current * pointer position. */ -uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) +__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { - uint32_t *p = xdr->p; - uint32_t *q = p + XDR_QUADLEN(nbytes); + __be32 *p = xdr->p; + __be32 *q = p + XDR_QUADLEN(nbytes); if (unlikely(q > xdr->end || q < p)) return NULL; @@ -599,8 +599,8 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) * Position current pointer at beginning of tail, and * set remaining message length. */ - xdr->p = (uint32_t *)((char *)iov->iov_base + padding); - xdr->end = (uint32_t *)((char *)iov->iov_base + end); + xdr->p = (__be32 *)((char *)iov->iov_base + padding); + xdr->end = (__be32 *)((char *)iov->iov_base + end); } EXPORT_SYMBOL(xdr_read_pages); @@ -624,8 +624,8 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) */ if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) len = PAGE_CACHE_SIZE - xdr->buf->page_base; - xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base); - xdr->end = (uint32_t *)((char *)xdr->p + len); + xdr->p = (__be32 *)(kaddr + xdr->buf->page_base); + xdr->end = (__be32 *)((char *)xdr->p + len); } EXPORT_SYMBOL(xdr_enter_page); @@ -743,7 +743,7 @@ out: int xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { - u32 raw; + __be32 raw; int status; status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); @@ -756,7 +756,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) int xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) { - u32 raw = htonl(obj); + __be32 raw = htonl(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1f786f68729d..80857470dc11 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -594,7 +594,7 @@ static void xprt_connect_status(struct rpc_task *task) * @xid: RPC XID of incoming reply * */ -struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) +struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { struct list_head *pos; @@ -801,7 +801,7 @@ void xprt_reserve(struct rpc_task *task) spin_unlock(&xprt->reserve_lock); } -static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) +static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { return xprt->xid++; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9b62923a9c06..28100e019225 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -548,7 +548,8 @@ static void xs_udp_data_ready(struct sock *sk, int len) struct rpc_rqst *rovr; struct sk_buff *skb; int err, repsize, copied; - u32 _xid, *xp; + u32 _xid; + __be32 *xp; read_lock(&sk->sk_callback_lock); dprintk("RPC: xs_udp_data_ready...\n"); -- cgit v1.2.3-71-gd317 From 126a336822a6594662f5898f1ddf33e6d048fcc7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 27 Sep 2006 16:03:07 -0700 Subject: [TG3]: Add 5722 and 5756 support. Add IDs to support 5722 and 5756. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 3 +++ drivers/net/tg3.h | 4 +++- include/linux/pci_ids.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 14e964524969..d443b7372325 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -173,6 +173,7 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)}, @@ -187,6 +188,7 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5756)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)}, @@ -11273,6 +11275,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp) case PHY_ID_BCM5780: return "5780"; case PHY_ID_BCM5755: return "5755"; case PHY_ID_BCM5787: return "5787"; + case PHY_ID_BCM5756: return "5722/5756"; case PHY_ID_BCM8002: return "8002/serdes"; case 0: return "serdes"; default: return "unknown"; diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index f7462c2ccc0a..feed13dc8719 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2282,6 +2282,7 @@ struct tg3 { #define PHY_ID_BCM5780 0x60008350 #define PHY_ID_BCM5755 0xbc050cc0 #define PHY_ID_BCM5787 0xbc050ce0 +#define PHY_ID_BCM5756 0xbc050ed0 #define PHY_ID_BCM8002 0x60010140 #define PHY_ID_INVALID 0xffffffff #define PHY_ID_REV_MASK 0x0000000f @@ -2308,7 +2309,8 @@ struct tg3 { (X) == PHY_ID_BCM5705 || (X) == PHY_ID_BCM5750 || \ (X) == PHY_ID_BCM5752 || (X) == PHY_ID_BCM5714 || \ (X) == PHY_ID_BCM5780 || (X) == PHY_ID_BCM5787 || \ - (X) == PHY_ID_BCM5755 || (X) == PHY_ID_BCM8002) + (X) == PHY_ID_BCM5755 || (X) == PHY_ID_BCM5756 || \ + (X) == PHY_ID_BCM8002) struct tg3_hw_stats *hw_stats; dma_addr_t stats_mapping; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 61db1907f06f..ea3140d226e6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1904,6 +1904,7 @@ #define PCI_DEVICE_ID_TIGON3_5705_2 0x1654 #define PCI_DEVICE_ID_TIGON3_5720 0x1658 #define PCI_DEVICE_ID_TIGON3_5721 0x1659 +#define PCI_DEVICE_ID_TIGON3_5722 0x165a #define PCI_DEVICE_ID_TIGON3_5705M 0x165d #define PCI_DEVICE_ID_TIGON3_5705M_2 0x165e #define PCI_DEVICE_ID_TIGON3_5714 0x1668 @@ -1913,6 +1914,7 @@ #define PCI_DEVICE_ID_TIGON3_5705F 0x166e #define PCI_DEVICE_ID_TIGON3_5754M 0x1672 #define PCI_DEVICE_ID_TIGON3_5755M 0x1673 +#define PCI_DEVICE_ID_TIGON3_5756 0x1674 #define PCI_DEVICE_ID_TIGON3_5750 0x1676 #define PCI_DEVICE_ID_TIGON3_5751 0x1677 #define PCI_DEVICE_ID_TIGON3_5715 0x1678 -- cgit v1.2.3-71-gd317 From b5d3772ccbe0bc5ac8ffbb5356b74ca698aee28c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 27 Sep 2006 16:06:21 -0700 Subject: [TG3]: Add basic 5906 support. Add support for the new 5709 device. This is a new 10/100 Mbps chip. The mailbox access and firmware interface are quite different from all other tg3 chips. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 126 +++++++++++++++++++++++++++++++++++++++++++----- drivers/net/tg3.h | 28 +++++++++-- include/linux/pci_ids.h | 2 + 3 files changed, 141 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index eafca2a0dd00..2b062d776511 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -199,6 +199,8 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5906)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5906M)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)}, {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)}, @@ -426,6 +428,16 @@ static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) readl(mbox); } +static u32 tg3_read32_mbox_5906(struct tg3 *tp, u32 off) +{ + return (readl(tp->regs + off + GRCMBOX_BASE)); +} + +static void tg3_write32_mbox_5906(struct tg3 *tp, u32 off, u32 val) +{ + writel(val, tp->regs + off + GRCMBOX_BASE); +} + #define tw32_mailbox(reg, val) tp->write32_mbox(tp, reg, val) #define tw32_mailbox_f(reg, val) tw32_mailbox_flush(tp, (reg), (val)) #define tw32_rx_mbox(reg, val) tp->write32_rx_mbox(tp, reg, val) @@ -441,6 +453,10 @@ static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val) { unsigned long flags; + if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) && + (off >= NIC_SRAM_STATS_BLK) && (off < NIC_SRAM_TX_BUFFER_DESC)) + return; + spin_lock_irqsave(&tp->indirect_lock, flags); if (tp->tg3_flags & TG3_FLAG_SRAM_USE_CONFIG) { pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); @@ -462,6 +478,12 @@ static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val) { unsigned long flags; + if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) && + (off >= NIC_SRAM_STATS_BLK) && (off < NIC_SRAM_TX_BUFFER_DESC)) { + *val = 0; + return; + } + spin_lock_irqsave(&tp->indirect_lock, flags); if (tp->tg3_flags & TG3_FLAG_SRAM_USE_CONFIG) { pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); @@ -491,6 +513,9 @@ static inline void tg3_cond_int(struct tg3 *tp) if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) && (tp->hw_status->status & SD_STATUS_UPDATED)) tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl | GRC_LCLCTRL_SETINT); + else + tw32(HOSTCC_MODE, tp->coalesce_mode | + (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW)); } static void tg3_enable_ints(struct tg3 *tp) @@ -656,6 +681,10 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val) unsigned int loops; int ret; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906 && + (reg == MII_TG3_CTRL || reg == MII_TG3_AUX_CTRL)) + return 0; + if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) { tw32_f(MAC_MI_MODE, (tp->mi_mode & ~MAC_MI_MODE_AUTO_POLL)); @@ -1207,7 +1236,12 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) tg3_setup_phy(tp, 0); } - if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) { + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + u32 val; + + val = tr32(GRC_VCPU_EXT_CTRL); + tw32(GRC_VCPU_EXT_CTRL, val | GRC_VCPU_EXT_CTRL_DISABLE_WOL); + } else if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) { int i; u32 val; @@ -4667,6 +4701,15 @@ static int tg3_poll_fw(struct tg3 *tp) int i; u32 val; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + for (i = 0; i < 400; i++) { + if (tr32(VCPU_STATUS) & VCPU_STATUS_INIT_DONE) + return 0; + udelay(10); + } + return -ENODEV; + } + /* Wait for firmware initialization to complete. */ for (i = 0; i < 100000; i++) { tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val); @@ -4735,6 +4778,12 @@ static int tg3_chip_reset(struct tg3 *tp) } } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + tw32(VCPU_STATUS, tr32(VCPU_STATUS) | VCPU_STATUS_DRV_RESET); + tw32(GRC_VCPU_EXT_CTRL, + tr32(GRC_VCPU_EXT_CTRL) & ~GRC_VCPU_EXT_CTRL_HALT_CPU); + } + if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) val |= GRC_MISC_CFG_KEEP_GPHY_POWER; tw32(GRC_MISC_CFG, val); @@ -5066,6 +5115,12 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) BUG_ON(offset == TX_CPU_BASE && (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + u32 val = tr32(GRC_VCPU_EXT_CTRL); + + tw32(GRC_VCPU_EXT_CTRL, val | GRC_VCPU_EXT_CTRL_HALT_CPU); + return 0; + } if (offset == RX_CPU_BASE) { for (i = 0; i < 10000; i++) { tw32(offset + CPU_STATE, 0xffffffff); @@ -6070,6 +6125,13 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) val = 1; else if (val > tp->rx_std_max_post) val = tp->rx_std_max_post; + else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + if (tp->pci_chip_rev_id == CHIPREV_ID_5906_A1) + tw32(ISO_PKT_TX, (tr32(ISO_PKT_TX) & ~0x3) | 0x2); + + if (val > (TG3_RX_INTERNAL_RING_SZ_5906 / 2)) + val = TG3_RX_INTERNAL_RING_SZ_5906 / 2; + } tw32(RCVBDI_STD_THRESH, val); @@ -6984,9 +7046,10 @@ static int tg3_open(struct net_device *dev) if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) { if (tp->tg3_flags2 & TG3_FLG2_1SHOT_MSI) { - u32 val = tr32(0x7c04); + u32 val = tr32(PCIE_TRANSACTION_CFG); - tw32(0x7c04, val | (1 << 29)); + tw32(PCIE_TRANSACTION_CFG, + val | PCIE_TRANS_CFG_1SHOT_MSI); } } } @@ -7941,7 +8004,8 @@ static int tg3_set_tso(struct net_device *dev, u32 value) return -EINVAL; return 0; } - if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) { + if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) && + (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)) { if (value) dev->features |= NETIF_F_TSO6; else @@ -9257,6 +9321,13 @@ static void __devinit tg3_get_5787_nvram_info(struct tg3 *tp) } } +static void __devinit tg3_get_5906_nvram_info(struct tg3 *tp) +{ + tp->nvram_jedecnum = JEDEC_ATMEL; + tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED; + tp->nvram_pagesize = ATMEL_AT24C512_CHIP_SIZE; +} + /* Chips other than 5700/5701 use the NVRAM for fetching info. */ static void __devinit tg3_nvram_init(struct tg3 *tp) { @@ -9293,6 +9364,8 @@ static void __devinit tg3_nvram_init(struct tg3 *tp) tg3_get_5755_nvram_info(tp); else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) tg3_get_5787_nvram_info(tp); + else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + tg3_get_5906_nvram_info(tp); else tg3_get_nvram_info(tp); @@ -9766,6 +9839,12 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) /* Assume an onboard device by default. */ tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + if (!(tr32(PCIE_TRANSACTION_CFG) & PCIE_TRANS_CFG_LOM)) + tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + return; + } + tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val); if (val == NIC_SRAM_DATA_SIG_MAGIC) { u32 nic_cfg, led_cfg; @@ -10097,7 +10176,10 @@ static void __devinit tg3_read_partno(struct tg3 *tp) } out_not_found: - strcpy(tp->board_part_number, "none"); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + strcpy(tp->board_part_number, "BCM95906"); + else + strcpy(tp->board_part_number, "none"); } static void __devinit tg3_read_fw_ver(struct tg3 *tp) @@ -10299,6 +10381,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906 || (tp->tg3_flags2 & TG3_FLG2_5780_CLASS)) tp->tg3_flags2 |= TG3_FLG2_5750_PLUS; @@ -10308,7 +10391,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS) { if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) { + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { tp->tg3_flags2 |= TG3_FLG2_HW_TSO_2; tp->tg3_flags2 |= TG3_FLG2_1SHOT_MSI; } else { @@ -10325,7 +10409,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750 && GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5752 && GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5755 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787) + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787 && + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) tp->tg3_flags2 |= TG3_FLG2_JUMBO_CAPABLE; if (pci_find_capability(tp->pdev, PCI_CAP_ID_EXP) != 0) @@ -10455,6 +10540,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) pci_cmd &= ~PCI_COMMAND_MEMORY; pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd); } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + tp->read32_mbox = tg3_read32_mbox_5906; + tp->write32_mbox = tg3_write32_mbox_5906; + tp->write32_tx_mbox = tg3_write32_mbox_5906; + tp->write32_rx_mbox = tg3_write32_mbox_5906; + } if (tp->write32 == tg3_write_indirect_reg32 || ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) && @@ -10526,6 +10617,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) && (tp->pci_chip_rev_id != CHIPREV_ID_5705_A0) && (tp->pci_chip_rev_id != CHIPREV_ID_5705_A1)) || + (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) || (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES)) tp->tg3_flags2 |= TG3_FLG2_NO_ETH_WIRE_SPEED; @@ -10539,7 +10631,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) tp->tg3_flags2 |= TG3_FLG2_PHY_JITTER_BUG; - else + else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG; } @@ -10629,7 +10721,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->pdev->device == PCI_DEVICE_ID_TIGON3_5705F)) || (tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM && (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F || - tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F))) + tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F)) || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tp->tg3_flags |= TG3_FLAG_10_100_ONLY; err = tg3_phy_probe(tp); @@ -10680,7 +10773,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) * straddle the 4GB address boundary in some cases. */ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tp->dev->hard_start_xmit = tg3_start_xmit; else tp->dev->hard_start_xmit = tg3_start_xmit_dma_bug; @@ -10761,6 +10855,8 @@ static int __devinit tg3_get_device_address(struct tg3 *tp) else tg3_nvram_unlock(tp); } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + mac_offset = 0x10; /* First try to get it from MAC address mailbox. */ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_HIGH_MBOX, &hi); @@ -11244,6 +11340,12 @@ static void __devinit tg3_init_bufmgr_config(struct tg3 *tp) DEFAULT_MB_MACRX_LOW_WATER_5705; tp->bufmgr_config.mbuf_high_water = DEFAULT_MB_HIGH_WATER_5705; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + tp->bufmgr_config.mbuf_mac_rx_low_water = + DEFAULT_MB_MACRX_LOW_WATER_5906; + tp->bufmgr_config.mbuf_high_water = + DEFAULT_MB_HIGH_WATER_5906; + } tp->bufmgr_config.mbuf_read_dma_low_water_jumbo = DEFAULT_MB_RDMA_LOW_WATER_JUMBO_5780; @@ -11288,6 +11390,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp) case PHY_ID_BCM5755: return "5755"; case PHY_ID_BCM5787: return "5787"; case PHY_ID_BCM5756: return "5722/5756"; + case PHY_ID_BCM5906: return "5906"; case PHY_ID_BCM8002: return "8002/serdes"; case 0: return "serdes"; default: return "unknown"; @@ -11590,7 +11693,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, */ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) { dev->features |= NETIF_F_TSO; - if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) + if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) && + (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)) dev->features |= NETIF_F_TSO6; } diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index feed13dc8719..2f5e00c96016 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -24,6 +24,8 @@ #define RX_COPY_THRESHOLD 256 +#define TG3_RX_INTERNAL_RING_SZ_5906 32 + #define RX_STD_MAX_SIZE 1536 #define RX_STD_MAX_SIZE_5705 512 #define RX_JUMBO_MAX_SIZE 0xdeadbeef /* XXX */ @@ -129,6 +131,7 @@ #define CHIPREV_ID_5752_A0_HW 0x5000 #define CHIPREV_ID_5752_A0 0x6000 #define CHIPREV_ID_5752_A1 0x6001 +#define CHIPREV_ID_5906_A1 0xc001 #define GET_ASIC_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 12) #define ASIC_REV_5700 0x07 #define ASIC_REV_5701 0x00 @@ -141,6 +144,7 @@ #define ASIC_REV_5714 0x09 #define ASIC_REV_5755 0x0a #define ASIC_REV_5787 0x0b +#define ASIC_REV_5906 0x0c #define GET_CHIP_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 8) #define CHIPREV_5700_AX 0x70 #define CHIPREV_5700_BX 0x71 @@ -646,7 +650,8 @@ #define SNDDATAI_SCTRL_FORCE_ZERO 0x00000010 #define SNDDATAI_STATSENAB 0x00000c0c #define SNDDATAI_STATSINCMASK 0x00000c10 -/* 0xc14 --> 0xc80 unused */ +#define ISO_PKT_TX 0x00000c20 +/* 0xc24 --> 0xc80 unused */ #define SNDDATAI_COS_CNT_0 0x00000c80 #define SNDDATAI_COS_CNT_1 0x00000c84 #define SNDDATAI_COS_CNT_2 0x00000c88 @@ -997,11 +1002,13 @@ #define BUFMGR_MB_MACRX_LOW_WATER 0x00004414 #define DEFAULT_MB_MACRX_LOW_WATER 0x00000020 #define DEFAULT_MB_MACRX_LOW_WATER_5705 0x00000010 +#define DEFAULT_MB_MACRX_LOW_WATER_5906 0x00000004 #define DEFAULT_MB_MACRX_LOW_WATER_JUMBO 0x00000098 #define DEFAULT_MB_MACRX_LOW_WATER_JUMBO_5780 0x0000004b #define BUFMGR_MB_HIGH_WATER 0x00004418 #define DEFAULT_MB_HIGH_WATER 0x00000060 #define DEFAULT_MB_HIGH_WATER_5705 0x00000060 +#define DEFAULT_MB_HIGH_WATER_5906 0x00000010 #define DEFAULT_MB_HIGH_WATER_JUMBO 0x0000017c #define DEFAULT_MB_HIGH_WATER_JUMBO_5780 0x00000096 #define BUFMGR_RX_MB_ALLOC_REQ 0x0000441c @@ -1138,7 +1145,12 @@ #define TX_CPU_STATE 0x00005404 #define TX_CPU_PGMCTR 0x0000541c +#define VCPU_STATUS 0x00005100 +#define VCPU_STATUS_INIT_DONE 0x04000000 +#define VCPU_STATUS_DRV_RESET 0x08000000 + /* Mailboxes */ +#define GRCMBOX_BASE 0x00005600 #define GRCMBOX_INTERRUPT_0 0x00005800 /* 64-bit */ #define GRCMBOX_INTERRUPT_1 0x00005808 /* 64-bit */ #define GRCMBOX_INTERRUPT_2 0x00005810 /* 64-bit */ @@ -1398,7 +1410,10 @@ #define GRC_EEPROM_CTRL 0x00006840 #define GRC_MDI_CTRL 0x00006844 #define GRC_SEEPROM_DELAY 0x00006848 -/* 0x684c --> 0x6c00 unused */ +/* 0x684c --> 0x6890 unused */ +#define GRC_VCPU_EXT_CTRL 0x00006890 +#define GRC_VCPU_EXT_CTRL_HALT_CPU 0x00400000 +#define GRC_VCPU_EXT_CTRL_DISABLE_WOL 0x20000000 #define GRC_FASTBOOT_PC 0x00006894 /* 5752, 5755, 5787 */ /* 0x6c00 --> 0x7000 unused */ @@ -1485,7 +1500,11 @@ #define NVRAM_WRITE1 0x00007028 /* 0x702c --> 0x7400 unused */ -/* 0x7400 --> 0x8000 unused */ +/* 0x7400 --> 0x7c00 unused */ +#define PCIE_TRANSACTION_CFG 0x00007c04 +#define PCIE_TRANS_CFG_1SHOT_MSI 0x20000000 +#define PCIE_TRANS_CFG_LOM 0x00000020 + #define TG3_EEPROM_MAGIC 0x669955aa @@ -2283,6 +2302,7 @@ struct tg3 { #define PHY_ID_BCM5755 0xbc050cc0 #define PHY_ID_BCM5787 0xbc050ce0 #define PHY_ID_BCM5756 0xbc050ed0 +#define PHY_ID_BCM5906 0xdc00ac40 #define PHY_ID_BCM8002 0x60010140 #define PHY_ID_INVALID 0xffffffff #define PHY_ID_REV_MASK 0x0000000f @@ -2310,7 +2330,7 @@ struct tg3 { (X) == PHY_ID_BCM5752 || (X) == PHY_ID_BCM5714 || \ (X) == PHY_ID_BCM5780 || (X) == PHY_ID_BCM5787 || \ (X) == PHY_ID_BCM5755 || (X) == PHY_ID_BCM5756 || \ - (X) == PHY_ID_BCM8002) + (X) == PHY_ID_BCM5906 || (X) == PHY_ID_BCM8002) struct tg3_hw_stats *hw_stats; dma_addr_t stats_mapping; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ea3140d226e6..b7e85ff045ea 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1944,6 +1944,8 @@ #define PCI_DEVICE_ID_TIGON3_5901 0x170d #define PCI_DEVICE_ID_BCM4401B1 0x170c #define PCI_DEVICE_ID_TIGON3_5901_2 0x170e +#define PCI_DEVICE_ID_TIGON3_5906 0x1712 +#define PCI_DEVICE_ID_TIGON3_5906M 0x1713 #define PCI_DEVICE_ID_BCM4401 0x4401 #define PCI_DEVICE_ID_BCM4401B0 0x4402 -- cgit v1.2.3-71-gd317 From 00a5020cd51febbb3166ff7a09a2901c47ba251a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:29:47 -0700 Subject: [IPV4]: annotate ipv4 address fields in struct ip_msfilter and struct ip_mreq_source Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/in.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index bcaca8399aed..d79fc75fa7c2 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -123,17 +123,17 @@ struct ip_mreqn }; struct ip_mreq_source { - __u32 imr_multiaddr; - __u32 imr_interface; - __u32 imr_sourceaddr; + __be32 imr_multiaddr; + __be32 imr_interface; + __be32 imr_sourceaddr; }; struct ip_msfilter { - __u32 imsf_multiaddr; - __u32 imsf_interface; + __be32 imsf_multiaddr; + __be32 imsf_interface; __u32 imsf_fmode; __u32 imsf_numsrc; - __u32 imsf_slist[1]; + __be32 imsf_slist[1]; }; #define IP_MSFILTER_SIZE(numsrc) \ -- cgit v1.2.3-71-gd317 From 8f935bbd7c6c66796c2403aefdab74bb48045bf6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:30:07 -0700 Subject: [IPV4]: ip_mc_{inc,dec}_group() annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/igmp.h | 4 ++-- net/ipv4/igmp.c | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 8e7eedb3a574..dd49ba9065ae 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -215,7 +215,7 @@ extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); extern void ip_mc_up(struct in_device *); extern void ip_mc_down(struct in_device *); -extern void ip_mc_dec_group(struct in_device *in_dev, u32 addr); -extern void ip_mc_inc_group(struct in_device *in_dev, u32 addr); +extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); +extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); #endif #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8068c483781..a2e733a82de2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -144,8 +144,8 @@ static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); #endif static void ip_mc_clear_src(struct ip_mc_list *pmc); -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta); +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta); static void ip_ma_put(struct ip_mc_list *im) { @@ -1193,7 +1193,7 @@ static void igmp_group_added(struct ip_mc_list *im) * A socket has joined a multicast group on device dev. */ -void ip_mc_inc_group(struct in_device *in_dev, u32 addr) +void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; @@ -1252,7 +1252,7 @@ out: * A socket has left a multicast group on device dev */ -void ip_mc_dec_group(struct in_device *in_dev, u32 addr) +void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *i, **ip; @@ -1402,7 +1402,7 @@ int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; int rv = 0; @@ -1450,8 +1450,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #define igmp_ifc_event(x) do { } while (0) #endif -static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int changerec = 0; @@ -1517,7 +1517,7 @@ out_unlock: * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc, int delta) + __be32 *psfsrc, int delta) { struct ip_sf_list *psf, *psf_prev; @@ -1623,8 +1623,8 @@ static int sf_setstate(struct ip_mc_list *pmc) /* * Add multicast source filter list to the interface list */ -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int isexclude; @@ -1717,7 +1717,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; - u32 addr = imr->imr_multiaddr.s_addr; + __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -1791,7 +1791,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; - u32 group = imr->imr_multiaddr.s_addr; + __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; -- cgit v1.2.3-71-gd317 From 942bf921e922560c05fde6afb00ddedf6224c608 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:30:28 -0700 Subject: [IPV4]: IGMP on-the-wire data is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/igmp.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index dd49ba9065ae..fd207d9b2733 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -30,8 +30,8 @@ struct igmphdr { __u8 type; __u8 code; /* For newer IGMP */ - __u16 csum; - __u32 group; + __be16 csum; + __be32 group; }; /* V3 group record types [grec_type] */ @@ -45,25 +45,25 @@ struct igmphdr struct igmpv3_grec { __u8 grec_type; __u8 grec_auxwords; - __u16 grec_nsrcs; - __u32 grec_mca; - __u32 grec_src[0]; + __be16 grec_nsrcs; + __be32 grec_mca; + __be32 grec_src[0]; }; struct igmpv3_report { __u8 type; __u8 resv1; - __u16 csum; - __u16 resv2; - __u16 ngrec; + __be16 csum; + __be16 resv2; + __be16 ngrec; struct igmpv3_grec grec[0]; }; struct igmpv3_query { __u8 type; __u8 code; - __u16 csum; - __u32 group; + __be16 csum; + __be32 group; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 qrv:3, suppress:1, @@ -76,8 +76,8 @@ struct igmpv3_query { #error "Please fix " #endif __u8 qqic; - __u16 nsrcs; - __u32 srcs[0]; + __be16 nsrcs; + __be32 srcs[0]; }; #define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* From RFC1112 */ -- cgit v1.2.3-71-gd317 From ea4d9e7220d32348cc9742ba6d27de5165262664 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:30:52 -0700 Subject: [IPV4]: struct ip_sf_list and struct ip_sf_socklist annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/igmp.h | 4 ++-- net/ipv4/igmp.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index fd207d9b2733..4e9f3fe77cf9 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -136,7 +136,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; - __u32 sl_addr[0]; + __be32 sl_addr[0]; }; #define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ @@ -159,7 +159,7 @@ struct ip_mc_socklist struct ip_sf_list { struct ip_sf_list *sf_next; - __u32 sf_inaddr; + __be32 sf_inaddr; unsigned long sf_count[2]; /* include/exclude counts */ unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a2e733a82de2..f6ca51a2dcc2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -426,7 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, first = 1; psf_prev = NULL; for (psf=*psf_list; psf; psf=psf_next) { - u32 *psrc; + __be32 *psrc; psf_next = psf->sf_next; @@ -455,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } - psrc = (u32 *)skb_put(skb, sizeof(u32)); + psrc = (__be32 *)skb_put(skb, sizeof(u32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || @@ -748,7 +748,7 @@ static void igmp_timer_expire(unsigned long data) } /* mark EXCLUDE-mode sources */ -static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; @@ -775,7 +775,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; -- cgit v1.2.3-71-gd317 From c0cda068aac3481d40795b115e4fd36f7d386e3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:31:10 -0700 Subject: [IPV4]: ip_mc_sf_allow() annotated ip_mc_sf_allow() expects addresses to be passed net-endian. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 4e9f3fe77cf9..7514cceb4fe3 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -209,7 +209,7 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_msfilter __user *optval, int __user *optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct group_filter __user *optval, int __user *optlen); -extern int ip_mc_sf_allow(struct sock *sk, u32 local, u32 rmt, int dif); +extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif); extern void ip_mr_init(void); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f6ca51a2dcc2..a2c7b2b1b3fb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2156,7 +2156,7 @@ done: /* * check if a multicast source filter allows delivery for a given */ -int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif) +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; -- cgit v1.2.3-71-gd317 From 63007727e0bb09e8d906f73d36a09b9fac0d5893 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:31:32 -0700 Subject: [IPV4]: trivial igmp annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 7514cceb4fe3..03f43e2893a4 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -140,7 +140,7 @@ struct ip_sf_socklist }; #define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ - (count) * sizeof(__u32)) + (count) * sizeof(__be32)) #define IP_SFBLOCK 10 /* allocate this many at once */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a2c7b2b1b3fb..6eee71647b7c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -138,7 +138,7 @@ time_before(jiffies, (in_dev)->mr_v2_seen))) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); @@ -439,7 +439,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (isquery) psf->sf_gsresp = 0; - if (AVAILABLE(skb) < sizeof(u32) + + if (AVAILABLE(skb) < sizeof(__be32) + first*sizeof(struct igmpv3_grec)) { if (truncate && !first) break; /* truncate these */ @@ -455,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } - psrc = (__be32 *)skb_put(skb, sizeof(u32)); + psrc = (__be32 *)skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || @@ -630,8 +630,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; - u32 group = pmc ? pmc->multiaddr : 0; - u32 dst; + __be32 group = pmc ? pmc->multiaddr : 0; + __be32 dst; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -803,7 +803,7 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) return 1; } -static void igmp_heard_report(struct in_device *in_dev, u32 group) +static void igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; @@ -828,7 +828,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, struct igmphdr *ih = skb->h.igmph; struct igmpv3_query *ih3 = (struct igmpv3_query *)ih; struct ip_mc_list *im; - u32 group = ih->group; + __be32 group = ih->group; int max_delay; int mark = 0; @@ -862,7 +862,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, ih3 = (struct igmpv3_query *) skb->h.raw; if (ih3->nsrcs) { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) - + ntohs(ih3->nsrcs)*sizeof(__u32))) + + ntohs(ih3->nsrcs)*sizeof(__be32))) return; ih3 = (struct igmpv3_query *) skb->h.raw; } @@ -985,7 +985,7 @@ drop: * Add a filter to a device */ -static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; @@ -1005,7 +1005,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) * Remove a filter from a device */ -static void ip_mc_filter_del(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; @@ -1055,7 +1055,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) { struct ip_mc_list *pmc, *pmc_prev; struct ip_sf_list *psf, *psf_next; @@ -1829,7 +1829,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct { int err; struct ip_mreqn imr; - u32 addr = mreqs->imr_multiaddr; + __be32 addr = mreqs->imr_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); @@ -1883,7 +1883,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct rv = !0; for (i=0; isl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1935,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; isl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1960,7 +1960,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) { int err = 0; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -2044,7 +2044,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, { int err, len, count, copycount; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -2103,7 +2103,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, { int err, i, count, copycount; struct sockaddr_in *psin; - u32 addr; + __be32 addr; struct ip_mc_socklist *pmc; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; -- cgit v1.2.3-71-gd317 From 46a97324a5ebdc1e343a0223d993e79551adab0f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:31:51 -0700 Subject: [IPV4]: TCP headers annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/tcp.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8ebf497907f8..543f06371840 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -21,10 +21,10 @@ #include struct tcphdr { - __u16 source; - __u16 dest; - __u32 seq; - __u32 ack_seq; + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; #if defined(__LITTLE_ENDIAN_BITFIELD) __u16 res1:4, doff:4, @@ -50,9 +50,9 @@ struct tcphdr { #else #error "Adjust your defines" #endif - __u16 window; - __u16 check; - __u16 urg_ptr; + __be16 window; + __be16 check; + __be16 urg_ptr; }; /* @@ -62,7 +62,7 @@ struct tcphdr { */ union tcp_word_hdr { struct tcphdr hdr; - __u32 words[5]; + __be32 words[5]; }; #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) -- cgit v1.2.3-71-gd317 From 269bd27e66037a7932cee6d6aa7ef7defd0bfe38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:32:28 -0700 Subject: [TCP]: struct tcp_sack_block annotations Some of the instances of tcp_sack_block are host-endian, some - net-endian. Define struct tcp_sack_block_wire identical to struct tcp_sack_block with u32 replaced with __be32; annotate uses of tcp_sack_block replacing net-endian ones with tcp_sack_block_wire. Change is obviously safe since for cc(1) __be32 is typedefed to u32. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 +++++ net/ipv4/netfilter/ip_nat_helper.c | 2 +- net/ipv4/tcp_input.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 543f06371840..9632aa866de4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -166,6 +166,11 @@ struct tcp_info #include /* This defines a selective acknowledgement block. */ +struct tcp_sack_block_wire { + __be32 start_seq; + __be32 end_seq; +}; + struct tcp_sack_block { __u32 start_seq; __u32 end_seq; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 7f6a75984f6c..e9c5187ea5b2 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -283,7 +283,7 @@ sack_adjust(struct sk_buff *skb, struct ip_nat_seq *natseq) { while (sackoff < sackend) { - struct tcp_sack_block *sack; + struct tcp_sack_block_wire *sack; u_int32_t new_start_seq, new_end_seq; sack = (void *)skb->data + sackoff; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b3def0df14fb..46984ffd73cd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -935,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; - struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); + struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; -- cgit v1.2.3-71-gd317 From dddc93c05d7dba60b44866486502c155e96ab915 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:32:46 -0700 Subject: [TCP]: struct tcp_sock .pred_flags is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9632aa866de4..0e058a2d1c6d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -216,7 +216,7 @@ struct tcp_sock { * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ - __u32 pred_flags; + __be32 pred_flags; /* * RFC793 variables by their proper names. This means you can -- cgit v1.2.3-71-gd317 From b406313c733156c8eea7d9c1891476f400914367 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:34:02 -0700 Subject: [NET]: struct sock_exterr_skb annotations ->port is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/errqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 408118a07763..92f8d4fab32b 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -38,7 +38,7 @@ struct sock_exterr_skb } header; struct sock_extended_err ee; u16 addr_offset; - u16 port; + __be16 port; }; #endif -- cgit v1.2.3-71-gd317 From bd6d610a14f2ed896b76dfb61fbdec829e44b8d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:35:47 -0700 Subject: [IPV4]: ARP header annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/if_arp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index a8b1a2071838..7f5714214ee3 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -130,11 +130,11 @@ struct arpreq_old { struct arphdr { - unsigned short ar_hrd; /* format of hardware address */ - unsigned short ar_pro; /* format of protocol address */ + __be16 ar_hrd; /* format of hardware address */ + __be16 ar_pro; /* format of protocol address */ unsigned char ar_hln; /* length of hardware address */ unsigned char ar_pln; /* length of protocol address */ - unsigned short ar_op; /* ARP opcode (command) */ + __be16 ar_op; /* ARP opcode (command) */ #if 0 /* -- cgit v1.2.3-71-gd317 From 4e7e0c7592cafe5453e5b2f115fc0065d11b3d44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:37:19 -0700 Subject: [IPV4]: UDP header annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/udp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 90223f057d50..014b41d1e308 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -20,10 +20,10 @@ #include struct udphdr { - __u16 source; - __u16 dest; - __u16 len; - __u16 check; + __be16 source; + __be16 dest; + __be16 len; + __be16 check; }; /* UDP socket options */ -- cgit v1.2.3-71-gd317 From b1dd39ac963040c2d282ab8026b9c9aa9306ea06 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:38:13 -0700 Subject: [IPV4]: ICMP header annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/icmp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmp.h b/include/linux/icmp.h index f0b571f1060b..878cfe4e587f 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -68,16 +68,16 @@ struct icmphdr { __u8 type; __u8 code; - __u16 checksum; + __be16 checksum; union { struct { - __u16 id; - __u16 sequence; + __be16 id; + __be16 sequence; } echo; - __u32 gateway; + __be32 gateway; struct { - __u16 __unused; - __u16 mtu; + __be16 __unused; + __be16 mtu; } frag; } un; }; -- cgit v1.2.3-71-gd317 From 1b620154273d5cc57690e0d199282c6bb9e56974 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:39:09 -0700 Subject: [IPV4]: PIMv2 header annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/mroute.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index e05d54a90743..c7dd4c11f667 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -213,8 +213,8 @@ struct pimreghdr { __u8 type; __u8 reserved; - __u16 csum; - __u32 flags; + __be16 csum; + __be32 flags; }; extern int pim_rcv_v1(struct sk_buff *); -- cgit v1.2.3-71-gd317 From 114c7844f34c1608aec20ae7ff85cec471ac90ae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:39:29 -0700 Subject: [IPV4]: mroute annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/mroute.h | 10 +++++----- net/ipv4/ipmr.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index c7dd4c11f667..7da2cee8e132 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -142,7 +142,7 @@ struct vif_device unsigned long rate_limit; /* Traffic shaping (NI) */ unsigned char threshold; /* TTL threshold */ unsigned short flags; /* Control flags */ - __u32 local,remote; /* Addresses(remote for tunnels)*/ + __be32 local,remote; /* Addresses(remote for tunnels)*/ int link; /* Physical interface index */ }; @@ -151,8 +151,8 @@ struct vif_device struct mfc_cache { struct mfc_cache *next; /* Next entry on cache line */ - __u32 mfc_mcastgrp; /* Group the entry belongs to */ - __u32 mfc_origin; /* Source of packet */ + __be32 mfc_mcastgrp; /* Group the entry belongs to */ + __be32 mfc_origin; /* Source of packet */ vifi_t mfc_parent; /* Source interface */ int mfc_flags; /* Flags on line */ @@ -179,9 +179,9 @@ struct mfc_cache #define MFC_LINES 64 #ifdef __BIG_ENDIAN -#define MFC_HASH(a,b) ((((a)>>24)^((b)>>26))&(MFC_LINES-1)) +#define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else -#define MFC_HASH(a,b) (((a)^((b)>>2))&(MFC_LINES-1)) +#define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1)) #endif #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ba49588da242..97cfa97c8abb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -462,7 +462,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return 0; } -static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) +static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) { int line=MFC_HASH(mcastgrp,origin); struct mfc_cache *c; @@ -1097,7 +1097,7 @@ static struct notifier_block ip_mr_notifier={ * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) +static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); -- cgit v1.2.3-71-gd317 From 4f765d842fa6e6fe15d555b247b640118d65b4dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:43:07 -0700 Subject: [IPV4]: INET_MATCH() annotations INET_MATCH() and friends depend on an interesting set of kludges: * there's a pair of adjacent fields in struct inet_sock - __be16 dport followed by __u16 num. We want to search by pair, so we combine the keys into a single 32bit value and compare with 32bit value read from &...->dport. * on 64bit targets we combine comparisons with pair of adjacent __be32 fields in the same way. Make sure that we don't mix those values with anything else and that pairs we form them from have correct types. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/inet_hashtables.h | 36 +++++++++++++++++++++++++----------- net/ipv4/inet_hashtables.c | 2 +- net/ipv6/inet6_hashtables.c | 8 ++++---- 4 files changed, 31 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index caca57df0d7d..6dc07ee7702e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -461,7 +461,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b4491c9e2a5a..fb0c09c7090c 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -283,31 +283,45 @@ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, } /* Socket demux engine toys. */ +/* What happens here is ugly; there's a pair of adjacent fields in + struct inet_sock; __be16 dport followed by __u16 num. We want to + search by pair, so we combine the keys into a single 32bit value + and compare with 32bit value read from &...->dport. Let's at least + make sure that it's not mixed with anything else... + On 64bit targets we combine comparisons with pair of adjacent __be32 + fields in the same way. +*/ +typedef __u32 __bitwise __portpair; #ifdef __BIG_ENDIAN #define INET_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport) << 16) | (__u32)(__dport)) + ((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport))) #else /* __LITTLE_ENDIAN */ #define INET_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport) << 16) | (__u32)(__sport)) + ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif #if (BITS_PER_LONG == 64) +typedef __u64 __bitwise __addrpair; #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); + const __addrpair __name = (__force __addrpair) ( \ + (((__force __u64)(__be32)(__saddr)) << 32) | \ + ((__force __u64)(__be32)(__daddr))); #else /* __LITTLE_ENDIAN */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); + const __addrpair __name = (__force __addrpair) ( \ + (((__force __u64)(__be32)(__daddr)) << 32) | \ + ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ - ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ - ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) @@ -315,13 +329,13 @@ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_hash == (__hash)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #endif /* 64-bit arch */ @@ -338,7 +352,7 @@ static inline struct sock * const int dif) { INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_node *node; /* Optimize here for direct hit, only listening connections can diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb296c9a7f3f..729d1eb39483 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -201,7 +201,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, u32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct sock *sk2; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d2f3fc990bfa..8accd1fbeeda 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -64,7 +64,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, { struct sock *sk; const struct hlist_node *node; - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ @@ -82,7 +82,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { const struct inet_timewait_sock *tw = inet_twsk(sk); - if(*((__u32 *)&(tw->tw_dport)) == ports && + if(*((__portpair *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); @@ -171,7 +171,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &np->rcv_saddr; const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -188,7 +188,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = inet_twsk(sk2); - if(*((__u32 *)&(tw->tw_dport)) == ports && + if(*((__portpair *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && -- cgit v1.2.3-71-gd317 From 9f8552996d969f56039ec88128cf5ad35b12f141 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:44:30 -0700 Subject: [IPV4]: inet_diag annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 10 +++++----- net/ipv4/inet_diag.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index a4606e5810e5..6e8bc548635a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -9,10 +9,10 @@ /* Socket identity */ struct inet_diag_sockid { - __u16 idiag_sport; - __u16 idiag_dport; - __u32 idiag_src[4]; - __u32 idiag_dst[4]; + __be16 idiag_sport; + __be16 idiag_dport; + __be32 idiag_src[4]; + __be32 idiag_dst[4]; __u32 idiag_if; __u32 idiag_cookie[2]; #define INET_DIAG_NOCOOKIE (~0U) @@ -67,7 +67,7 @@ struct inet_diag_hostcond { __u8 family; __u8 prefix_len; int port; - __u32 addr[0]; + __be32 addr[0]; }; /* Base info structure. It contains socket identity (addrs/ports/cookie) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 492858e6faf0..77761ac4f7bb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -36,8 +36,8 @@ static const struct inet_diag_handler **inet_diag_table; struct inet_diag_entry { - u32 *saddr; - u32 *daddr; + __be32 *saddr; + __be32 *daddr; u16 sport; u16 dport; u16 family; @@ -294,7 +294,7 @@ out: return err; } -static int bitstring_match(const u32 *a1, const u32 *a2, int bits) +static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) { int words = bits >> 5; @@ -305,8 +305,8 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) return 0; } if (bits) { - __u32 w1, w2; - __u32 mask; + __be32 w1, w2; + __be32 mask; w1 = a1[words]; w2 = a2[words]; @@ -352,7 +352,7 @@ static int inet_diag_bc_run(const void *bc, int len, case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { struct inet_diag_hostcond *cond; - u32 *addr; + __be32 *addr; cond = (struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && -- cgit v1.2.3-71-gd317 From 48818f822d2b2e16f4bf4d1ed1185e7d2146dc34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:44:54 -0700 Subject: [IPV6]: struct in6_addr annotations in6_addr elements are net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/in6.h | 4 ++-- include/net/ipv6.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index d776829b443f..348ecd4a36fe 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -32,8 +32,8 @@ struct in6_addr union { __u8 u6_addr8[16]; - __u16 u6_addr16[8]; - __u32 u6_addr32[4]; + __be16 u6_addr16[8]; + __be32 u6_addr32[4]; } in6_u; #define s6_addr in6_u.u6_addr8 #define s6_addr16 in6_u.u6_addr16 diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 72bf47b2a4e0..8223c4410b4b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -318,8 +318,8 @@ static inline void ipv6_addr_prefix(struct in6_addr *pfx, #ifndef __HAVE_ARCH_ADDR_SET static inline void ipv6_addr_set(struct in6_addr *addr, - __u32 w1, __u32 w2, - __u32 w3, __u32 w4) + __be32 w1, __be32 w2, + __be32 w3, __be32 w4) { addr->s6_addr32[0] = w1; addr->s6_addr32[1] = w2; @@ -337,7 +337,7 @@ static inline int ipv6_addr_equal(const struct in6_addr *a1, a1->s6_addr32[3] == a2->s6_addr32[3]); } -static inline int __ipv6_prefix_equal(const u32 *a1, const u32 *a2, +static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, unsigned int prefixlen) { unsigned pdw, pbi; -- cgit v1.2.3-71-gd317 From 43505077df075545e9b28b7c6ea12d82b3caf036 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:45:11 -0700 Subject: [IPV6]: IPv6 headers annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/ipv6.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6dc07ee7702e..4f435c59de06 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -99,22 +99,22 @@ struct ipv6_destopt_hao { struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ - __u16 reserved; - __u32 spi; - __u32 seq_no; /* Sequence number */ + __be16 reserved; + __be32 spi; + __be32 seq_no; /* Sequence number */ __u8 auth_data[0]; /* Length variable but >=4. Mind the 64 bit alignment! */ }; struct ipv6_esp_hdr { - __u32 spi; - __u32 seq_no; /* Sequence number */ + __be32 spi; + __be32 seq_no; /* Sequence number */ __u8 enc_data[0]; /* Length variable but >=8. Mind the 64 bit alignment! */ }; struct ipv6_comp_hdr { __u8 nexthdr; __u8 flags; - __u16 cpi; + __be16 cpi; }; /* @@ -136,7 +136,7 @@ struct ipv6hdr { #endif __u8 flow_lbl[3]; - __u16 payload_len; + __be16 payload_len; __u8 nexthdr; __u8 hop_limit; -- cgit v1.2.3-71-gd317 From e2e38e819bd03e9674c102aaa2c9dc8151a3c3d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:45:27 -0700 Subject: [IPV6]: sin6_port is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/in6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 348ecd4a36fe..9be6a4756f0b 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -53,7 +53,7 @@ extern const struct in6_addr in6addr_loopback; struct sockaddr_in6 { unsigned short int sin6_family; /* AF_INET6 */ - __u16 sin6_port; /* Transport layer port # */ + __be16 sin6_port; /* Transport layer port # */ __u32 sin6_flowinfo; /* IPv6 flow information */ struct in6_addr sin6_addr; /* IPv6 address */ __u32 sin6_scope_id; /* scope id (new in RFC2553) */ -- cgit v1.2.3-71-gd317 From 8f83f23e6db8b9a9fe787d02f73489224668c4e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:46:11 -0700 Subject: [XFRM]: ports in struct xfrm_selector annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/xfrm.h | 8 ++++---- net/ipv4/xfrm4_state.c | 4 ++-- net/ipv6/xfrm6_state.c | 4 ++-- net/key/af_key.c | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 14ecd19f4cdc..3aae9b9ce79b 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -49,10 +49,10 @@ struct xfrm_selector { xfrm_address_t daddr; xfrm_address_t saddr; - __u16 dport; - __u16 dport_mask; - __u16 sport; - __u16 sport_mask; + __be16 dport; + __be16 dport_mask; + __be16 sport; + __be16 sport_mask; __u16 family; __u8 prefixlen_d; __u8 prefixlen_s; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index fe2034494d08..3cc3df0c6ece 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -29,9 +29,9 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.daddr.a4 = fl->fl4_dst; x->sel.saddr.a4 = fl->fl4_src; x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = ~0; + x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = ~0; + x->sel.sport_mask = htons(0xffff); x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 711bfafb2472..9ddaa9d41539 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -29,9 +29,9 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = ~0; + x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = ~0; + x->sel.sport_mask = htons(0xffff); x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; diff --git a/net/key/af_key.c b/net/key/af_key.c index 83b443ddc72f..ff98e70b0931 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2140,7 +2140,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.sport) - xp->selector.sport_mask = ~0; + xp->selector.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); @@ -2153,7 +2153,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.dport) - xp->selector.dport_mask = ~0; + xp->selector.dport_mask = htons(0xffff); sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; if (sec_ctx != NULL) { @@ -2243,7 +2243,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.sport) - sel.sport_mask = ~0; + sel.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); @@ -2251,7 +2251,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.dport) - sel.dport_mask = ~0; + sel.dport_mask = htons(0xffff); sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; memset(&tmp, 0, sizeof(struct xfrm_policy)); -- cgit v1.2.3-71-gd317 From 737b5761dfc609b5be4163deb2cf09226f56bcbc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:46:48 -0700 Subject: [XFRM]: xfrm_address_t annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 3aae9b9ce79b..4b321842d65f 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -12,8 +12,8 @@ */ typedef union { - __u32 a4; - __u32 a6[4]; + __be32 a4; + __be32 a6[4]; } xfrm_address_t; /* Ident of a specific xfrm_state. It is used on input to lookup -- cgit v1.2.3-71-gd317 From e037c39bf965ca66fde902e191d849a90de278fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:47:40 -0700 Subject: [XFRM]: struct xfrm_id annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4b321842d65f..c894267ff5dd 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -23,7 +23,7 @@ typedef union struct xfrm_id { xfrm_address_t daddr; - __u32 spi; + __be32 spi; __u8 proto; }; -- cgit v1.2.3-71-gd317 From 9916ecb0a6f52f1475fa2f71845227d3c75fb40c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:48:48 -0700 Subject: [XFRM]: struct xfrm_usersa_id annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index c894267ff5dd..430afd058269 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -281,7 +281,7 @@ struct xfrm_usersa_info { struct xfrm_usersa_id { xfrm_address_t daddr; - __u32 spi; + __be32 spi; __u16 family; __u8 proto; }; -- cgit v1.2.3-71-gd317 From cbde1668e4f08e0a150207646010bc65e1e2a42b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 27 Sep 2006 22:40:19 -0700 Subject: [NET]: Move netlink interface bits to linux/if_link.h. Moving netlink interface bits to linux/if.h is rather troublesome for applications including both linux/if.h (which was changed to be included from linux/rtnetlink.h automatically) and net/if.h. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if.h | 130 -------------------------------------------- include/linux/if_link.h | 136 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 2 +- 3 files changed, 137 insertions(+), 131 deletions(-) create mode 100644 include/linux/if_link.h (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 8018c2e22c0c..32bf419351f1 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -214,134 +214,4 @@ struct ifconf #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ -/* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 collisions; - - /* detailed rx_errors: */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; -}; - -/* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap -{ - __u64 mem_start; - __u64 mem_end; - __u64 base_addr; - __u16 irq; - __u8 dma; - __u8 port; -}; - -enum -{ - IFLA_UNSPEC, - IFLA_ADDRESS, - IFLA_BROADCAST, - IFLA_IFNAME, - IFLA_MTU, - IFLA_LINK, - IFLA_QDISC, - IFLA_STATS, - IFLA_COST, -#define IFLA_COST IFLA_COST - IFLA_PRIORITY, -#define IFLA_PRIORITY IFLA_PRIORITY - IFLA_MASTER, -#define IFLA_MASTER IFLA_MASTER - IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ -#define IFLA_WIRELESS IFLA_WIRELESS - IFLA_PROTINFO, /* Protocol specific information for a link */ -#define IFLA_PROTINFO IFLA_PROTINFO - IFLA_TXQLEN, -#define IFLA_TXQLEN IFLA_TXQLEN - IFLA_MAP, -#define IFLA_MAP IFLA_MAP - IFLA_WEIGHT, -#define IFLA_WEIGHT IFLA_WEIGHT - IFLA_OPERSTATE, - IFLA_LINKMODE, - __IFLA_MAX -}; - - -#define IFLA_MAX (__IFLA_MAX - 1) - -/* ifi_flags. - - IFF_* flags. - - The only change is: - IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are - more not changeable by user. They describe link media - characteristics and set by device driver. - - Comments: - - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid - - If neither of these three flags are set; - the interface is NBMA. - - - IFF_MULTICAST does not mean anything special: - multicasts can be used on all not-NBMA links. - IFF_MULTICAST means that this media uses special encapsulation - for multicast frames. Apparently, all IFF_POINTOPOINT and - IFF_BROADCAST devices are able to use multicasts too. - */ - -/* IFLA_LINK. - For usual devices it is equal ifi_index. - If it is a "virtual interface" (f.e. tunnel), ifi_link - can point to real physical interface (f.e. for bandwidth calculations), - or maybe 0, what means, that real media is unknown (usual - for IPIP tunnels, when route to endpoint is allowed to change) - */ - -/* Subtype attributes for IFLA_PROTINFO */ -enum -{ - IFLA_INET6_UNSPEC, - IFLA_INET6_FLAGS, /* link flags */ - IFLA_INET6_CONF, /* sysctl parameters */ - IFLA_INET6_STATS, /* statistics */ - IFLA_INET6_MCAST, /* MC things. What of them? */ - IFLA_INET6_CACHEINFO, /* time values and max reasm size */ - __IFLA_INET6_MAX -}; - -#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) - -struct ifla_cacheinfo -{ - __u32 max_reasm_len; - __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ - __u32 reachable_time; - __u32 retrans_time; -}; - #endif /* _LINUX_IF_H */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h new file mode 100644 index 000000000000..e963a077e6f5 --- /dev/null +++ b/include/linux/if_link.h @@ -0,0 +1,136 @@ +#ifndef _LINUX_IF_LINK_H +#define _LINUX_IF_LINK_H + +#include + +/* The struct should be in sync with struct net_device_stats */ +struct rtnl_link_stats +{ + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap +{ + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +enum +{ + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum +{ + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +struct ifla_cacheinfo +{ + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; + +#endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9c92dc8b9a08..3a18addaed4c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -2,7 +2,7 @@ #define __LINUX_RTNETLINK_H #include -#include +#include /**** * Routing/neighbour discovery messages. -- cgit v1.2.3-71-gd317 From b854d0d218688b30ccea70521d6ea5f3f56d4e12 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 27 Sep 2006 22:43:05 -0700 Subject: [NET] KBUILD: Add missing entries for new net headers. Add the following for userspace export by the 'headers_include' make target: linux/fib_rules.h, linux/if_addr.h, linux/if_link.h, linux/neighbour.h. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/Kbuild | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 1df2ac30a4d2..f7a52e19b4be 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -58,6 +58,7 @@ header-y += elf-em.h header-y += fadvise.h header-y += fd.h header-y += fdreg.h +header-y += fib_rules.h header-y += ftape-header-segment.h header-y += ftape-vendors.h header-y += fuse.h @@ -70,6 +71,7 @@ header-y += hysdn_if.h header-y += i2c-dev.h header-y += i8k.h header-y += icmp.h +header-y += if_addr.h header-y += if_arcnet.h header-y += if_arp.h header-y += if_bonding.h @@ -79,6 +81,7 @@ header-y += if_fddi.h header-y += if.h header-y += if_hippi.h header-y += if_infiniband.h +header-y += if_link.h header-y += if_packet.h header-y += if_plip.h header-y += if_ppp.h @@ -110,6 +113,7 @@ header-y += mmtimer.h header-y += mqueue.h header-y += mtio.h header-y += ncp_no.h +header-y += neighbour.h header-y += netfilter_arp.h header-y += netrom.h header-y += nfs2.h -- cgit v1.2.3-71-gd317 From d77072ecfb6d28287d5e2a61d60d87a3a444ac97 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:20:34 -0700 Subject: [NET]: Annotate dst_ops protocol Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/net/dst.h | 2 +- net/core/neighbour.c | 2 +- net/ethernet/eth.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 13d6d4eb8b3a..9264139bd8df 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -187,7 +187,7 @@ struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ - unsigned short hh_type; /* protocol identifier, f.e ETH_P_IP + __be16 hh_type; /* protocol identifier, f.e ETH_P_IP * NOTE: For VLANs, this will be the * encapuslated type. --BLG */ diff --git a/include/net/dst.h b/include/net/dst.h index a8d825f90305..e156e38e4ac3 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -84,7 +84,7 @@ struct dst_entry struct dst_ops { unsigned short family; - unsigned short protocol; + __be16 protocol; unsigned gc_thresh; int (*gc)(void); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b6c69e1463e8..8ce8c471d868 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1079,7 +1079,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, } static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - u16 protocol) + __be16 protocol) { struct hh_cache *hh; struct net_device *dev = dst->dev; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 43863933f27f..4bd78c8cfb26 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -223,7 +223,7 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) */ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) { - unsigned short type = hh->hh_type; + __be16 type = hh->hh_type; struct ethhdr *eth; struct net_device *dev = neigh->dev; -- cgit v1.2.3-71-gd317 From 59b8bfd8fd608821e5addc9f4682c7f2424afd8c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:21:07 -0700 Subject: [NETFILTER]: netfilter misc annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netfilter_arp/arp_tables.h | 6 +++--- include/linux/netfilter_ipv4/ip_queue.h | 2 +- net/ipv4/netfilter.c | 4 ++-- net/ipv4/netfilter/arp_tables.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 149e87c9ab13..44e39b61d9e7 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -46,11 +46,11 @@ struct arpt_arp { struct arpt_devaddr_info tgt_devaddr; /* ARP operation code. */ - u_int16_t arpop, arpop_mask; + __be16 arpop, arpop_mask; /* ARP hardware address and protocol address format. */ - u_int16_t arhrd, arhrd_mask; - u_int16_t arpro, arpro_mask; + __be16 arhrd, arhrd_mask; + __be16 arpro, arpro_mask; /* The protocol address length is only accepted if it is 4 * so there is no use in offering a way to do filtering on it. diff --git a/include/linux/netfilter_ipv4/ip_queue.h b/include/linux/netfilter_ipv4/ip_queue.h index aa08d68c4841..a03507f465f8 100644 --- a/include/linux/netfilter_ipv4/ip_queue.h +++ b/include/linux/netfilter_ipv4/ip_queue.h @@ -26,7 +26,7 @@ typedef struct ipq_packet_msg { unsigned int hook; /* Netfilter hook we rode in on */ char indev_name[IFNAMSIZ]; /* Name of incoming interface */ char outdev_name[IFNAMSIZ]; /* Name of outgoing interface */ - unsigned short hw_protocol; /* Hardware protocol (network order) */ + __be16 hw_protocol; /* Hardware protocol (network order) */ unsigned short hw_type; /* Hardware type */ unsigned char hw_addrlen; /* Hardware address length */ unsigned char hw_addr[8]; /* Hardware address */ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f88347de21a9..5ac15379a0cf 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -128,8 +128,8 @@ EXPORT_SYMBOL(ip_nat_decode_session); */ struct ip_rt_info { - u_int32_t daddr; - u_int32_t saddr; + __be32 daddr; + __be32 saddr; u_int8_t tos; }; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 85f0d73ebfb4..17e1a687ab45 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -80,7 +80,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, { char *arpptr = (char *)(arphdr + 1); char *src_devaddr, *tgt_devaddr; - u32 src_ipaddr, tgt_ipaddr; + __be32 src_ipaddr, tgt_ipaddr; int i, ret; #define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg)) -- cgit v1.2.3-71-gd317 From cdcb71bf964e02e0a22007f5d90ead7bede3b85b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:21:37 -0700 Subject: [NETFILTER]: conntrack annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 +- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 16 ++--- net/ipv4/netfilter/ip_conntrack_amanda.c | 6 +- net/ipv4/netfilter/ip_conntrack_core.c | 12 ++-- net/ipv4/netfilter/ip_conntrack_ftp.c | 6 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 4 +- net/ipv4/netfilter/ip_conntrack_irc.c | 5 +- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 10 +-- net/ipv4/netfilter/ip_conntrack_netlink.c | 82 +++++++++++------------ net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 4 +- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 6 +- net/ipv4/netfilter/ip_conntrack_sip.c | 16 ++--- net/ipv4/netfilter/ip_conntrack_tftp.c | 8 +-- 14 files changed, 90 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 51dbec1892c8..64e868034c4a 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -157,7 +157,7 @@ struct ip_conntrack_expect unsigned int flags; #ifdef CONFIG_IP_NF_NAT_NEEDED - u_int32_t saved_ip; + __be32 saved_ip; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ union ip_conntrack_manip_proto saved_proto; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 2fdabdb4c0ef..c228bde74c33 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -23,13 +23,13 @@ union ip_conntrack_manip_proto __be16 port; } tcp; struct { - u_int16_t port; + __be16 port; } udp; struct { - u_int16_t id; + __be16 id; } icmp; struct { - u_int16_t port; + __be16 port; } sctp; struct { __be16 key; /* key is 32bit, pptp only uses 16 */ @@ -39,7 +39,7 @@ union ip_conntrack_manip_proto /* The manipulable part of the tuple. */ struct ip_conntrack_manip { - u_int32_t ip; + __be32 ip; union ip_conntrack_manip_proto u; }; @@ -50,22 +50,22 @@ struct ip_conntrack_tuple /* These are the parts of the tuple which are fixed. */ struct { - u_int32_t ip; + __be32 ip; union { /* Add other protocols here. */ u_int16_t all; struct { - u_int16_t port; + __be16 port; } tcp; struct { - u_int16_t port; + __be16 port; } udp; struct { u_int8_t type, code; } icmp; struct { - u_int16_t port; + __be16 port; } sctp; struct { __be16 key; /* key is 32bit, diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 0a7bd7f04061..6c7383a8e42b 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -155,11 +155,11 @@ static int help(struct sk_buff **pskb, exp->tuple.dst.protonum = IPPROTO_TCP; exp->tuple.dst.u.tcp.port = htons(port); - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); exp->mask.dst.protonum = 0xFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.u.tcp.port = htons(0xFFFF); if (ip_nat_amanda_hook) ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index c432b3163609..143c4668538b 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -149,8 +149,8 @@ static unsigned int ip_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, unsigned int size, unsigned int rnd) { - return (jhash_3words(tuple->src.ip, - (tuple->dst.ip ^ tuple->dst.protonum), + return (jhash_3words((__force u32)tuple->src.ip, + ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), rnd) % size); } @@ -1169,9 +1169,9 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16), &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16), &tuple->dst.u.tcp.port); return 0; @@ -1186,9 +1186,9 @@ int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], return -EINVAL; t->src.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); t->dst.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); return 0; } diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 1d18c863f064..93dcf960662f 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -425,8 +425,8 @@ static int help(struct sk_buff **pskb, exp->tuple.src.u.tcp.port = 0; /* Don't care. */ exp->tuple.dst.protonum = IPPROTO_TCP; exp->mask = ((struct ip_conntrack_tuple) - { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); + { { htonl(0xFFFFFFFF), { 0 } }, + { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; @@ -488,7 +488,7 @@ static int __init ip_conntrack_ftp_init(void) for (i = 0; i < ports_c; i++) { ftp[i].tuple.src.u.tcp.port = htons(ports[i]); ftp[i].tuple.dst.protonum = IPPROTO_TCP; - ftp[i].mask.src.u.tcp.port = 0xFFFF; + ftp[i].mask.src.u.tcp.port = htons(0xFFFF); ftp[i].mask.dst.protonum = 0xFF; ftp[i].max_expected = 1; ftp[i].timeout = 5 * 60; /* 5 minutes */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index fb0aee691721..a2af5e0c7f99 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -242,10 +242,10 @@ exp_gre(struct ip_conntrack *ct, exp_orig->tuple.dst.u.gre.key = callid; exp_orig->tuple.dst.protonum = IPPROTO_GRE; - exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.ip = htonl(0xffffffff); exp_orig->mask.src.u.all = 0; exp_orig->mask.dst.u.gre.key = htons(0xffff); - exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.ip = htonl(0xffffffff); exp_orig->mask.dst.protonum = 0xff; exp_orig->master = ct; diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 44889075f3b2..75f7c3db1619 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -218,7 +218,8 @@ static int help(struct sk_buff **pskb, IPPROTO_TCP }}); exp->mask = ((struct ip_conntrack_tuple) { { 0, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); + { htonl(0xFFFFFFFF), + { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; if (ip_nat_irc_hook) @@ -266,7 +267,7 @@ static int __init ip_conntrack_irc_init(void) hlpr = &irc_helpers[i]; hlpr->tuple.src.u.tcp.port = htons(ports[i]); hlpr->tuple.dst.protonum = IPPROTO_TCP; - hlpr->mask.src.u.tcp.port = 0xFFFF; + hlpr->mask.src.u.tcp.port = htons(0xFFFF); hlpr->mask.dst.protonum = 0xFF; hlpr->max_expected = max_dcc_channels; hlpr->timeout = dcc_timeout; diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 4adec47aae32..a1d6a89f64aa 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -78,12 +78,12 @@ static int help(struct sk_buff **pskb, goto out; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->tuple.src.u.udp.port = ntohs(NMBD_PORT); + exp->tuple.src.u.udp.port = htons(NMBD_PORT); exp->mask.src.ip = mask; - exp->mask.src.u.udp.port = 0xFFFF; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.src.u.udp.port = htons(0xFFFF); + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; @@ -115,7 +115,7 @@ static struct ip_conntrack_helper helper = { .src = { .u = { .udp = { - .port = 0xFFFF, + .port = __constant_htons(0xFFFF), } } }, diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 52eddea27e93..53b6dffea6c2 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -78,8 +78,8 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb, { struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip); NFA_NEST_END(skb, nest_parms); @@ -110,7 +110,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb, static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t status = htonl((u_int32_t) ct->status); + __be32 status = htonl((u_int32_t) ct->status); NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); return 0; @@ -122,7 +122,7 @@ static inline int ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) { long timeout_l = ct->timeout.expires - jiffies; - u_int32_t timeout; + __be32 timeout; if (timeout_l < 0) timeout = 0; @@ -192,13 +192,13 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nfattr *nest_count = NFA_NEST(skb, type); - u_int32_t tmp; + __be32 tmp; tmp = htonl(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp); tmp = htonl(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp); NFA_NEST_END(skb, nest_count); @@ -215,9 +215,9 @@ nfattr_failure: static inline int ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t mark = htonl(ct->mark); + __be32 mark = htonl(ct->mark); - NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark); return 0; nfattr_failure: @@ -230,8 +230,8 @@ nfattr_failure: static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t id = htonl(ct->id); - NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + __be32 id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(__be32), &id); return 0; nfattr_failure: @@ -241,9 +241,9 @@ nfattr_failure: static inline int ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); + __be32 use = htonl(atomic_read(&ct->ct_general.use)); - NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + NFA_PUT(skb, CTA_USE, sizeof(__be32), &use); return 0; nfattr_failure: @@ -457,8 +457,8 @@ out: } static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), + [CTA_IP_V4_SRC-1] = sizeof(__be32), + [CTA_IP_V4_DST-1] = sizeof(__be32), }; static inline int @@ -475,11 +475,11 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) if (!tb[CTA_IP_V4_SRC-1]) return -EINVAL; - tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); if (!tb[CTA_IP_V4_DST-1]) return -EINVAL; - tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); DEBUGP("leaving\n"); @@ -602,8 +602,8 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, } static const size_t cta_min_nat[CTA_NAT_MAX] = { - [CTA_NAT_MINIP-1] = sizeof(u_int32_t), - [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), + [CTA_NAT_MINIP-1] = sizeof(__be32), + [CTA_NAT_MAXIP-1] = sizeof(__be32), }; static inline int @@ -623,12 +623,12 @@ ctnetlink_parse_nat(struct nfattr *nat, return -EINVAL; if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]); if (!tb[CTA_NAT_MAXIP-1]) range->max_ip = range->min_ip; else - range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); if (range->min_ip) range->flags |= IP_NAT_RANGE_MAP_IPS; @@ -663,11 +663,11 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) } static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) + [CTA_STATUS-1] = sizeof(__be32), + [CTA_TIMEOUT-1] = sizeof(__be32), + [CTA_MARK-1] = sizeof(__be32), + [CTA_USE-1] = sizeof(__be32), + [CTA_ID-1] = sizeof(__be32) }; static int @@ -706,7 +706,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = tuplehash_to_ctrack(h); if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1])); if (ct->id != id) { ip_conntrack_put(ct); return -ENOENT; @@ -808,7 +808,7 @@ static inline int ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) { unsigned long d; - unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); + unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -903,7 +903,7 @@ ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) static inline int ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) { - u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); if (!del_timer(&ct->timeout)) return -ETIME; @@ -966,7 +966,7 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) #if defined(CONFIG_IP_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif DEBUGP("all done\n"); @@ -989,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (!cda[CTA_TIMEOUT-1]) goto err; - ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; @@ -1006,7 +1006,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], #if defined(CONFIG_IP_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif ct->helper = ip_conntrack_helper_find_get(rtuple); @@ -1138,8 +1138,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct ip_conntrack_expect *exp) { struct ip_conntrack *master = exp->master; - u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); - u_int32_t id = htonl(exp->id); + __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); + __be32 id = htonl(exp->id); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) goto nfattr_failure; @@ -1150,8 +1150,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, CTA_EXPECT_MASTER) < 0) goto nfattr_failure; - NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); - NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id); return 0; @@ -1272,8 +1272,8 @@ out: } static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) + [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32), + [CTA_EXPECT_ID-1] = sizeof(__be32) }; static int @@ -1321,7 +1321,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { ip_conntrack_expect_put(exp); return -ENOENT; @@ -1375,8 +1375,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = - *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = + *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { ip_conntrack_expect_put(exp); return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 09c40ebe3345..295b6fa340db 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -261,7 +261,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, static int icmp_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16), &t->src.u.icmp.id); NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); @@ -287,7 +287,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], tuple->dst.u.icmp.code = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); if (tuple->dst.u.icmp.type >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type]) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index b908a4842e18..2443322e4128 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -210,7 +210,7 @@ static int sctp_print_conntrack(struct seq_file *s, for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \ offset < skb->len && \ (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ - offset += (htons(sch->length) + 3) & ~3, count++) + offset += (ntohs(sch->length) + 3) & ~3, count++) /* Some validity checks to make sure the chunks are fine */ static int do_basic_checks(struct ip_conntrack *conntrack, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 03ae9a04cb37..06e4e8a6dd9f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -519,8 +519,8 @@ static void tcp_sack(const struct sk_buff *skb, /* Fast path for timestamp-only option */ if (length == TCPOLEN_TSTAMP_ALIGNED*4 - && *(__u32 *)ptr == - __constant_ntohl((TCPOPT_NOP << 24) + && *(__be32 *)ptr == + __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) @@ -551,7 +551,7 @@ static void tcp_sack(const struct sk_buff *skb, for (i = 0; i < (opsize - TCPOLEN_SACK_BASE); i += TCPOLEN_SACK_PERBLOCK) { - tmp = ntohl(*((u_int32_t *)(ptr+i)+1)); + tmp = ntohl(*((__be32 *)(ptr+i)+1)); if (after(tmp, *sack)) *sack = tmp; diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index 2893e9c74850..f4f75995a9e4 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -193,7 +193,7 @@ static int skp_digits_len(const char *dptr, const char *limit, int *shift) /* Simple ipaddr parser.. */ static int parse_ipaddr(const char *cp, const char **endp, - u_int32_t *ipaddr, const char *limit) + __be32 *ipaddr, const char *limit) { unsigned long int val; int i, digit = 0; @@ -227,7 +227,7 @@ static int parse_ipaddr(const char *cp, const char **endp, static int epaddr_len(const char *dptr, const char *limit, int *shift) { const char *aux = dptr; - u_int32_t ip; + __be32 ip; if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) { DEBUGP("ip: %s parse failed.!\n", dptr); @@ -302,7 +302,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen, static int set_expected_rtp(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, - u_int32_t ipaddr, u_int16_t port, + __be32 ipaddr, u_int16_t port, const char *dptr) { struct ip_conntrack_expect *exp; @@ -319,10 +319,10 @@ static int set_expected_rtp(struct sk_buff **pskb, exp->tuple.dst.u.udp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_UDP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.udp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; @@ -349,7 +349,7 @@ static int sip_help(struct sk_buff **pskb, const char *dptr; int ret = NF_ACCEPT; int matchoff, matchlen; - u_int32_t ipaddr; + __be32 ipaddr; u_int16_t port; /* No Data ? */ @@ -439,7 +439,7 @@ static int __init init(void) sip[i].tuple.dst.protonum = IPPROTO_UDP; sip[i].tuple.src.u.udp.port = htons(ports[i]); - sip[i].mask.src.u.udp.port = 0xFFFF; + sip[i].mask.src.u.udp.port = htons(0xFFFF); sip[i].mask.dst.protonum = 0xFF; sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index 7e33d3bed5e3..fe0b634dd377 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -70,10 +70,10 @@ static int tftp_help(struct sk_buff **pskb, return NF_DROP; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->mask.src.ip = 0xffffffff; + exp->mask.src.ip = htonl(0xffffffff); exp->mask.src.u.udp.port = 0; - exp->mask.dst.ip = 0xffffffff; - exp->mask.dst.u.udp.port = 0xffff; + exp->mask.dst.ip = htonl(0xffffffff); + exp->mask.dst.u.udp.port = htons(0xffff); exp->mask.dst.protonum = 0xff; exp->expectfn = NULL; exp->flags = 0; @@ -129,7 +129,7 @@ static int __init ip_conntrack_tftp_init(void) tftp[i].tuple.dst.protonum = IPPROTO_UDP; tftp[i].tuple.src.u.udp.port = htons(ports[i]); tftp[i].mask.dst.protonum = 0xFF; - tftp[i].mask.src.u.udp.port = 0xFFFF; + tftp[i].mask.src.u.udp.port = htons(0xFFFF); tftp[i].max_expected = 1; tftp[i].timeout = 5 * 60; /* 5 minutes */ tftp[i].me = THIS_MODULE; -- cgit v1.2.3-71-gd317 From a76b11dd25957287af12ce6855be6d7fd415b3a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:22:02 -0700 Subject: [NETFILTER]: NAT annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat.h | 2 +- net/ipv4/netfilter/ip_nat_core.c | 14 ++++++------- net/ipv4/netfilter/ip_nat_ftp.c | 10 ++++----- net/ipv4/netfilter/ip_nat_helper.c | 37 +++++++++++++++------------------ net/ipv4/netfilter/ip_nat_helper_pptp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_icmp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_tcp.c | 10 ++++----- net/ipv4/netfilter/ip_nat_proto_udp.c | 10 ++++----- net/ipv4/netfilter/ip_nat_rule.c | 6 +++--- net/ipv4/netfilter/ip_nat_sip.c | 8 +++---- net/ipv4/netfilter/ip_nat_snmp_basic.c | 2 +- net/ipv4/netfilter/ip_nat_standalone.c | 2 +- 12 files changed, 51 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index 98f8407e4cb5..bdf553620ca1 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -33,7 +33,7 @@ struct ip_nat_range unsigned int flags; /* Inclusive: network order. */ - u_int32_t min_ip, max_ip; + __be32 min_ip, max_ip; /* Inclusive: network order */ union ip_conntrack_manip_proto min, max; diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 71f3e09cbc84..4b6260a97408 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -82,7 +82,7 @@ static inline unsigned int hash_by_src(const struct ip_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words(tuple->src.ip, tuple->src.u.all, + return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all, tuple->dst.protonum, 0) % ip_nat_htable_size; } @@ -190,7 +190,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - u_int32_t *var_ipp; + __be32 *var_ipp; /* Host order */ u_int32_t minip, maxip, j; @@ -217,7 +217,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * like this), even across reboots. */ minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); - j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0); + j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0); *var_ipp = htonl(minip + j % (maxip - minip + 1)); } @@ -534,9 +534,9 @@ int ip_nat_port_range_to_nfattr(struct sk_buff *skb, const struct ip_nat_range *range) { - NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), &range->min.tcp.port); - NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), &range->max.tcp.port); return 0; @@ -555,7 +555,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) if (tb[CTA_PROTONAT_PORT_MIN-1]) { ret = 1; range->min.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); } if (!tb[CTA_PROTONAT_PORT_MAX-1]) { @@ -564,7 +564,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) } else { ret = 1; range->max.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); } return ret; diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index 3328fc5c5f50..a71c233d8112 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -34,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper"); static int mangle_rfc959_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -57,7 +57,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, /* |1|132.235.1.2|6275| */ static int mangle_eprt_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ mangle_eprt_packet(struct sk_buff **pskb, /* |1|132.235.1.2|6275| */ static int mangle_epsv_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -98,7 +98,7 @@ mangle_epsv_packet(struct sk_buff **pskb, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t, +static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, unsigned int, unsigned int, struct ip_conntrack *, @@ -120,7 +120,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb, struct ip_conntrack_expect *exp, u32 *seq) { - u_int32_t newip; + __be32 newip; u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct ip_conntrack *ct = exp->master; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index e9c5187ea5b2..3bf858480558 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -189,7 +189,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen, 0)); } else tcph->check = nf_proto_csum_update(*pskb, - htons(oldlen) ^ 0xFFFF, + htons(oldlen) ^ htons(0xFFFF), htons(datalen), tcph->check, 1); @@ -267,7 +267,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, udph->check = -1; } else udph->check = nf_proto_csum_update(*pskb, - htons(oldlen) ^ 0xFFFF, + htons(oldlen) ^ htons(0xFFFF), htons(datalen), udph->check, 1); return 1; @@ -284,26 +284,24 @@ sack_adjust(struct sk_buff *skb, { while (sackoff < sackend) { struct tcp_sack_block_wire *sack; - u_int32_t new_start_seq, new_end_seq; + __be32 new_start_seq, new_end_seq; sack = (void *)skb->data + sackoff; if (after(ntohl(sack->start_seq) - natseq->offset_before, natseq->correction_pos)) - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_after; + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); else - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_before; - new_start_seq = htonl(new_start_seq); + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); if (after(ntohl(sack->end_seq) - natseq->offset_before, natseq->correction_pos)) - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_after; + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); else - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_before; - new_end_seq = htonl(new_end_seq); + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", ntohl(sack->start_seq), new_start_seq, @@ -375,7 +373,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb, enum ip_conntrack_info ctinfo) { struct tcphdr *tcph; - int dir, newseq, newack; + int dir; + __be32 newseq, newack; struct ip_nat_seq *this_way, *other_way; dir = CTINFO2DIR(ctinfo); @@ -388,17 +387,15 @@ ip_nat_seq_adjust(struct sk_buff **pskb, tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = ntohl(tcph->seq) + this_way->offset_after; + newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else - newseq = ntohl(tcph->seq) + this_way->offset_before; - newseq = htonl(newseq); + newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = ntohl(tcph->ack_seq) - other_way->offset_after; + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); else - newack = ntohl(tcph->ack_seq) - other_way->offset_before; - newack = htonl(newack); + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, tcph->check, 0); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 2ff578807123..329fdcd7d702 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -51,7 +51,7 @@ #define IP_NAT_PPTP_VERSION "3.0" -#define REQ_CID(req, off) (*(u_int16_t *)((char *)(req) + (off))) +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index ec50cc295317..3f6efc13ac74 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff **pskb, hdr = (struct icmphdr *)((*pskb)->data + hdroff); hdr->checksum = nf_proto_csum_update(*pskb, - hdr->un.echo.id ^ 0xFFFF, + hdr->un.echo.id ^ htons(0xFFFF), tuple->src.u.icmp.id, hdr->checksum, 0); hdr->un.echo.id = tuple->src.u.icmp.id; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index 72a6307bd2db..12deb13b93b1 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -24,7 +24,7 @@ tcp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - u_int16_t port; + __be16 port; if (maniptype == IP_NAT_MANIP_SRC) port = tuple->src.u.tcp.port; @@ -42,7 +42,7 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t port; - u_int16_t *portptr; + __be16 *portptr; unsigned int range_size, min, i; if (maniptype == IP_NAT_MANIP_SRC) @@ -93,8 +93,8 @@ tcp_manip_pkt(struct sk_buff **pskb, struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport, oldport; + __be32 oldip, newip; + __be16 *portptr, newport, oldport; int hdrsize = 8; /* TCP connection tracking guarantees this much */ /* this could be a inner header returned in icmp packet; in such @@ -130,7 +130,7 @@ tcp_manip_pkt(struct sk_buff **pskb, return 1; hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); - hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport, + hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport, hdr->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 5da196ae758c..4bbec7730d18 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -24,7 +24,7 @@ udp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - u_int16_t port; + __be16 port; if (maniptype == IP_NAT_MANIP_SRC) port = tuple->src.u.udp.port; @@ -42,7 +42,7 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t port; - u_int16_t *portptr; + __be16 *portptr; unsigned int range_size, min, i; if (maniptype == IP_NAT_MANIP_SRC) @@ -91,8 +91,8 @@ udp_manip_pkt(struct sk_buff **pskb, struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport; + __be32 oldip, newip; + __be16 *portptr, newport; if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; @@ -118,7 +118,7 @@ udp_manip_pkt(struct sk_buff **pskb, hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); hdr->check = nf_proto_csum_update(*pskb, - *portptr ^ 0xFFFF, newport, + *portptr ^ htons(0xFFFF), newport, hdr->check, 0); if (!hdr->check) hdr->check = -1; diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 7b703839aa58..a176aa3031e0 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -119,7 +119,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(u32 dstip, u32 srcip) +static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; @@ -205,7 +205,7 @@ alloc_null_binding(struct ip_conntrack *conntrack, per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). Use reply in case it's already been mangled (eg local packet). */ - u_int32_t ip + __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); @@ -222,7 +222,7 @@ alloc_null_binding_confirmed(struct ip_conntrack *conntrack, struct ip_nat_info *info, unsigned int hooknum) { - u_int32_t ip + __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c index 6ffba63adca2..71fc2730a007 100644 --- a/net/ipv4/netfilter/ip_nat_sip.c +++ b/net/ipv4/netfilter/ip_nat_sip.c @@ -60,8 +60,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; unsigned int bufflen, dataoff; - u_int32_t ip; - u_int16_t port; + __be32 ip; + __be16 port; dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); @@ -159,7 +159,7 @@ static int mangle_content_len(struct sk_buff **pskb, static unsigned int mangle_sdp(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct ip_conntrack *ct, - u_int32_t newip, u_int16_t port, + __be32 newip, u_int16_t port, const char *dptr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; @@ -195,7 +195,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, { struct ip_conntrack *ct = exp->master; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - u_int32_t newip; + __be32 newip; u_int16_t port; DEBUGP("ip_nat_sdp():\n"); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 18b7fbdccb61..168f45fa1898 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1211,7 +1211,7 @@ static int snmp_translate(struct ip_conntrack *ct, struct sk_buff **pskb) { struct iphdr *iph = (*pskb)->nh.iph; - struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); int dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 9c577db62047..021395b67463 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -191,7 +191,7 @@ ip_nat_in(unsigned int hooknum, int (*okfn)(struct sk_buff *)) { unsigned int ret; - u_int32_t daddr = (*pskb)->nh.iph->daddr; + __be32 daddr = (*pskb)->nh.iph->daddr; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN -- cgit v1.2.3-71-gd317 From 6a19d61472d0802a24493c0d200e88f99ad39cd8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:22:24 -0700 Subject: [NETFILTER]: ipt annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_iprange.h | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 14 +++++++------- net/ipv4/netfilter/ipt_ECN.c | 12 ++++++------ net/ipv4/netfilter/ipt_NETMAP.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 4 ++-- net/ipv4/netfilter/ipt_SAME.c | 3 ++- net/ipv4/netfilter/ipt_TCPMSS.c | 17 +++++++++-------- net/ipv4/netfilter/ipt_TOS.c | 4 ++-- net/ipv4/netfilter/ipt_TTL.c | 4 ++-- net/ipv4/netfilter/ipt_hashlimit.c | 16 +++++++++------- net/ipv4/netfilter/ipt_recent.c | 15 +++++++-------- net/ipv4/netfilter/iptable_mangle.c | 2 +- 12 files changed, 49 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h index 3ecb3bd63676..34ab0fb736e2 100644 --- a/include/linux/netfilter_ipv4/ipt_iprange.h +++ b/include/linux/netfilter_ipv4/ipt_iprange.h @@ -8,7 +8,7 @@ struct ipt_iprange { /* Inclusive: network order. */ - u_int32_t min_ip, max_ip; + __be32 min_ip, max_ip; }; struct ipt_iprange_info diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 41589665fc5d..7a29d6e7baa7 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -52,7 +52,7 @@ struct clusterip_config { atomic_t entries; /* number of entries/rules * referencing us */ - u_int32_t clusterip; /* the IP address */ + __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ @@ -119,7 +119,7 @@ clusterip_config_entry_put(struct clusterip_config *c) } static struct clusterip_config * -__clusterip_config_find(u_int32_t clusterip) +__clusterip_config_find(__be32 clusterip) { struct list_head *pos; @@ -136,7 +136,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip, int entry) +clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; @@ -166,7 +166,7 @@ clusterip_config_init_nodelist(struct clusterip_config *c, } static struct clusterip_config * -clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, +clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; @@ -387,7 +387,7 @@ checkentry(const char *tablename, return 0; } - if (e->ip.dmsk.s_addr != 0xffffffff + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); return 0; @@ -476,9 +476,9 @@ static struct ipt_target clusterip_tgt = { /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ struct arp_payload { u_int8_t src_hw[ETH_ALEN]; - u_int32_t src_ip; + __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; - u_int32_t dst_ip; + __be32 dst_ip; } __attribute__ ((packed)); #ifdef CLUSTERIP_DEBUG diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 23f9c7ebe7eb..12a818a2462f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -28,7 +28,7 @@ static inline int set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct iphdr *iph = (*pskb)->nh.iph; - u_int16_t oldtos; + __be16 oldtos; if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) @@ -37,7 +37,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, + iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos, iph->check); } return 1; @@ -48,7 +48,7 @@ static inline int set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; - u_int16_t oldval; + __be16 oldval; /* Not enought header? */ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, @@ -66,15 +66,15 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - oldval = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; tcph->check = nf_proto_csum_update((*pskb), - oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], + oldval ^ htons(0xFFFF), + ((__be16 *)tcph)[6], tcph->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index beb2914225ff..58a88f227108 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -58,7 +58,7 @@ target(struct sk_buff **pskb, { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t new_ip, netmask; + __be32 new_ip, netmask; const struct ip_nat_multi_range_compat *mr = targinfo; struct ip_nat_range newrange; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b81821edd893..fd0c05efed8a 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -104,8 +104,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; struct rtable *rt; - u_int16_t tmp_port; - u_int32_t tmp_addr; + __be16 tmp_port; + __be32 tmp_addr; int needs_ack; int hh_len; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index efbcb1198832..b38b13328d73 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -135,7 +135,8 @@ same_target(struct sk_buff **pskb, { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t tmpip, aindex, new_ip; + u_int32_t tmpip, aindex; + __be32 new_ip; const struct ipt_same_info *same = targinfo; struct ip_nat_range newrange; const struct ip_conntrack_tuple *t; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 4246c4321e5b..108b6b76311f 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -42,7 +42,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, const struct ipt_tcpmss_info *tcpmssinfo = targinfo; struct tcphdr *tcph; struct iphdr *iph; - u_int16_t tcplen, newtotlen, oldval, newmss; + u_int16_t tcplen, newmss; + __be16 newtotlen, oldval; unsigned int i; u_int8_t *opt; @@ -97,7 +98,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[i+3] = (newmss & 0x00ff); tcph->check = nf_proto_csum_update(*pskb, - htons(oldmss)^0xFFFF, + htons(oldmss)^htons(0xFFFF), htons(newmss), tcph->check, 0); return IPT_CONTINUE; @@ -126,7 +127,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); tcph->check = nf_proto_csum_update(*pskb, - htons(tcplen) ^ 0xFFFF, + htons(tcplen) ^ htons(0xFFFF), htons(tcplen + TCPOLEN_MSS), tcph->check, 1); opt[0] = TCPOPT_MSS; @@ -134,18 +135,18 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt), + tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt), tcph->check, 0); - oldval = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; tcph->check = nf_proto_csum_update(*pskb, - oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], + oldval ^ htons(0xFFFF), + ((__be16 *)tcph)[6], tcph->check, 0); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF, + iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF), newtotlen, iph->check); iph->tot_len = newtotlen; return IPT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 471a4c438b0a..6b8b14ccc3d3 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -30,7 +30,7 @@ target(struct sk_buff **pskb, { const struct ipt_tos_target_info *tosinfo = targinfo; struct iphdr *iph = (*pskb)->nh.iph; - u_int16_t oldtos; + __be16 oldtos; if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) @@ -38,7 +38,7 @@ target(struct sk_buff **pskb, iph = (*pskb)->nh.iph; oldtos = iph->tos; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; - iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, + iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos, iph->check); } return IPT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 96e79cc6d0f2..ac9517d62af0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -54,8 +54,8 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF, - ntohs(new_ttl << 8), + iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF), + htons(new_ttl << 8), iph->check); iph->ttl = new_ttl; } diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 4f73a61aa3dd..33ccdbf8e794 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -50,11 +50,11 @@ static struct file_operations dl_file_ops; /* hash table crap */ struct dsthash_dst { - u_int32_t src_ip; - u_int32_t dst_ip; + __be32 src_ip; + __be32 dst_ip; /* ports have to be consecutive !!! */ - u_int16_t src_port; - u_int16_t dst_port; + __be16 src_port; + __be16 dst_port; }; struct dsthash_ent { @@ -106,8 +106,10 @@ static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) static inline u_int32_t hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst) { - return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port), - dst->src_ip, ht->rnd) % ht->cfg.size); + return (jhash_3words((__force u32)dst->dst_ip, + ((__force u32)dst->dst_port<<16 | + (__force u32)dst->src_port), + (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size); } static inline struct dsthash_ent * @@ -406,7 +408,7 @@ hashlimit_match(const struct sk_buff *skb, dst.src_ip = skb->nh.iph->saddr; if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) { - u_int16_t _ports[2], *ports; + __be16 _ports[2], *ports; switch (skb->nh.iph->protocol) { case IPPROTO_TCP: diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 32ae8d7ac506..126db44e71a8 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -50,11 +50,10 @@ MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); - struct recent_entry { struct list_head list; struct list_head lru_list; - u_int32_t addr; + __be32 addr; u_int8_t ttl; u_int8_t index; u_int16_t nstamps; @@ -85,17 +84,17 @@ static struct file_operations recent_fops; static u_int32_t hash_rnd; static int hash_rnd_initted; -static unsigned int recent_entry_hash(u_int32_t addr) +static unsigned int recent_entry_hash(__be32 addr) { if (!hash_rnd_initted) { get_random_bytes(&hash_rnd, 4); hash_rnd_initted = 1; } - return jhash_1word(addr, hash_rnd) & (ip_list_hash_size - 1); + return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); } static struct recent_entry * -recent_entry_lookup(const struct recent_table *table, u_int32_t addr, u_int8_t ttl) +recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) { struct recent_entry *e; unsigned int h; @@ -116,7 +115,7 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) } static struct recent_entry * -recent_entry_init(struct recent_table *t, u_int32_t addr, u_int8_t ttl) +recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) { struct recent_entry *e; @@ -178,7 +177,7 @@ ipt_recent_match(const struct sk_buff *skb, const struct ipt_recent_info *info = matchinfo; struct recent_table *t; struct recent_entry *e; - u_int32_t addr; + __be32 addr; u_int8_t ttl; int ret = info->invert; @@ -406,7 +405,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - u_int32_t addr; + __be32 addr; int add; if (size > sizeof(buf)) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 79336cb42527..e62ea2bb9c0a 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -131,7 +131,7 @@ ipt_local_hook(unsigned int hook, { unsigned int ret; u_int8_t tos; - u_int32_t saddr, daddr; + __be32 saddr, daddr; unsigned long nfmark; /* root is playing with raw sockets. */ -- cgit v1.2.3-71-gd317 From d4263cde88d3fee2af0aac8836bb785cdb6b06c0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:22:51 -0700 Subject: [NETFILTER]: h323 annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_h323.h | 6 +- net/ipv4/netfilter/ip_conntrack_helper_h323.c | 84 ++++++++++++------------ net/ipv4/netfilter/ip_nat_helper_h323.c | 16 ++--- 3 files changed, 53 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_h323.h b/include/linux/netfilter_ipv4/ip_conntrack_h323.h index 3cbff7379002..943cc6a4871d 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_h323.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_h323.h @@ -30,7 +30,7 @@ struct ip_ct_h323_master { struct ip_conntrack_expect; extern int get_h225_addr(unsigned char *data, TransportAddress * addr, - u_int32_t * ip, u_int16_t * port); + __be32 * ip, u_int16_t * port); extern void ip_conntrack_h245_expect(struct ip_conntrack *new, struct ip_conntrack_expect *this); extern void ip_conntrack_q931_expect(struct ip_conntrack *new, @@ -38,11 +38,11 @@ extern void ip_conntrack_q931_expect(struct ip_conntrack *new, extern int (*set_h245_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); extern int (*set_h225_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); extern int (*set_sig_addr_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c index 9a39e2969712..7b7441202bfd 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c @@ -49,11 +49,11 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " int (*set_h245_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); int (*set_h225_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); int (*set_sig_addr_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, @@ -209,7 +209,7 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct, /****************************************************************************/ static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr, - u_int32_t * ip, u_int16_t * port) + __be32 * ip, u_int16_t * port) { unsigned char *p; @@ -232,7 +232,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; u_int16_t rtp_port; struct ip_conntrack_expect *rtp_exp; @@ -254,10 +254,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; rtp_exp->tuple.dst.u.udp.port = htons(rtp_port); rtp_exp->tuple.dst.protonum = IPPROTO_UDP; - rtp_exp->mask.src.ip = 0xFFFFFFFF; + rtp_exp->mask.src.ip = htonl(0xFFFFFFFF); rtp_exp->mask.src.u.udp.port = 0; - rtp_exp->mask.dst.ip = 0xFFFFFFFF; - rtp_exp->mask.dst.u.udp.port = 0xFFFF; + rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF); + rtp_exp->mask.dst.u.udp.port = htons(0xFFFF); rtp_exp->mask.dst.protonum = 0xFF; rtp_exp->flags = 0; @@ -271,10 +271,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1); rtcp_exp->tuple.dst.protonum = IPPROTO_UDP; - rtcp_exp->mask.src.ip = 0xFFFFFFFF; + rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF); rtcp_exp->mask.src.u.udp.port = 0; - rtcp_exp->mask.dst.ip = 0xFFFFFFFF; - rtcp_exp->mask.dst.u.udp.port = 0xFFFF; + rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF); + rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF); rtcp_exp->mask.dst.protonum = 0xFF; rtcp_exp->flags = 0; @@ -325,7 +325,7 @@ static int expect_t120(struct sk_buff **pskb, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -342,10 +342,10 @@ static int expect_t120(struct sk_buff **pskb, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */ @@ -626,7 +626,7 @@ void ip_conntrack_h245_expect(struct ip_conntrack *new, /****************************************************************************/ int get_h225_addr(unsigned char *data, TransportAddress * addr, - u_int32_t * ip, u_int16_t * port) + __be32 * ip, u_int16_t * port) { unsigned char *p; @@ -648,7 +648,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -665,10 +665,10 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; @@ -709,7 +709,7 @@ static int expect_callforwarding(struct sk_buff **pskb, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -751,10 +751,10 @@ static int expect_callforwarding(struct sk_buff **pskb, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; @@ -791,7 +791,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret; int i; - u_int32_t ip; + __be32 ip; u_int16_t port; DEBUGP("ip_ct_q931: Setup\n"); @@ -1188,7 +1188,7 @@ static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen) /****************************************************************************/ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { struct ip_conntrack_expect *exp; struct ip_conntrack_tuple tuple; @@ -1228,7 +1228,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1251,10 +1251,10 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = gkrouted_only ? 0xFFFFFFFF : 0; + exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0; exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */ @@ -1307,7 +1307,7 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1333,10 +1333,10 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_UDP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; exp->expectfn = ip_conntrack_ras_expect; @@ -1477,7 +1477,7 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct, { struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); - u_int32_t ip; + __be32 ip; u_int16_t port; DEBUGP("ip_ct_ras: ARQ\n"); @@ -1513,7 +1513,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1538,10 +1538,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; exp->expectfn = ip_conntrack_q931_expect; @@ -1581,7 +1581,7 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -1598,10 +1598,10 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; exp->expectfn = ip_conntrack_q931_expect; diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c index 419b878fb467..4a7d34466ee2 100644 --- a/net/ipv4/netfilter/ip_nat_helper_h323.c +++ b/net/ipv4/netfilter/ip_nat_helper_h323.c @@ -32,13 +32,13 @@ /****************************************************************************/ static int set_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, - unsigned int addroff, u_int32_t ip, u_int16_t port) + unsigned int addroff, __be32 ip, u_int16_t port) { enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo); struct { - u_int32_t ip; - u_int16_t port; + __be32 ip; + __be16 port; } __attribute__ ((__packed__)) buf; struct tcphdr _tcph, *th; @@ -86,7 +86,7 @@ static int set_addr(struct sk_buff **pskb, static int set_h225_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port); } @@ -95,7 +95,7 @@ static int set_h225_addr(struct sk_buff **pskb, static int set_h245_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { return set_addr(pskb, data, dataoff, addr->unicastAddress.iPAddress.network, ip, port); @@ -110,7 +110,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); int i; - u_int32_t ip; + __be32 ip; u_int16_t port; for (i = 0; i < count; i++) { @@ -164,7 +164,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int i; - u_int32_t ip; + __be32 ip; u_int16_t port; for (i = 0; i < count; i++) { @@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = port; - u_int32_t ip; + __be32 ip; /* Set expectations for NAT */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; -- cgit v1.2.3-71-gd317 From 32f50cdee666333168b5203c7864bede159f789e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 28 Sep 2006 14:51:47 -0700 Subject: [NetLabel]: add audit support for configuration changes This patch adds audit support to NetLabel, including six new audit message types shown below. #define AUDIT_MAC_UNLBL_ACCEPT 1406 #define AUDIT_MAC_UNLBL_DENY 1407 #define AUDIT_MAC_CIPSOV4_ADD 1408 #define AUDIT_MAC_CIPSOV4_DEL 1409 #define AUDIT_MAC_MAP_ADD 1410 #define AUDIT_MAC_MAP_DEL 1411 Signed-off-by: Paul Moore Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/audit.h | 6 +++ include/net/cipso_ipv4.h | 5 ++- include/net/netlabel.h | 2 +- net/ipv4/cipso_ipv4.c | 8 +++- net/netlabel/netlabel_cipso_v4.c | 43 +++++++++++++----- net/netlabel/netlabel_domainhash.c | 54 +++++++++++++++++++--- net/netlabel/netlabel_domainhash.h | 6 +-- net/netlabel/netlabel_mgmt.c | 14 +++--- net/netlabel/netlabel_unlabeled.c | 36 ++++++++++++--- net/netlabel/netlabel_user.c | 91 ++++++++++++++++++++++++++++++++++++++ net/netlabel/netlabel_user.h | 6 +++ 11 files changed, 235 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 40a6c26294ae..42719d07612a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -95,6 +95,12 @@ #define AUDIT_MAC_POLICY_LOAD 1403 /* Policy file load */ #define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ #define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ +#define AUDIT_MAC_UNLBL_ACCEPT 1406 /* NetLabel: allow unlabeled traffic */ +#define AUDIT_MAC_UNLBL_DENY 1407 /* NetLabel: deny unlabeled traffic */ +#define AUDIT_MAC_CIPSOV4_ADD 1408 /* NetLabel: add CIPSOv4 DOI entry */ +#define AUDIT_MAC_CIPSOV4_DEL 1409 /* NetLabel: del CIPSOv4 DOI entry */ +#define AUDIT_MAC_MAP_ADD 1410 /* NetLabel: add LSM domain mapping */ +#define AUDIT_MAC_MAP_DEL 1411 /* NetLabel: del LSM domain mapping */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 2d72496c2029..5d6ae1b2b196 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -128,7 +128,9 @@ extern int cipso_v4_rbm_strictvalid; #ifdef CONFIG_NETLABEL int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); -int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); +int cipso_v4_doi_remove(u32 doi, + u32 audit_secid, + void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); int cipso_v4_doi_walk(u32 *skip_cnt, int (*callback) (struct cipso_v4_doi *doi_def, void *arg), @@ -143,6 +145,7 @@ static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } static inline int cipso_v4_doi_remove(u32 doi, + u32 audit_secid, void (*callback) (struct rcu_head * head)) { return 0; diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 6692430063fd..190bfdbbdba6 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -96,7 +96,7 @@ struct netlbl_dom_map; /* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain); +int netlbl_domhsh_remove(const char *domain, u32 audit_secid); /* LSM security attributes */ struct netlbl_lsm_cache { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e6ce0b3ba62a..c4e469ff842d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -474,6 +474,7 @@ doi_add_failure_rlock: /** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value + * @audit_secid: the LSM secid to use in the audit message * @callback: the DOI cleanup/free callback * * Description: @@ -483,7 +484,9 @@ doi_add_failure_rlock: * success and negative values on failure. * */ -int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) +int cipso_v4_doi_remove(u32 doi, + u32 audit_secid, + void (*callback) (struct rcu_head * head)) { struct cipso_v4_doi *doi_def; struct cipso_v4_domhsh_entry *dom_iter; @@ -502,7 +505,8 @@ int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) spin_unlock(&cipso_v4_doi_list_lock); list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) - netlbl_domhsh_remove(dom_iter->domain); + netlbl_domhsh_remove(dom_iter->domain, + audit_secid); cipso_v4_cache_invalidate(); rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 4125a55f469f..09986ca962a6 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -162,8 +163,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) int nla_a_rem; int nla_b_rem; - if (!info->attrs[NLBL_CIPSOV4_A_DOI] || - !info->attrs[NLBL_CIPSOV4_A_TAGLST] || + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] || !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) return -EINVAL; @@ -344,8 +344,7 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info) int ret_val; struct cipso_v4_doi *doi_def = NULL; - if (!info->attrs[NLBL_CIPSOV4_A_DOI] || - !info->attrs[NLBL_CIPSOV4_A_TAGLST]) + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); @@ -381,21 +380,35 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - u32 map_type; + u32 type; + u32 doi; + const char *type_str = "(unknown)"; + struct audit_buffer *audit_buf; - if (!info->attrs[NLBL_CIPSOV4_A_MTYPE]) + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; - map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); - switch (map_type) { + type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); + switch (type) { case CIPSO_V4_MAP_STD: + type_str = "std"; ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: + type_str = "pass"; ret_val = netlbl_cipsov4_add_pass(info); break; } + if (ret_val == 0) { + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + NETLINK_CB(skb).sid); + audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str); + audit_log_end(audit_buf); + } + return ret_val; } @@ -653,11 +666,21 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - u32 doi; + u32 doi = 0; + struct audit_buffer *audit_buf; if (info->attrs[NLBL_CIPSOV4_A_DOI]) { doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); + ret_val = cipso_v4_doi_remove(doi, + NETLINK_CB(skb).sid, + netlbl_cipsov4_doi_free); + } + + if (ret_val == 0) { + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, + NETLINK_CB(skb).sid); + audit_log_format(audit_buf, " doi=%u", doi); + audit_log_end(audit_buf); } return ret_val; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index f56d7a8ac7b7..d64e2ae3b129 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -35,12 +35,14 @@ #include #include #include +#include #include #include #include #include "netlabel_mgmt.h" #include "netlabel_domainhash.h" +#include "netlabel_user.h" struct netlbl_domhsh_tbl { struct list_head *tbl; @@ -186,6 +188,7 @@ int netlbl_domhsh_init(u32 size) /** * netlbl_domhsh_add - Adds a entry to the domain hash table * @entry: the entry to add + * @audit_secid: the LSM secid to use in the audit message * * Description: * Adds a new entry to the domain hash table and handles any updates to the @@ -193,10 +196,12 @@ int netlbl_domhsh_init(u32 size) * negative on failure. * */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry) +int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) { int ret_val; u32 bkt; + struct audit_buffer *audit_buf; + char *audit_domain; switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: @@ -236,6 +241,26 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry) spin_unlock(&netlbl_domhsh_def_lock); } else ret_val = -EINVAL; + if (ret_val == 0) { + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, + audit_secid); + audit_log_format(audit_buf, " domain=%s", audit_domain); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " protocol=cipsov4 doi=%u", + entry->type_def.cipsov4->doi); + break; + } + audit_log_end(audit_buf); + } rcu_read_unlock(); if (ret_val != 0) { @@ -254,6 +279,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry) /** * netlbl_domhsh_add_default - Adds the default entry to the domain hash table * @entry: the entry to add + * @audit_secid: the LSM secid to use in the audit message * * Description: * Adds a new default entry to the domain hash table and handles any updates @@ -261,14 +287,15 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry) * negative on failure. * */ -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry) +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) { - return netlbl_domhsh_add(entry); + return netlbl_domhsh_add(entry, audit_secid); } /** * netlbl_domhsh_remove - Removes an entry from the domain hash table * @domain: the domain to remove + * @audit_secid: the LSM secid to use in the audit message * * Description: * Removes an entry from the domain hash table and handles any updates to the @@ -276,10 +303,12 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry) * negative on failure. * */ -int netlbl_domhsh_remove(const char *domain) +int netlbl_domhsh_remove(const char *domain, u32 audit_secid) { int ret_val = -ENOENT; struct netlbl_dom_map *entry; + struct audit_buffer *audit_buf; + char *audit_domain; rcu_read_lock(); if (domain != NULL) @@ -316,8 +345,18 @@ int netlbl_domhsh_remove(const char *domain) ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_def_lock); } - if (ret_val == 0) + if (ret_val == 0) { + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, + audit_secid); + audit_log_format(audit_buf, " domain=%s", audit_domain); + audit_log_end(audit_buf); + call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + } remove_return: rcu_read_unlock(); @@ -326,6 +365,7 @@ remove_return: /** * netlbl_domhsh_remove_default - Removes the default entry from the table + * @audit_secid: the LSM secid to use in the audit message * * Description: * Removes/resets the default entry for the domain hash table and handles any @@ -333,9 +373,9 @@ remove_return: * success, non-zero on failure. * */ -int netlbl_domhsh_remove_default(void) +int netlbl_domhsh_remove_default(u32 audit_secid) { - return netlbl_domhsh_remove(NULL); + return netlbl_domhsh_remove(NULL, audit_secid); } /** diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 02af72a7877c..d50f13cacdca 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -57,9 +57,9 @@ struct netlbl_dom_map { int netlbl_domhsh_init(u32 size); /* Manipulate the domain hash table */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry); -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); -int netlbl_domhsh_remove_default(void); +int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid); +int netlbl_domhsh_remove_default(u32 audit_secid); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 8626c9f678eb..0ac314f18ad1 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -108,7 +108,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry); + ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -125,7 +125,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto add_failure; } - ret_val = netlbl_domhsh_add(entry); + ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); rcu_read_unlock(); break; default: @@ -161,7 +161,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) return -EINVAL; domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); - return netlbl_domhsh_remove(domain); + return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid); } /** @@ -277,7 +277,8 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry); + ret_val = netlbl_domhsh_add_default(entry, + NETLINK_CB(skb).sid); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -294,7 +295,8 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto adddef_failure; } - ret_val = netlbl_domhsh_add_default(entry); + ret_val = netlbl_domhsh_add_default(entry, + NETLINK_CB(skb).sid); rcu_read_unlock(); break; default: @@ -322,7 +324,7 @@ adddef_failure: */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - return netlbl_domhsh_remove_default(); + return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid); } /** diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 440f5c4e1e2d..ab36675fee8c 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -63,6 +63,27 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, }; +/* + * Helper Functions + */ + +/** + * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag + * @value: desired value + * @audit_secid: the LSM secid to use in the audit message + * + * Description: + * Set the value of the unlabeled accept flag to @value. + * + */ +static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) +{ + atomic_set(&netlabel_unlabel_accept_flg, value); + netlbl_audit_nomsg((value ? + AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY), + audit_secid); +} + /* * NetLabel Command Handlers */ @@ -79,18 +100,18 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; u8 value; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { - atomic_set(&netlabel_unlabel_accept_flg, value); - ret_val = 0; + netlbl_unlabel_acceptflg_set(value, + NETLINK_CB(skb).sid); + return 0; } } - return ret_val; + return -EINVAL; } /** @@ -229,16 +250,19 @@ int netlbl_unlabel_defconf(void) { int ret_val; struct netlbl_dom_map *entry; + u32 secid; + + security_task_getsecid(current, &secid); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->type = NETLBL_NLTYPE_UNLABELED; - ret_val = netlbl_domhsh_add_default(entry); + ret_val = netlbl_domhsh_add_default(entry, secid); if (ret_val != 0) return ret_val; - atomic_set(&netlabel_unlabel_accept_flg, 1); + netlbl_unlabel_acceptflg_set(1, secid); return 0; } diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index eeb7d768d2bb..c2343af584cb 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -74,3 +77,91 @@ int netlbl_netlink_init(void) return 0; } + +/* + * NetLabel Audit Functions + */ + +/** + * netlbl_audit_start_common - Start an audit message + * @type: audit message type + * @secid: LSM context ID + * + * Description: + * Start an audit message using the type specified in @type and fill the audit + * message with some fields common to all NetLabel audit messages. Returns + * a pointer to the audit buffer on success, NULL on failure. + * + */ +struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) +{ + struct audit_context *audit_ctx = current->audit_context; + struct audit_buffer *audit_buf; + uid_t audit_loginuid; + const char *audit_tty; + char audit_comm[sizeof(current->comm)]; + struct vm_area_struct *vma; + char *secctx; + u32 secctx_len; + + audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type); + if (audit_buf == NULL) + return NULL; + + audit_loginuid = audit_get_loginuid(audit_ctx); + if (current->signal && + current->signal->tty && + current->signal->tty->name) + audit_tty = current->signal->tty->name; + else + audit_tty = "(none)"; + get_task_comm(audit_comm, current); + + audit_log_format(audit_buf, + "netlabel: auid=%u uid=%u tty=%s pid=%d", + audit_loginuid, + current->uid, + audit_tty, + current->pid); + audit_log_format(audit_buf, " comm="); + audit_log_untrustedstring(audit_buf, audit_comm); + if (current->mm) { + down_read(¤t->mm->mmap_sem); + vma = current->mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && + vma->vm_file) { + audit_log_d_path(audit_buf, + " exe=", + vma->vm_file->f_dentry, + vma->vm_file->f_vfsmnt); + break; + } + vma = vma->vm_next; + } + up_read(¤t->mm->mmap_sem); + } + + if (secid != 0 && + security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + + return audit_buf; +} + +/** + * netlbl_audit_nomsg - Send an audit message without additional text + * @type: audit message type + * @secid: LSM context ID + * + * Description: + * Send an audit message with only the common NetLabel audit fields. + * + */ +void netlbl_audit_nomsg(int type, u32 secid) +{ + struct audit_buffer *audit_buf; + + audit_buf = netlbl_audit_start_common(type, secid); + audit_log_end(audit_buf); +} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 3f9386b917df..ab840acfc964 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -75,4 +76,9 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, int netlbl_netlink_init(void); +/* NetLabel Audit Functions */ + +struct audit_buffer *netlbl_audit_start_common(int type, u32 secid); +void netlbl_audit_nomsg(int type, u32 secid); + #endif -- cgit v1.2.3-71-gd317 From 0891a8d706d6e6838a926b6dec42f95581747d0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Sep 2006 01:58:34 -0700 Subject: [PATCH] __percpu_alloc_mask() has to be __always_inline in UP case ... or we'll end up with cpu_online_map being evaluated on UP. In modules. cpumask.h is very careful to avoid that, and for a very good reason. So should we... PS: yes, it really triggers (on alpha). Signed-off-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3835a9642f13..46ec72fa2c84 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -74,7 +74,7 @@ static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, return 0; } -static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { return kzalloc(size, gfp); } -- cgit v1.2.3-71-gd317 From e04da1dfd9041e306cb33d1b40b6005c23c5b325 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Sep 2006 01:58:35 -0700 Subject: [PATCH] sys_getcpu() prototype annotated Signed-off-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3f0f716225ec..2d1c3d5c83ac 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -597,6 +597,6 @@ asmlinkage long sys_get_robust_list(int pid, size_t __user *len_ptr); asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); -asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache); +asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); #endif -- cgit v1.2.3-71-gd317 From f71b2f10f56802075d67c5710cd9f1816382d720 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 29 Sep 2006 01:58:39 -0700 Subject: [PATCH] JBD: Make journal_brelse_array() static It's always good to make symbols static when we can, and this also eliminates the need to rename the function in jbd2 Suggested by Eric Sandeen. Signed-off-by: Dave Kleikamp Cc: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/recovery.c | 2 +- include/linux/jbd.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 445eed6ce5dc..11563fe2a52b 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *, #ifdef __KERNEL__ /* Release readahead buffers after use */ -void journal_brelse_array(struct buffer_head *b[], int n) +static void journal_brelse_array(struct buffer_head *b[], int n) { while (--n >= 0) brelse (b[n]); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index a6d9daa38c6d..fe89444b1c6f 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -977,7 +977,6 @@ extern void journal_write_revoke_records(journal_t *, transaction_t *); extern int journal_set_revoke(journal_t *, unsigned long, tid_t); extern int journal_test_revoke(journal_t *, unsigned long, tid_t); extern void journal_clear_revoke(journal_t *); -extern void journal_brelse_array(struct buffer_head *b[], int n); extern void journal_switch_revoke_table(journal_t *journal); /* -- cgit v1.2.3-71-gd317 From 2dcea57ae19275451a756a2d5bf96b329487b0e0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 29 Sep 2006 01:58:41 -0700 Subject: [PATCH] convert s390 page handling macros to functions Convert s390 page handling macros to functions. In particular this fixes a problem with s390's SetPageUptodate macro which uses its input parameter twice which again can cause subtle bugs. [akpm@osdl.org: build fix] Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/appldata/appldata_base.c | 2 +- include/asm-s390/pgtable.h | 84 ++++++++++++++++++-------------------- include/linux/page-flags.h | 11 +++-- 3 files changed, 46 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 9a8f6ff21652..2b1e6c9a6e0e 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 83425cdefc91..ecdff13b2505 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -31,9 +31,9 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include #include #include -#include struct vm_area_struct; /* forward declaration (include/linux/mm.h) */ struct mm_struct; @@ -597,31 +597,31 @@ ptep_establish(struct vm_area_struct *vma, * should therefore only be called if it is not mapped in any * address space. */ -#define page_test_and_clear_dirty(_page) \ -({ \ - struct page *__page = (_page); \ - unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT); \ - int __skey = page_get_storage_key(__physpage); \ - if (__skey & _PAGE_CHANGED) \ - page_set_storage_key(__physpage, __skey & ~_PAGE_CHANGED);\ - (__skey & _PAGE_CHANGED); \ -}) +static inline int page_test_and_clear_dirty(struct page *page) +{ + unsigned long physpage = __pa((page - mem_map) << PAGE_SHIFT); + int skey = page_get_storage_key(physpage); + + if (skey & _PAGE_CHANGED) + page_set_storage_key(physpage, skey & ~_PAGE_CHANGED); + return skey & _PAGE_CHANGED; +} /* * Test and clear referenced bit in storage key. */ -#define page_test_and_clear_young(page) \ -({ \ - struct page *__page = (page); \ - unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);\ - int __ccode; \ - asm volatile( \ - " rrbe 0,%1\n" \ - " ipm %0\n" \ - " srl %0,28\n" \ - : "=d" (__ccode) : "a" (__physpage) : "cc"); \ - (__ccode & 2); \ -}) +static inline int page_test_and_clear_young(struct page *page) +{ + unsigned long physpage = __pa((page - mem_map) << PAGE_SHIFT); + int ccode; + + asm volatile ( + "rrbe 0,%1\n" + "ipm %0\n" + "srl %0,28\n" + : "=d" (ccode) : "a" (physpage) : "cc" ); + return ccode & 2; +} /* * Conversion functions: convert a page and protection to a page entry, @@ -634,32 +634,28 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) return __pte; } -#define mk_pte(pg, pgprot) \ -({ \ - struct page *__page = (pg); \ - pgprot_t __pgprot = (pgprot); \ - unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT); \ - pte_t __pte = mk_pte_phys(__physpage, __pgprot); \ - __pte; \ -}) +static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + unsigned long physpage = __pa((page - mem_map) << PAGE_SHIFT); -#define pfn_pte(pfn, pgprot) \ -({ \ - pgprot_t __pgprot = (pgprot); \ - unsigned long __physpage = __pa((pfn) << PAGE_SHIFT); \ - pte_t __pte = mk_pte_phys(__physpage, __pgprot); \ - __pte; \ -}) + return mk_pte_phys(physpage, pgprot); +} + +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) +{ + unsigned long physpage = __pa((pfn) << PAGE_SHIFT); + + return mk_pte_phys(physpage, pgprot); +} #ifdef __s390x__ -#define pfn_pmd(pfn, pgprot) \ -({ \ - pgprot_t __pgprot = (pgprot); \ - unsigned long __physpage = __pa((pfn) << PAGE_SHIFT); \ - pmd_t __pmd = __pmd(__physpage + pgprot_val(__pgprot)); \ - __pmd; \ -}) +static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) +{ + unsigned long physpage = __pa((pfn) << PAGE_SHIFT); + + return __pmd(physpage + pgprot_val(pgprot)); +} #endif /* __s390x__ */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 9d7921dd50f0..4830a3bedfb2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -128,12 +128,11 @@ #define PageUptodate(page) test_bit(PG_uptodate, &(page)->flags) #ifdef CONFIG_S390 -#define SetPageUptodate(_page) \ - do { \ - struct page *__page = (_page); \ - if (!test_and_set_bit(PG_uptodate, &__page->flags)) \ - page_test_and_clear_dirty(_page); \ - } while (0) +static inline void SetPageUptodate(struct page *page) +{ + if (!test_and_set_bit(PG_uptodate, &page->flags)) + page_test_and_clear_dirty(page); +} #else #define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) #endif -- cgit v1.2.3-71-gd317 From 199a9afc3dbe98c35326f1d3907ab94dae953a6e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 29 Sep 2006 01:59:00 -0700 Subject: [PATCH] Debug variants of linked list macros Signed-off-by: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 15 ++++++++++ lib/Kconfig.debug | 9 ++++++ lib/Makefile | 1 + lib/list_debug.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+) create mode 100644 lib/list_debug.c (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 65a5b5ceda49..a9c90287c0ff 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -39,6 +39,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) * This is only for internal list manipulation where we know * the prev/next entries already! */ +#ifndef CONFIG_DEBUG_LIST static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) @@ -48,6 +49,11 @@ static inline void __list_add(struct list_head *new, new->prev = prev; prev->next = new; } +#else +extern void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next); +#endif /** * list_add - add a new entry @@ -57,10 +63,15 @@ static inline void __list_add(struct list_head *new, * Insert a new entry after the specified head. * This is good for implementing stacks. */ +#ifndef CONFIG_DEBUG_LIST static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } +#else +extern void list_add(struct list_head *new, struct list_head *head); +#endif + /** * list_add_tail - add a new entry @@ -153,12 +164,16 @@ static inline void __list_del(struct list_head * prev, struct list_head * next) * Note: list_empty on entry does not return true after this, the entry is * in an undefined state. */ +#ifndef CONFIG_DEBUG_LIST static inline void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } +#else +extern void list_del(struct list_head *entry); +#endif /** * list_del_rcu - deletes entry from list without re-initialization diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b0f5ca72599f..f9ae75cc0145 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -320,6 +320,15 @@ config DEBUG_VM If unsure, say N. +config DEBUG_LIST + bool "Debug linked list manipulation" + depends on DEBUG_KERNEL + help + Enable this to turn on extended checks in the linked-list + walking routines. + + If unsure, say N. + config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32 || SUPERH) diff --git a/lib/Makefile b/lib/Makefile index ef1d37afbbb6..402762fead70 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,6 +28,7 @@ lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_PLIST) += plist.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o +obj-$(CONFIG_DEBUG_LIST) += list_debug.o ifneq ($(CONFIG_HAVE_DEC_LOCK),y) lib-y += dec_and_lock.o diff --git a/lib/list_debug.c b/lib/list_debug.c new file mode 100644 index 000000000000..1aae85cef92c --- /dev/null +++ b/lib/list_debug.c @@ -0,0 +1,77 @@ +/* + * Copyright 2006, Red Hat, Inc., Dave Jones + * Released under the General Public License (GPL). + * + * This file contains the linked list implementations for + * DEBUG_LIST. + */ + +#include +#include + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ + +void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + if (unlikely(next->prev != prev)) { + printk(KERN_ERR "list_add corruption. next->prev should be %p, but was %p\n", + prev, next->prev); + BUG(); + } + if (unlikely(prev->next != next)) { + printk(KERN_ERR "list_add corruption. prev->next should be %p, but was %p\n", + next, prev->next); + BUG(); + } + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} +EXPORT_SYMBOL(__list_add); + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} +EXPORT_SYMBOL(list_add); + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +void list_del(struct list_head *entry) +{ + if (unlikely(entry->prev->next != entry)) { + printk(KERN_ERR "list_del corruption. prev->next should be %p, but was %p\n", + entry, entry->prev->next); + BUG(); + } + if (unlikely(entry->next->prev != entry)) { + printk(KERN_ERR "list_del corruption. next->prev should be %p, but was %p\n", + entry, entry->next->prev); + BUG(); + } + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} +EXPORT_SYMBOL(list_del); + -- cgit v1.2.3-71-gd317 From 9938406ab6b2558d60c0c7200cc8e12f1ea7104a Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 29 Sep 2006 01:59:03 -0700 Subject: [PATCH] Make touch_nmi_watchdog imply touch_softlockup_watchdog on all archs touch_nmi_watchdog() calls touch_softlockup_watchdog() on both architectures that implement it (i386 and x86_64). On other architectures it does nothing at all. touch_nmi_watchdog() should imply touch_softlockup_watchdog() on all architectures. Suggested by Andi Kleen. [heiko.carstens@de.ibm.com: s390 fix] Signed-off-by: Michal Schmidt Cc: Andi Kleen Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Cc: Michal Schmidt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-s390/irq.h | 3 --- include/linux/nmi.h | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/asm-s390/irq.h b/include/asm-s390/irq.h index bd1a721f7aa2..7da991a858f8 100644 --- a/include/asm-s390/irq.h +++ b/include/asm-s390/irq.h @@ -19,8 +19,5 @@ enum interruption_class { NR_IRQS, }; -#define touch_nmi_watchdog() do { } while(0) - #endif /* __KERNEL__ */ #endif - diff --git a/include/linux/nmi.h b/include/linux/nmi.h index c8f4d2f627d7..e16904e28c3a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -4,6 +4,7 @@ #ifndef LINUX_NMI_H #define LINUX_NMI_H +#include #include /** @@ -16,7 +17,7 @@ #ifdef ARCH_HAS_NMI_WATCHDOG extern void touch_nmi_watchdog(void); #else -# define touch_nmi_watchdog() do { } while(0) +# define touch_nmi_watchdog() touch_softlockup_watchdog() #endif #endif -- cgit v1.2.3-71-gd317 From 58012cd788443b9d144bd7c72260a84b6b30f45d Mon Sep 17 00:00:00 2001 From: Chris Boot Date: Fri, 29 Sep 2006 01:59:07 -0700 Subject: [PATCH] scx200_gpio export cleanups Use EXPORT_SYMBOL_GPL for new symbols, and declare the struct in the header file for access by other modules. Signed-off-by: Chris Boot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/scx200_gpio.c | 2 +- include/linux/scx200_gpio.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index b1f88c66b2b5..99e5272e3c53 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -44,7 +44,7 @@ struct nsc_gpio_ops scx200_gpio_ops = { .gpio_change = scx200_gpio_change, .gpio_current = scx200_gpio_current }; -EXPORT_SYMBOL(scx200_gpio_ops); +EXPORT_SYMBOL_GPL(scx200_gpio_ops); static int scx200_gpio_open(struct inode *inode, struct file *file) { diff --git a/include/linux/scx200_gpio.h b/include/linux/scx200_gpio.h index 90dd069cc145..1a82d30c4b17 100644 --- a/include/linux/scx200_gpio.h +++ b/include/linux/scx200_gpio.h @@ -4,6 +4,7 @@ u32 scx200_gpio_configure(unsigned index, u32 set, u32 clear); extern unsigned scx200_gpio_base; extern long scx200_gpio_shadow[2]; +extern struct nsc_gpio_ops scx200_gpio_ops; #define scx200_gpio_present() (scx200_gpio_base!=0) -- cgit v1.2.3-71-gd317 From 6c9979185c7ef4feeb7f8d29be032b8f032a1838 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 29 Sep 2006 01:59:11 -0700 Subject: [PATCH] kthread: convert loop.c to kthread Convert loop.c from the deprecated kernel_thread to kthread. This patch simplifies the code quite a bit and passes similar testing to the previous submission on both emulated x86 and s390. Changes since last submission: switched to using a rather simple loop based on wait_event_interruptible. Signed-off-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/loop.c | 69 ++++++++++++++++++---------------------------------- include/linux/loop.h | 5 ++-- 2 files changed, 26 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c774121684d7..e87b88731adc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -72,6 +72,7 @@ #include #include #include +#include #include @@ -522,15 +523,12 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) goto out; if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) goto out; - lo->lo_pending++; loop_add_bio(lo, old_bio); + wake_up(&lo->lo_event); spin_unlock_irq(&lo->lo_lock); - complete(&lo->lo_bh_done); return 0; out: - if (lo->lo_pending == 0) - complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio, old_bio->bi_size); return 0; @@ -570,14 +568,18 @@ static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) * to avoid blocking in our make_request_fn. it also does loop decrypting * on reads for block backed loop, as that is too heavy to do from * b_end_io context where irqs may be disabled. + * + * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before + * calling kthread_stop(). Therefore once kthread_should_stop() is + * true, make_request will not place any more requests. Therefore + * once kthread_should_stop() is true and lo_bio is NULL, we are + * done with the loop. */ static int loop_thread(void *data) { struct loop_device *lo = data; struct bio *bio; - daemonize("loop%d", lo->lo_number); - /* * loop can be used in an encrypted device, * hence, it mustn't be stopped at all @@ -587,47 +589,21 @@ static int loop_thread(void *data) set_user_nice(current, -20); - lo->lo_state = Lo_bound; - lo->lo_pending = 1; - - /* - * complete it, we are running - */ - complete(&lo->lo_done); + while (!kthread_should_stop() || lo->lo_bio) { - for (;;) { - int pending; + wait_event_interruptible(lo->lo_event, + lo->lo_bio || kthread_should_stop()); - if (wait_for_completion_interruptible(&lo->lo_bh_done)) + if (!lo->lo_bio) continue; - spin_lock_irq(&lo->lo_lock); - - /* - * could be completed because of tear-down, not pending work - */ - if (unlikely(!lo->lo_pending)) { - spin_unlock_irq(&lo->lo_lock); - break; - } - bio = loop_get_bio(lo); - lo->lo_pending--; - pending = lo->lo_pending; spin_unlock_irq(&lo->lo_lock); BUG_ON(!bio); loop_handle_bio(lo, bio); - - /* - * upped both for pending work and tear-down, lo_pending - * will hit zero then - */ - if (unlikely(!pending)) - break; } - complete(&lo->lo_done); return 0; } @@ -840,10 +816,15 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, set_blocksize(bdev, lo_blocksize); - error = kernel_thread(loop_thread, lo, CLONE_KERNEL); - if (error < 0) + lo->lo_thread = kthread_create(loop_thread, lo, "loop%d", + lo->lo_number); + if (IS_ERR(lo->lo_thread)) { + error = PTR_ERR(lo->lo_thread); + lo->lo_thread = NULL; goto out_putf; - wait_for_completion(&lo->lo_done); + } + lo->lo_state = Lo_bound; + wake_up_process(lo->lo_thread); return 0; out_putf: @@ -907,12 +888,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; - lo->lo_pending--; - if (!lo->lo_pending) - complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); - wait_for_completion(&lo->lo_done); + kthread_stop(lo->lo_thread); lo->lo_backing_file = NULL; @@ -925,6 +903,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) lo->lo_sizelimit = 0; lo->lo_encrypt_key_size = 0; lo->lo_flags = 0; + lo->lo_thread = NULL; memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); @@ -1287,9 +1266,9 @@ static int __init loop_init(void) if (!lo->lo_queue) goto out_mem4; mutex_init(&lo->lo_ctl_mutex); - init_completion(&lo->lo_done); - init_completion(&lo->lo_bh_done); lo->lo_number = i; + lo->lo_thread = NULL; + init_waitqueue_head(&lo->lo_event); spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; disk->first_minor = i; diff --git a/include/linux/loop.h b/include/linux/loop.h index e76c7611d6cc..191a595055f0 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -59,10 +59,9 @@ struct loop_device { struct bio *lo_bio; struct bio *lo_biotail; int lo_state; - struct completion lo_done; - struct completion lo_bh_done; struct mutex lo_ctl_mutex; - int lo_pending; + struct task_struct *lo_thread; + wait_queue_head_t lo_event; request_queue_t *lo_queue; }; -- cgit v1.2.3-71-gd317 From db0b0ead60815155c791e8f479ee4777e7946369 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 29 Sep 2006 01:59:28 -0700 Subject: [PATCH] lockdep: don't pull in includes when lockdep disabled Do not pull in various includes through lockdep.h if lockdep is disabled. Signed-off-by: Michael S. Tsirkin Cc: Ingo Molnar Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index c040a8c969aa..1314ca0f29be 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -8,13 +8,13 @@ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H +#ifdef CONFIG_LOCKDEP + #include #include #include #include -#ifdef CONFIG_LOCKDEP - /* * Lock-class usage-state bits: */ -- cgit v1.2.3-71-gd317 From 650a898342b3fa21c392c06a2b7010fa19823efa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 29 Sep 2006 01:59:35 -0700 Subject: [PATCH] vfs: define new lookup flag for chdir In the "operation does permission checking" model used by fuse, chdir permission is not checked, since there's no chdir method. For this case set a lookup flag, which will be passed to ->permission(), so fuse can distinguish it from permission checks for other operations. Signed-off-by: Miklos Szeredi Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 2 +- fs/open.c | 3 ++- include/linux/namei.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 409ce6a7cca4..f85b2a282f13 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -776,7 +776,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) return -EACCES; - if (nd && (nd->flags & LOOKUP_ACCESS)) + if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) return fuse_access(inode, mask); return 0; } diff --git a/fs/open.c b/fs/open.c index 303f06d2a7b9..1574d8fe4909 100644 --- a/fs/open.c +++ b/fs/open.c @@ -546,7 +546,8 @@ asmlinkage long sys_chdir(const char __user * filename) struct nameidata nd; int error; - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + error = __user_walk(filename, + LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); if (error) goto out; diff --git a/include/linux/namei.h b/include/linux/namei.h index 45511a5918d3..c6470ba00668 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -54,6 +54,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_OPEN (0x0100) #define LOOKUP_CREATE (0x0200) #define LOOKUP_ACCESS (0x0400) +#define LOOKUP_CHDIR (0x0800) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); -- cgit v1.2.3-71-gd317 From 2e0c1f6ce7b816f63fea2af3e5e2cb20c66430e9 Mon Sep 17 00:00:00 2001 From: Shem Multinymous Date: Fri, 29 Sep 2006 01:59:37 -0700 Subject: [PATCH] DMI: Decode and save OEM String information This teaches dmi_decode() how to decode and save OEM Strings (type 11) DMI information, which is currently discarded silently. Existing code using DMI is not affected. Follows the "System Management BIOS (SMBIOS) Specification" (http://www.dmtf.org/standards/smbios), and also the userspace dmidecode.c code. OEM Strings are the only safe way to identify some hardware, e.g., the ThinkPad embedded controller used by the soon-to-be-submitted tp_smapi driver. This will also let us eliminate the long whitelist in the mainline hdaps driver (in a future patch). Signed-off-by: Shem Multinymous Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 23 +++++++++++++++++++++++ include/linux/dmi.h | 3 ++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b9e3886d9e16..b8b596d5778d 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -123,6 +123,26 @@ static void __init dmi_save_devices(struct dmi_header *dm) dev->type = *d++ & 0x7f; dev->name = dmi_string(dm, *d); dev->device_data = NULL; + list_add(&dev->list, &dmi_devices); + } +} + +static void __init dmi_save_oem_strings_devices(struct dmi_header *dm) +{ + int i, count = *(u8 *)(dm + 1); + struct dmi_device *dev; + + for (i = 1; i <= count; i++) { + dev = dmi_alloc(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR + "dmi_save_oem_strings_devices: out of memory.\n"); + break; + } + + dev->type = DMI_DEV_TYPE_OEM_STRING; + dev->name = dmi_string(dm, i); + dev->device_data = NULL; list_add(&dev->list, &dmi_devices); } @@ -181,6 +201,9 @@ static void __init dmi_decode(struct dmi_header *dm) case 10: /* Onboard Devices Information */ dmi_save_devices(dm); break; + case 11: /* OEM Strings */ + dmi_save_oem_strings_devices(dm); + break; case 38: /* IPMI Device Information */ dmi_save_ipmi_device(dm); } diff --git a/include/linux/dmi.h b/include/linux/dmi.h index b2cd2071d432..38dc403be70b 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -27,7 +27,8 @@ enum dmi_device_type { DMI_DEV_TYPE_ETHERNET, DMI_DEV_TYPE_TOKENRING, DMI_DEV_TYPE_SOUND, - DMI_DEV_TYPE_IPMI = -1 + DMI_DEV_TYPE_IPMI = -1, + DMI_DEV_TYPE_OEM_STRING = -2 }; struct dmi_header { -- cgit v1.2.3-71-gd317 From 402749ea2538be9ddad981a990739b93a0178bc6 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Fri, 29 Sep 2006 01:59:37 -0700 Subject: [PATCH] Remove unused tty_struct field Unused: tty_struct.max_flip_cnt $ git grep max_flip_cnt include/linux/tty.h: int max_flip_cnt; $ Cc: Paul Fulghum Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tty.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 04827ca65781..d1dec3d0c814 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -190,7 +190,6 @@ struct tty_struct { struct tty_struct *link; struct fasync_struct *fasync; struct tty_bufhead buf; - int max_flip_cnt; int alt_speed; /* For magic substitution of 38400 bps */ wait_queue_head_t write_wait; wait_queue_head_t read_wait; -- cgit v1.2.3-71-gd317 From 3d5b6fccc4b900cc4267692f015ea500bad4c6bf Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 01:59:40 -0700 Subject: [PATCH] task_struct: ifdef Missed'em V IPC ipc/sem.c only. $ agrep sysvsem -w -n ipc/sem.c:912: undo_list = current->sysvsem.undo_list; ipc/sem.c:932: undo_list = current->sysvsem.undo_list; ipc/sem.c:954: undo_list = current->sysvsem.undo_list; ipc/sem.c:963: current->sysvsem.undo_list = undo_list; ipc/sem.c:1247: tsk->sysvsem.undo_list = undo_list; ipc/sem.c:1249: tsk->sysvsem.undo_list = NULL; ipc/sem.c:1271: undo_list = tsk->sysvsem.undo_list; include/linux/sched.h:876: struct sysv_sem sysvsem; Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9d4aa7f95bc8..27122575d902 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -886,8 +886,10 @@ struct task_struct { - initialized normally by flush_old_exec */ /* file system info */ int link_count, total_link_count; +#ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* filesystem information */ -- cgit v1.2.3-71-gd317 From 6c5c934153513dc72e2d6464f39e8ef1f27c0a3e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 01:59:40 -0700 Subject: [PATCH] ifdef blktrace debugging fields Signed-off-by: Alexey Dobriyan Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/blktrace.c | 6 ++++-- block/ll_rw_blk.c | 3 +-- include/linux/blkdev.h | 4 ++-- include/linux/sched.h | 3 ++- kernel/fork.c | 2 ++ 5 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/block/blktrace.c b/block/blktrace.c index 2b4ef2b89b8d..8ff33441d8a2 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -450,8 +450,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(request_queue_t *q) { - blk_trace_startstop(q, 0); - blk_trace_remove(q); + if (q->blk_trace) { + blk_trace_startstop(q, 0); + blk_trace_remove(q); + } } /* diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 9c3a06bcb7ba..51dc0edf76e0 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1847,8 +1847,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); - if (q->blk_trace) - blk_trace_shutdown(q); + blk_trace_shutdown(q); kmem_cache_free(requestq_cachep, q); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c773ee545ebd..cfde8b3ee919 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -417,9 +417,9 @@ struct request_queue unsigned int sg_timeout; unsigned int sg_reserved_size; int node; - +#ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace *blk_trace; - +#endif /* * reserved for flush operations */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 27122575d902..3696f2f7126d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -784,8 +784,9 @@ struct task_struct { struct prio_array *array; unsigned short ioprio; +#ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; - +#endif unsigned long sleep_avg; unsigned long long timestamp, last_ran; unsigned long long sched_time; /* sched_clock time spent running */ diff --git a/kernel/fork.c b/kernel/fork.c index 802b1cf0e63f..bca6ce6d3ded 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -183,7 +183,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); +#ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; +#endif tsk->splice_pipe = NULL; return tsk; } -- cgit v1.2.3-71-gd317 From d6bd3a39f7c6ebad49c261c3d458df974c880758 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Fri, 29 Sep 2006 01:59:48 -0700 Subject: [PATCH] Move valid_dma_direction() from x86_64 to generic code As suggested by Muli Ben-Yehuda this function is moved to generic code as may be useful for all archs. [akpm@osdl.org: fix] Signed-off-by: Rolf Eike Beer Cc: Muli Ben-Yehuda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-swiotlb.c | 3 ++- drivers/net/fec_8xx/fec_main.c | 2 +- drivers/net/fs_enet/fs_enet.h | 3 +-- include/asm-x86_64/dma-mapping.h | 7 ------- include/linux/dma-mapping.h | 7 +++++++ 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c index 6a55f87ba97f..697f0aa794b9 100644 --- a/arch/x86_64/kernel/pci-swiotlb.c +++ b/arch/x86_64/kernel/pci-swiotlb.c @@ -3,7 +3,8 @@ #include #include #include -#include +#include + #include #include #include diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c index 22ac2df1aeb0..e17a1449ee10 100644 --- a/drivers/net/fec_8xx/fec_main.c +++ b/drivers/net/fec_8xx/fec_main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -37,7 +38,6 @@ #include #include #include -#include #include "fec_8xx.h" diff --git a/drivers/net/fs_enet/fs_enet.h b/drivers/net/fs_enet/fs_enet.h index 95022c005f75..92590d8fc24b 100644 --- a/drivers/net/fs_enet/fs_enet.h +++ b/drivers/net/fs_enet/fs_enet.h @@ -6,11 +6,10 @@ #include #include #include +#include #include -#include - #ifdef CONFIG_CPM1 #include diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index b6da83dcc7a6..10174b110a5c 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h @@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address; extern struct dma_mapping_ops* dma_ops; extern int iommu_merge; -static inline int valid_dma_direction(int dma_direction) -{ - return ((dma_direction == DMA_BIDIRECTIONAL) || - (dma_direction == DMA_TO_DEVICE) || - (dma_direction == DMA_FROM_DEVICE)); -} - static inline int dma_mapping_error(dma_addr_t dma_addr) { if (dma_ops->mapping_error) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 635690cf3e3d..ff203c465fed 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -24,6 +24,13 @@ enum dma_data_direction { #define DMA_28BIT_MASK 0x000000000fffffffULL #define DMA_24BIT_MASK 0x0000000000ffffffULL +static inline int valid_dma_direction(int dma_direction) +{ + return ((dma_direction == DMA_BIDIRECTIONAL) || + (dma_direction == DMA_TO_DEVICE) || + (dma_direction == DMA_FROM_DEVICE)); +} + #include /* Backwards compat, remove in 2.7.x */ -- cgit v1.2.3-71-gd317 From 0e51a720b9d9ea5ebf0fda39108919c6626bffa3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 01:59:56 -0700 Subject: [PATCH] ifdef ->quota_read, ->quota_write All suppliers of ->quota_read, ->quota_write (I've found ext2, ext3, UFS, reiserfs) already have them properly ifdeffed. All callers of ->quota_read, ->quota_write are under CONFIG_QUOTA umbrella, so... Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f74dfbb2edd..4caec6cebc42 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1143,9 +1143,10 @@ struct super_operations { int (*show_options)(struct seq_file *, struct vfsmount *); int (*show_stats)(struct seq_file *, struct vfsmount *); - +#ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); +#endif }; /* Inode state bits. Protected by inode_lock. */ -- cgit v1.2.3-71-gd317 From 068fbb315dd1e9dd3418aac39a9cfeabe39c16a6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 01:59:58 -0700 Subject: [PATCH] reiserfs: ifdef xattr_sem Shrink reiserfs inode by 12 bytes for xattr non-users (me). -reiser_inode_cache 356 11 +reiser_inode_cache 344 11 Signed-off-by: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 4 ++-- include/linux/reiserfs_fs_i.h | 2 ++ include/linux/reiserfs_xattr.h | 8 ++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 8810fda0da46..78f23f406932 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1129,7 +1129,7 @@ static void init_inode(struct inode *inode, struct path *path) REISERFS_I(inode)->i_jl = NULL; REISERFS_I(inode)->i_acl_access = NULL; REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem(&REISERFS_I(inode)->xattr_sem); + reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { struct stat_data_v1 *sd = @@ -1836,7 +1836,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); REISERFS_I(inode)->i_acl_access = NULL; REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem(&REISERFS_I(inode)->xattr_sem); + reiserfs_init_xattr_rwsem(inode); if (old_format_only(sb)) make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h index 149be8d9a0c9..711e7e7cafa5 100644 --- a/include/linux/reiserfs_fs_i.h +++ b/include/linux/reiserfs_fs_i.h @@ -55,7 +55,9 @@ struct reiserfs_inode_info { struct posix_acl *i_acl_access; struct posix_acl *i_acl_default; +#ifdef CONFIG_REISERFS_FS_XATTR struct rw_semaphore xattr_sem; +#endif struct inode vfs_inode; }; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index 5e961035c725..966c35851b2e 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -97,6 +97,11 @@ static inline void reiserfs_mark_inode_private(struct inode *inode) inode->i_flags |= S_PRIVATE; } +static inline void reiserfs_init_xattr_rwsem(struct inode *inode) +{ + init_rwsem(&REISERFS_I(inode)->xattr_sem); +} + #else #define is_reiserfs_priv_object(inode) 0 @@ -129,6 +134,9 @@ static inline int reiserfs_xattr_init(struct super_block *sb, int mount_flags) sb->s_flags = (sb->s_flags & ~MS_POSIXACL); /* to be sure */ return 0; }; +static inline void reiserfs_init_xattr_rwsem(struct inode *inode) +{ +} #endif #endif /* __KERNEL__ */ -- cgit v1.2.3-71-gd317 From cfe14677f286c9be5d683b88214def8f4b8a6f24 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 02:00:00 -0700 Subject: [PATCH] reiserfs: ifdef ACL stuff from inode Shrink reiserfs inode more (by 8 bytes) for ACL non-users: -reiser_inode_cache 344 11 +reiser_inode_cache 336 11 Signed-off-by: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 10 ++++++---- fs/reiserfs/super.c | 6 ++++++ include/linux/reiserfs_acl.h | 17 +++++++++++++++++ include/linux/reiserfs_fs_i.h | 3 ++- 4 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 78f23f406932..7e5a2f5ebeb0 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1127,8 +1127,8 @@ static void init_inode(struct inode *inode, struct path *path) REISERFS_I(inode)->i_prealloc_count = 0; REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; + reiserfs_init_acl_access(inode); + reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { @@ -1834,8 +1834,8 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(inode)->i_attrs = REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; + reiserfs_init_acl_access(inode); + reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); if (old_format_only(sb)) @@ -1974,11 +1974,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, * iput doesn't deadlock in reiserfs_delete_xattrs. The locking * code really needs to be reworked, but this will take care of it * for now. -jeffm */ +#ifdef CONFIG_REISERFS_FS_POSIX_ACL if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { reiserfs_write_unlock_xattrs(dir->i_sb); iput(inode); reiserfs_write_lock_xattrs(dir->i_sb); } else +#endif iput(inode); return err; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b40d4d64d598..80fc3b32802f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -510,8 +510,10 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); +#ifdef CONFIG_REISERFS_FS_POSIX_ACL ei->i_acl_access = NULL; ei->i_acl_default = NULL; +#endif } } @@ -560,6 +562,7 @@ static void reiserfs_dirty_inode(struct inode *inode) reiserfs_write_unlock(inode->i_sb); } +#ifdef CONFIG_REISERFS_FS_POSIX_ACL static void reiserfs_clear_inode(struct inode *inode) { struct posix_acl *acl; @@ -574,6 +577,9 @@ static void reiserfs_clear_inode(struct inode *inode) posix_acl_release(acl); REISERFS_I(inode)->i_acl_default = NULL; } +#else +#define reiserfs_clear_inode NULL +#endif #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h index 806ec5b06707..fe00f781a622 100644 --- a/include/linux/reiserfs_acl.h +++ b/include/linux/reiserfs_acl.h @@ -56,6 +56,16 @@ extern int reiserfs_xattr_posix_acl_init(void) __init; extern int reiserfs_xattr_posix_acl_exit(void); extern struct reiserfs_xattr_handler posix_acl_default_handler; extern struct reiserfs_xattr_handler posix_acl_access_handler; + +static inline void reiserfs_init_acl_access(struct inode *inode) +{ + REISERFS_I(inode)->i_acl_access = NULL; +} + +static inline void reiserfs_init_acl_default(struct inode *inode) +{ + REISERFS_I(inode)->i_acl_default = NULL; +} #else #define reiserfs_cache_default_acl(inode) 0 @@ -87,4 +97,11 @@ reiserfs_inherit_default_acl(const struct inode *dir, struct dentry *dentry, return 0; } +static inline void reiserfs_init_acl_access(struct inode *inode) +{ +} + +static inline void reiserfs_init_acl_default(struct inode *inode) +{ +} #endif diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h index 711e7e7cafa5..5b3b297aa2c5 100644 --- a/include/linux/reiserfs_fs_i.h +++ b/include/linux/reiserfs_fs_i.h @@ -52,9 +52,10 @@ struct reiserfs_inode_info { ** flushed */ unsigned long i_trans_id; struct reiserfs_journal_list *i_jl; - +#ifdef CONFIG_REISERFS_FS_POSIX_ACL struct posix_acl *i_acl_access; struct posix_acl *i_acl_default; +#endif #ifdef CONFIG_REISERFS_FS_XATTR struct rw_semaphore xattr_sem; #endif -- cgit v1.2.3-71-gd317 From 50462062a02226a698a211d5bd535376c89b8603 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 02:00:01 -0700 Subject: [PATCH] fs.h: ifdef security fields [assuming BSD security levels are deleted] The only user of i_security, f_security, s_security fields is SELinux, however, quite a few security modules are trying to get into kernel. So, wrap them under CONFIG_SECURITY. Adding config option for each security field is likely an overkill. Following Stephen Smalley's suggestion, i_security initialization is moved to security_inode_alloc() to not clutter core code with ifdefs and make alloc_inode() codepath tiny little bit smaller and faster. The user of (highly greppable) struct fown_struct::security field is still to be found. I've checked every "fown_struct" and every "f_owner" occurence. Additionally it's removal doesn't break i386 allmodconfig build. struct inode, struct file, struct super_block, struct fown_struct become smaller. P.S. Combined with two reiserfs inode shrinking patches sent to linux-fsdevel, I can finally suck 12 reiserfs inodes into one page. /proc/slabinfo -ext2_inode_cache 388 10 +ext2_inode_cache 384 10 -inode_cache 280 14 +inode_cache 276 14 -proc_inode_cache 296 13 +proc_inode_cache 292 13 -reiser_inode_cache 336 11 +reiser_inode_cache 332 12 <= -shmem_inode_cache 372 10 +shmem_inode_cache 368 10 Signed-off-by: Alexey Dobriyan Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 - include/linux/fs.h | 8 ++++++-- include/linux/security.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index f5c04dd9ae8a..abf77471e6c4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -133,7 +133,6 @@ static struct inode *alloc_inode(struct super_block *sb) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_rdev = 0; - inode->i_security = NULL; inode->dirtied_when = 0; if (security_inode_alloc(inode)) { if (inode->i_sb->s_op->destroy_inode) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4caec6cebc42..6eafbe309483 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -553,7 +553,9 @@ struct inode { unsigned int i_flags; atomic_t i_writecount; +#ifdef CONFIG_SECURITY void *i_security; +#endif void *i_private; /* fs or device private pointer */ #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; @@ -645,7 +647,6 @@ struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ int pid; /* pid or -pgrp where SIGIO should be sent */ uid_t uid, euid; /* uid/euid of process setting the owner */ - void *security; int signum; /* posix.1b rt signal to be delivered on IO */ }; @@ -688,8 +689,9 @@ struct file { struct file_ra_state f_ra; unsigned long f_version; +#ifdef CONFIG_SECURITY void *f_security; - +#endif /* needed for tty driver, and maybe others */ void *private_data; @@ -877,7 +879,9 @@ struct super_block { int s_syncing; int s_need_sync_fs; atomic_t s_active; +#ifdef CONFIG_SECURITY void *s_security; +#endif struct xattr_handler **s_xattr; struct list_head s_inodes; /* all inodes */ diff --git a/include/linux/security.h b/include/linux/security.h index 9f56fb8a4a6c..9b5fea81f55e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1595,6 +1595,7 @@ static inline void security_sb_post_pivotroot (struct nameidata *old_nd, static inline int security_inode_alloc (struct inode *inode) { + inode->i_security = NULL; return security_ops->inode_alloc_security (inode); } -- cgit v1.2.3-71-gd317 From ae78bf9c4f5fde3c67e2829505f195d7347ce3e4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Sep 2006 02:00:03 -0700 Subject: [PATCH] add -o flush for fat Fat is commonly used on removable media. Mounting with -o flush tells the FS to write things to disk as quickly as possible. It is like -o sync, but much faster (and not as safe). Signed-off-by: Chris Mason Cc: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/file.c | 13 +++++++++++ fs/fat/inode.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++-- fs/msdos/namei.c | 11 ++++++++- include/linux/msdos_fs.h | 3 +++ 4 files changed, 83 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/fat/file.c b/fs/fat/file.c index 1ee25232e6af..d50fc47169c1 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -13,6 +13,7 @@ #include #include #include +#include int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, } } +static int fat_file_release(struct inode *inode, struct file *filp) +{ + if ((filp->f_mode & FMODE_WRITE) && + MSDOS_SB(inode->i_sb)->options.flush) { + fat_flush_inodes(inode->i_sb, inode, NULL); + blk_congestion_wait(WRITE, HZ/10); + } + return 0; +} + const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, + .release = fat_file_release, .ioctl = fat_generic_ioctl, .fsync = file_fsync, .sendfile = generic_file_sendfile, @@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode) lock_kernel(); fat_free(inode, nr_clusters); unlock_kernel(); + fat_flush_inodes(inode->i_sb, inode, NULL); } struct inode_operations fat_file_inode_operations = { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ab96ae823753..045738032a83 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -853,7 +854,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_err, + Opt_obsolate, Opt_flush, Opt_err, }; static match_table_t fat_tokens = { @@ -885,7 +886,8 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_format=%20s"}, {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, - {Opt_err, NULL} + {Opt_flush, "flush"}, + {Opt_err, NULL}, }; static match_table_t msdos_tokens = { {Opt_nodots, "nodots"}, @@ -1026,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return 0; opts->codepage = option; break; + case Opt_flush: + opts->flush = 1; + break; /* msdos specific */ case Opt_dots: @@ -1425,6 +1430,56 @@ out_fail: EXPORT_SYMBOL_GPL(fat_fill_super); +/* + * helper function for fat_flush_inodes. This writes both the inode + * and the file data blocks, waiting for in flight data blocks before + * the start of the call. It does not wait for any io started + * during the call + */ +static int writeback_inode(struct inode *inode) +{ + + int ret; + struct address_space *mapping = inode->i_mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = 0, + }; + /* if we used WB_SYNC_ALL, sync_inode waits for the io for the + * inode to finish. So WB_SYNC_NONE is sent down to sync_inode + * and filemap_fdatawrite is used for the data blocks + */ + ret = sync_inode(inode, &wbc); + if (!ret) + ret = filemap_fdatawrite(mapping); + return ret; +} + +/* + * write data and metadata corresponding to i1 and i2. The io is + * started but we do not wait for any of it to finish. + * + * filemap_flush is used for the block device, so if there is a dirty + * page for a block already in flight, we will not wait and start the + * io over again + */ +int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) +{ + int ret = 0; + if (!MSDOS_SB(sb)->options.flush) + return 0; + if (i1) + ret = writeback_inode(i1); + if (!ret && i2) + ret = writeback_inode(i2); + if (!ret && sb) { + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + ret = filemap_flush(mapping); + } + return ret; +} +EXPORT_SYMBOL_GPL(fat_flush_inodes); + static int __init init_fat_fs(void) { int err; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 9e44158a7540..d220165d4918 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -280,7 +280,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct super_block *sb = dir->i_sb; - struct inode *inode; + struct inode *inode = NULL; struct fat_slot_info sinfo; struct timespec ts; unsigned char msdos_name[MSDOS_NAME]; @@ -316,6 +316,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, d_instantiate(dentry, inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(sb, dir, inode); return err; } @@ -348,6 +350,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) fat_detach(inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(inode->i_sb, dir, inode); return err; } @@ -401,6 +405,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); unlock_kernel(); + fat_flush_inodes(sb, dir, inode); return 0; out_free: @@ -430,6 +435,8 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) fat_detach(inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(inode->i_sb, dir, inode); return err; } @@ -635,6 +642,8 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir, new_msdos_name, new_dentry, is_hid); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); return err; } diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index bae62d62dc3e..ce6c85815cbd 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -204,6 +204,7 @@ struct fat_mount_options { unicode_xlate:1, /* create escape sequences for unhandled Unicode */ numtail:1, /* Does first alias have a numeric '~1' type tail? */ atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */ + flush:1, /* write things quickly */ nocase:1; /* Does this need case conversion? 0=need case conversion*/ }; @@ -412,6 +413,8 @@ extern int fat_sync_inode(struct inode *inode); extern int fat_fill_super(struct super_block *sb, void *data, int silent, struct inode_operations *fs_dir_inode_ops, int isvfat); +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); /* fat/misc.c */ extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); extern void fat_clusters_flush(struct super_block *sb); -- cgit v1.2.3-71-gd317 From ca9bda00b4aafc42cd3d1b9d32934463e2993b4c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 29 Sep 2006 02:00:03 -0700 Subject: [PATCH] tty locking on resize The current kernel serializes console resizes but does not serialize the resize against the tty structure updates. This means that while two parallel resizes cannot mess up the console you can get incorrect results reported. Secondly while doing this I added vc_lock_resize() to lock and resize the console. This leaves all knowledge of the console_sem in the vt/console driver and kicks it out of the tty layer, which is good Thirdly while doing this I decided I couldn't stand "disallocate" any longer so I switched it to "deallocate". Signed-off-by: Alan Cox Cc: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/selection.c | 2 +- drivers/char/tty_io.c | 29 +++++++++++++++-------------- drivers/char/vt.c | 12 +++++++++++- drivers/char/vt_ioctl.c | 17 +++++++---------- include/linux/vt_kern.h | 3 ++- 5 files changed, 36 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/selection.c b/drivers/char/selection.c index 71093a9fc462..74cff839c857 100644 --- a/drivers/char/selection.c +++ b/drivers/char/selection.c @@ -33,7 +33,7 @@ extern void poke_blanked_console(void); /* Variables for selection control. */ /* Use a dynamic buffer, instead of static (Dec 1994) */ -struct vc_data *sel_cons; /* must not be disallocated */ +struct vc_data *sel_cons; /* must not be deallocated */ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index eb881cfa53e0..2a1e95b0f282 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2770,12 +2770,11 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg) * actually has driver level meaning and triggers a VC resize. * * Locking: - * The console_sem is used to ensure we do not try and resize - * the console twice at once. - * FIXME: Two racing size sets may leave the console and kernel - * parameters disagreeing. Is this exploitable ? - * FIXME: Random values racing a window size get is wrong - * should lock here against that + * Called function use the console_sem is used to ensure we do + * not try and resize the console twice at once. + * The tty->termios_sem is used to ensure we don't double + * resize and get confused. Lock order - tty->termios.sem before + * console sem */ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, @@ -2785,17 +2784,17 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) return -EFAULT; + + down(&tty->termios_sem); if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg))) - return 0; + goto done; + #ifdef CONFIG_VT if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) { - int rc; - - acquire_console_sem(); - rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row); - release_console_sem(); - if (rc) - return -ENXIO; + if (vc_lock_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row)) { + up(&tty->termios_sem); + return -ENXIO; + } } #endif if (tty->pgrp > 0) @@ -2804,6 +2803,8 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, kill_pg(real_tty->pgrp, SIGWINCH, 1); tty->winsize = tmp_ws; real_tty->winsize = tmp_ws; +done: + up(&tty->termios_sem); return 0; } diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 0fca83ededff..b49f03375439 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -885,8 +885,17 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines) return err; } +int vc_lock_resize(struct vc_data *vc, unsigned int cols, unsigned int lines) +{ + int rc; + + acquire_console_sem(); + rc = vc_resize(vc, cols, lines); + release_console_sem(); + return rc; +} -void vc_disallocate(unsigned int currcons) +void vc_deallocate(unsigned int currcons) { WARN_CONSOLE_UNLOCKED(); @@ -3790,6 +3799,7 @@ EXPORT_SYMBOL(default_blu); EXPORT_SYMBOL(update_region); EXPORT_SYMBOL(redraw_screen); EXPORT_SYMBOL(vc_resize); +EXPORT_SYMBOL(vc_lock_resize); EXPORT_SYMBOL(fg_console); EXPORT_SYMBOL(console_blank_hook); EXPORT_SYMBOL(console_blanked); diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index a5628a8b6620..a53e382cc107 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -96,7 +96,7 @@ do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, struct kbd_str if (!perm) return -EPERM; if (!i && v == K_NOSUCHMAP) { - /* disallocate map */ + /* deallocate map */ key_map = key_maps[s]; if (s && key_map) { key_maps[s] = NULL; @@ -819,20 +819,20 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, if (arg > MAX_NR_CONSOLES) return -ENXIO; if (arg == 0) { - /* disallocate all unused consoles, but leave 0 */ + /* deallocate all unused consoles, but leave 0 */ acquire_console_sem(); for (i=1; iv_rows) || get_user(cc, &vtsizes->v_cols)) return -EFAULT; - for (i = 0; i < MAX_NR_CONSOLES; i++) { - acquire_console_sem(); - vc_resize(vc_cons[i].d, cc, ll); - release_console_sem(); - } + for (i = 0; i < MAX_NR_CONSOLES; i++) + vc_lock_resize(vc_cons[i].d, cc, ll); return 0; } diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 918a29763aea..1009d3fe1fc2 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -33,7 +33,8 @@ extern int fg_console, last_console, want_console; int vc_allocate(unsigned int console); int vc_cons_allocated(unsigned int console); int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines); -void vc_disallocate(unsigned int console); +int vc_lock_resize(struct vc_data *vc, unsigned int cols, unsigned int lines); +void vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); void do_unblank_screen(int leaving_gfx); -- cgit v1.2.3-71-gd317 From 3b9b8ab65d8eed784b9164d03807cb2bda7b5cd6 Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Fri, 29 Sep 2006 02:00:05 -0700 Subject: [PATCH] Fix unserialized task->files changing Fixed race on put_files_struct on exec with proc. Restoring files on current on error path may lead to proc having a pointer to already kfree-d files_struct. ->files changing at exit.c and khtread.c are safe as exit_files() makes all things under lock. Found during OpenVZ stress testing. [akpm@osdl.org: add export] Signed-off-by: Pavel Emelianov Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 6 ++---- fs/binfmt_misc.c | 6 ++---- fs/exec.c | 3 +-- include/linux/file.h | 1 + kernel/exit.c | 12 ++++++++++++ 5 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index dfd8cfb7fb5d..bb43da5cde5c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1038,10 +1038,8 @@ out_free_interp: out_free_file: sys_close(elf_exec_fileno); out_free_fh: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); out_free_ph: kfree(elf_phdata); goto out; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 66ba137f8661..1713c48fef54 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -215,10 +215,8 @@ _error: bprm->interp_flags = 0; bprm->interp_data = 0; _unshare: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); goto _ret; } diff --git a/fs/exec.c b/fs/exec.c index 97df6e0aeaee..a8efe35176b0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm) return 0; mmap_failed: - put_files_struct(current->files); - current->files = files; + reset_files_struct(current, files); out: return retval; } diff --git a/include/linux/file.h b/include/linux/file.h index 9f7c2513866f..74183e6f7f45 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -112,5 +112,6 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void FASTCALL(put_files_struct(struct files_struct *fs)); +void reset_files_struct(struct task_struct *, struct files_struct *); #endif /* __LINUX_FILE_H */ diff --git a/kernel/exit.c b/kernel/exit.c index d891883420f7..4b6fb054b25d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -487,6 +487,18 @@ void fastcall put_files_struct(struct files_struct *files) EXPORT_SYMBOL(put_files_struct); +void reset_files_struct(struct task_struct *tsk, struct files_struct *files) +{ + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} +EXPORT_SYMBOL(reset_files_struct); + static inline void __exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; -- cgit v1.2.3-71-gd317 From f400e198b2ed26ce55b22a1412ded0896e7516ac Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 29 Sep 2006 02:00:07 -0700 Subject: [PATCH] pidspace: is_init() This is an updated version of Eric Biederman's is_init() patch. (http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and replaces a few more instances of ->pid == 1 with is_init(). Further, is_init() checks pid and thus removes dependency on Eric's other patches for now. Eric's original description: There are a lot of places in the kernel where we test for init because we give it special properties. Most significantly init must not die. This results in code all over the kernel test ->pid == 1. Introduce is_init to capture this case. With multiple pid spaces for all of the cases affected we are looking for only the first process on the system, not some other process that has pid == 1. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Dave Hansen Cc: Serge Hallyn Cc: Cedric Le Goater Cc: Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/fault.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm26/mm/fault.c | 2 +- arch/i386/lib/usercopy.c | 2 +- arch/i386/mm/fault.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m32r/mm/fault.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/mips/mm/fault.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/ppc/kernel/traps.c | 2 +- arch/ppc/mm/fault.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/sh/mm/fault.c | 2 +- arch/sh64/mm/fault.c | 6 +++--- arch/um/kernel/trap.c | 2 +- arch/x86_64/mm/fault.c | 4 ++-- arch/xtensa/mm/fault.c | 2 +- drivers/char/sysrq.c | 2 +- include/linux/sched.h | 10 ++++++++++ kernel/capability.c | 2 +- kernel/cpuset.c | 2 +- kernel/exit.c | 2 +- kernel/kexec.c | 2 +- kernel/ptrace.c | 1 + kernel/sysctl.c | 2 +- mm/oom_kill.c | 2 +- security/commoncap.c | 2 +- 29 files changed, 41 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 622dabd84680..8871529a34e2 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -193,7 +193,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* We ran out of memory, or some other thing happened to us that made us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a5b33ff3924e..5e658a874498 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -198,7 +198,7 @@ survive: return fault; } - if (tsk->pid != 1) + if (!is_init(tsk)) goto out; /* diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c index a7c4cc922095..a1f6d8a9cc32 100644 --- a/arch/arm26/mm/fault.c +++ b/arch/arm26/mm/fault.c @@ -185,7 +185,7 @@ survive: } fault = -3; /* out of memory */ - if (tsk->pid != 1) + if (!is_init(tsk)) goto out; /* diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index efc7e7d5f4d0..08502fc6d0cb 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -739,7 +739,7 @@ survive: retval = get_user_pages(current, current->mm, (unsigned long )to, 1, 1, 0, &pg, NULL); - if (retval == -ENOMEM && current->pid == 1) { + if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); blk_congestion_wait(WRITE, HZ/50); goto survive; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 50d8617391dd..2581575786c1 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -589,7 +589,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index d8b1b4ac7f26..59f3ab937615 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -280,7 +280,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index dc18a33eefef..8d5f551b5754 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -299,7 +299,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 5e2d87c10c87..911f2ce3f53e 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -181,7 +181,7 @@ good_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index a4f8c45c4e8e..8423d8590779 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -171,7 +171,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 77953f41d754..e8fa50624b70 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -386,7 +386,7 @@ bad_area_nosemaphore: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 903115d67fdc..311ed1993fc0 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -337,7 +337,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) err->disposition == RTAS_DISP_NOT_RECOVERED && err->target == RTAS_TARGET_MEMORY && err->type == RTAS_TYPE_ECC_UNCORR && - !(current->pid == 0 || current->pid == 1)) { + !(current->pid == 0 || is_init(current))) { /* Kill off a user process with an ECC error */ printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", current->pid); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index d7a433049b48..aafc8e8893d1 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -119,7 +119,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) * generate the same exception over and over again and we get * nowhere. Better to kill it and let the kernel panic. */ - if (current->pid == 1) { + if (is_init(current)) { __sighandler_t handler; spin_lock_irq(¤t->sighand->siglock); diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c index bc776beb3136..465f451f3bc3 100644 --- a/arch/ppc/mm/fault.c +++ b/arch/ppc/mm/fault.c @@ -291,7 +291,7 @@ bad_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index f2b9a84dc2bf..9c3c19fe62fc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -353,7 +353,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 507f28914706..68663b8f99ae 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -149,7 +149,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c index f08d0eaf6497..8e2f6c28b739 100644 --- a/arch/sh64/mm/fault.c +++ b/arch/sh64/mm/fault.c @@ -277,7 +277,7 @@ bad_area: show_regs(regs); #endif } - if (tsk->pid == 1) { + if (is_init(tsk)) { panic("INIT had user mode bad_area\n"); } tsk->thread.address = address; @@ -319,14 +319,14 @@ no_context: * us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { panic("INIT out of memory\n"); yield(); goto survive; } printk("fault:Out of memory\n"); up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 61a23fff4395..c7b195c7e51f 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -120,7 +120,7 @@ out_nosemaphore: * us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { up_read(&mm->mmap_sem); yield(); down_read(&mm->mmap_sem); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 9ba54cc2b5f6..3751b4788e28 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -244,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) int unhandled_signal(struct task_struct *tsk, int sig) { - if (tsk->pid == 1) + if (is_init(tsk)) return 1; if (tsk->ptrace & PT_PTRACED) return 0; @@ -580,7 +580,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); goto again; } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index a945a33e85a1..dd0dbec2e57e 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -144,7 +144,7 @@ bad_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index ee3ca8f1768e..0ad6cb081db4 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -208,7 +208,7 @@ static void send_sig_all(int sig) struct task_struct *p; for_each_process(p) { - if (p->mm && p->pid != 1) + if (p->mm && !is_init(p)) /* Not swapper, init nor kernel thread */ force_sig(sig, p); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 3696f2f7126d..ed2af8671589 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1033,6 +1033,16 @@ static inline int pid_alive(struct task_struct *p) return p->pids[PIDTYPE_PID].pid != NULL; } +/** + * is_init - check if a task structure is the first user space + * task the kernel created. + * @p: Task structure to be checked. + */ +static inline int is_init(struct task_struct *tsk) +{ + return tsk->pid == 1; +} + extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) diff --git a/kernel/capability.c b/kernel/capability.c index c7685ad00a97..edb845a6e84a 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -133,7 +133,7 @@ static inline int cap_set_all(kernel_cap_t *effective, int found = 0; do_each_thread(g, target) { - if (target == current || target->pid == 1) + if (target == current || is_init(target)) continue; found = 1; if (security_capset_check(target, effective, inheritable, diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1b32c2c04c15..584bb4e6c042 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -240,7 +240,7 @@ static struct super_block *cpuset_sb; * A cpuset can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cpusets is empty. Since all * tasks in the system use _some_ cpuset, and since there is always at - * least one task in the system (init, pid == 1), therefore, top_cpuset + * least one task in the system (init), therefore, top_cpuset * always has either children cpusets and/or using tasks. So we don't * need a special hack to ensure that top_cpuset cannot be deleted. * diff --git a/kernel/exit.c b/kernel/exit.c index 4b6fb054b25d..9961192d6055 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -219,7 +219,7 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) do_each_task_pid(pgrp, PIDTYPE_PGID, p) { if (p == ignored_task || p->exit_state - || p->real_parent->pid == 1) + || is_init(p->real_parent)) continue; if (process_group(p->real_parent) != pgrp && p->real_parent->signal->session == p->signal->session) { diff --git a/kernel/kexec.c b/kernel/kexec.c index 50087ecf337e..37cad75cf494 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -40,7 +40,7 @@ struct resource crashk_res = { int kexec_should_crash(struct task_struct *p) { - if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) + if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops) return 1; return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8aad0331d82e..4d50e06fd745 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -440,6 +440,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) child = find_task_by_pid(pid); if (child) get_task_struct(child); + read_unlock(&tasklist_lock); if (!child) return ERR_PTR(-ESRCH); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8bfa7d117c54..9535a3839930 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1915,7 +1915,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, return -EPERM; } - op = (current->pid == 1) ? OP_SET : OP_AND; + op = is_init(current) ? OP_SET : OP_AND; return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_bset_conv,&op); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index bada3d03119f..f3dd79c1c367 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -255,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) */ static void __oom_kill_task(struct task_struct *p, const char *message) { - if (p->pid == 1) { + if (is_init(p)) { WARN_ON(1); printk(KERN_WARNING "tried to kill init!\n"); return; diff --git a/security/commoncap.c b/security/commoncap.c index f50fc298cf80..5a5ef5ca7ea9 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -169,7 +169,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ - if (current->pid != 1) { + if (!is_init(current)) { current->cap_permitted = new_permitted; current->cap_effective = cap_intersect (new_permitted, bprm->cap_effective); -- cgit v1.2.3-71-gd317 From 3ca212b813299899d2968aa0a24a797c3746f5ec Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 29 Sep 2006 02:00:14 -0700 Subject: [PATCH] Remove another config.h After the asm/ uses of #include this one is the next biggest source of noise. Signed-off-by: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 176c7f797339..c89df55f6e03 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -3,7 +3,6 @@ #include #include -#include #include #include -- cgit v1.2.3-71-gd317 From af410fc13d95f079910fc3dca7496590c3275967 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Sep 2006 02:00:14 -0700 Subject: [PATCH] make leds.h include relevant headers Make it possible to include linux/leds.h without first including list.h and spinlock.h. Signed-off-by: Johannes Berg Acked-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/leds.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index dc23c7c639f3..88afceffb7cb 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -12,6 +12,9 @@ #ifndef __LINUX_LEDS_H_INCLUDED #define __LINUX_LEDS_H_INCLUDED +#include +#include + struct device; struct class_device; /* -- cgit v1.2.3-71-gd317 From 1711ef3866b0360e102327389fe4b76c849bbe83 Mon Sep 17 00:00:00 2001 From: Toyo Abe Date: Fri, 29 Sep 2006 02:00:28 -0700 Subject: [PATCH] posix-timers: Fix clock_nanosleep() doesn't return the remaining time in compatibility mode The clock_nanosleep() function does not return the time remaining when the sleep is interrupted by a signal. This patch creates a new call out, compat_clock_nanosleep_restart(), which handles returning the remaining time after a sleep is interrupted. This patch revives clock_nanosleep_restart(). It is now accessed via the new call out. The compat_clock_nanosleep_restart() is used for compatibility access. Since this is implemented in compatibility mode the normal path is virtually unaffected - no real performance impact. Signed-off-by: Toyo Abe Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 1 + include/linux/posix-timers.h | 4 ++++ kernel/compat.c | 33 +++++++++++++++++++++++++++++++++ kernel/hrtimer.c | 20 ++++++++++---------- kernel/posix-cpu-timers.c | 17 ++++++++++++----- kernel/posix-timers.c | 21 +++++++++++++++++++++ 6 files changed, 81 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4fc379de6c2f..fca93025ab51 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -138,6 +138,7 @@ extern long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid); +extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 95572c434bc9..a7dd38f30ade 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -72,6 +72,7 @@ struct k_clock { int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, struct timespec *, struct timespec __user *); + long (*nsleep_restart) (struct restart_block *restart_block); int (*timer_set) (struct k_itimer * timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting); @@ -97,6 +98,7 @@ int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts); int posix_cpu_timer_create(struct k_itimer *timer); int posix_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp, struct timespec __user *rmtp); +long posix_cpu_nsleep_restart(struct restart_block *restart_block); int posix_cpu_timer_set(struct k_itimer *timer, int flags, struct itimerspec *new, struct itimerspec *old); int posix_cpu_timer_del(struct k_itimer *timer); @@ -111,4 +113,6 @@ void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); +long clock_nanosleep_restart(struct restart_block *restart_block); + #endif diff --git a/kernel/compat.c b/kernel/compat.c index 126dee9530aa..75573e5d27b0 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -601,6 +602,30 @@ long compat_sys_clock_getres(clockid_t which_clock, return err; } +static long compat_clock_nanosleep_restart(struct restart_block *restart) +{ + long err; + mm_segment_t oldfs; + struct timespec tu; + struct compat_timespec *rmtp = (struct compat_timespec *)(restart->arg1); + + restart->arg1 = (unsigned long) &tu; + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = clock_nanosleep_restart(restart); + set_fs(oldfs); + + if ((err == -ERESTART_RESTARTBLOCK) && rmtp && + put_compat_timespec(&tu, rmtp)) + return -EFAULT; + + if (err == -ERESTART_RESTARTBLOCK) { + restart->fn = compat_clock_nanosleep_restart; + restart->arg1 = (unsigned long) rmtp; + } + return err; +} + long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp) @@ -608,6 +633,7 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, long err; mm_segment_t oldfs; struct timespec in, out; + struct restart_block *restart; if (get_compat_timespec(&in, rqtp)) return -EFAULT; @@ -618,9 +644,16 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, (struct timespec __user *) &in, (struct timespec __user *) &out); set_fs(oldfs); + if ((err == -ERESTART_RESTARTBLOCK) && rmtp && put_compat_timespec(&out, rmtp)) return -EFAULT; + + if (err == -ERESTART_RESTARTBLOCK) { + restart = ¤t_thread_info()->restart_block; + restart->fn = compat_clock_nanosleep_restart; + restart->arg1 = (unsigned long) rmtp; + } return err; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 21c38a7e666b..d0ba190dfeb6 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -693,7 +693,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod return t->task == NULL; } -static long __sched nanosleep_restart(struct restart_block *restart) +long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; struct timespec __user *rmtp; @@ -702,13 +702,13 @@ static long __sched nanosleep_restart(struct restart_block *restart) restart->fn = do_no_restart_syscall; - hrtimer_init(&t.timer, restart->arg3, HRTIMER_ABS); - t.timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; + hrtimer_init(&t.timer, restart->arg0, HRTIMER_ABS); + t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; if (do_nanosleep(&t, HRTIMER_ABS)) return 0; - rmtp = (struct timespec __user *) restart->arg2; + rmtp = (struct timespec __user *) restart->arg1; if (rmtp) { time = ktime_sub(t.timer.expires, t.timer.base->get_time()); if (time.tv64 <= 0) @@ -718,7 +718,7 @@ static long __sched nanosleep_restart(struct restart_block *restart) return -EFAULT; } - restart->fn = nanosleep_restart; + restart->fn = hrtimer_nanosleep_restart; /* The other values in restart are already filled in */ return -ERESTART_RESTARTBLOCK; @@ -751,11 +751,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, } restart = ¤t_thread_info()->restart_block; - restart->fn = nanosleep_restart; - restart->arg0 = t.timer.expires.tv64 & 0xFFFFFFFF; - restart->arg1 = t.timer.expires.tv64 >> 32; - restart->arg2 = (unsigned long) rmtp; - restart->arg3 = (unsigned long) t.timer.base->index; + restart->fn = hrtimer_nanosleep_restart; + restart->arg0 = (unsigned long) t.timer.base->index; + restart->arg1 = (unsigned long) rmtp; + restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF; + restart->arg3 = t.timer.expires.tv64 >> 32; return -ERESTART_RESTARTBLOCK; } diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d38d9ec3276c..5667fef89dd1 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1393,8 +1393,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } } -static long posix_cpu_clock_nanosleep_restart(struct restart_block *); - int posix_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp, struct timespec __user *rmtp) { @@ -1471,7 +1469,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; - restart_block->fn = posix_cpu_clock_nanosleep_restart; + restart_block->fn = posix_cpu_nsleep_restart; /* Caller already set restart_block->arg1 */ restart_block->arg0 = which_clock; restart_block->arg1 = (unsigned long) rmtp; @@ -1484,8 +1482,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, return error; } -static long -posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) +long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; struct timespec __user *rmtp; @@ -1524,6 +1521,10 @@ static int process_cpu_nsleep(const clockid_t which_clock, int flags, { return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); } +static long process_cpu_nsleep_restart(struct restart_block *restart_block) +{ + return -EINVAL; +} static int thread_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { @@ -1544,6 +1545,10 @@ static int thread_cpu_nsleep(const clockid_t which_clock, int flags, { return -EINVAL; } +static long thread_cpu_nsleep_restart(struct restart_block *restart_block) +{ + return -EINVAL; +} static __init int init_posix_cpu_timers(void) { @@ -1553,6 +1558,7 @@ static __init int init_posix_cpu_timers(void) .clock_set = do_posix_clock_nosettime, .timer_create = process_cpu_timer_create, .nsleep = process_cpu_nsleep, + .nsleep_restart = process_cpu_nsleep_restart, }; struct k_clock thread = { .clock_getres = thread_cpu_clock_getres, @@ -1560,6 +1566,7 @@ static __init int init_posix_cpu_timers(void) .clock_set = do_posix_clock_nosettime, .timer_create = thread_cpu_timer_create, .nsleep = thread_cpu_nsleep, + .nsleep_restart = thread_cpu_nsleep_restart, }; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ac6dc8744429..e5ebcc1ec3a0 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -973,3 +973,24 @@ sys_clock_nanosleep(const clockid_t which_clock, int flags, return CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t, rmtp)); } + +/* + * nanosleep_restart for monotonic and realtime clocks + */ +static int common_nsleep_restart(struct restart_block *restart_block) +{ + return hrtimer_nanosleep_restart(restart_block); +} + +/* + * This will restart clock_nanosleep. This is required only by + * compat_clock_nanosleep_restart for now. + */ +long +clock_nanosleep_restart(struct restart_block *restart_block) +{ + clockid_t which_clock = restart_block->arg0; + + return CLOCK_DISPATCH(which_clock, nsleep_restart, + (restart_block)); +} -- cgit v1.2.3-71-gd317 From 3171a0305d62e6627a24bff35af4f997e4988a80 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 29 Sep 2006 02:00:32 -0700 Subject: [PATCH] simplify update_times (avoid jiffies/jiffies_64 aliasing problem) Pass ticks to do_timer() and update_times(), and adjust x86_64 and s390 timer interrupt handler with this change. Currently update_times() calculates ticks by "jiffies - wall_jiffies", but callers of do_timer() should know how many ticks to update. Passing ticks get rid of this redundant calculation. Also there are another redundancy pointed out by Martin Schwidefsky. This cleanup make a barrier added by 5aee405c662ca644980c184774277fc6d0769a84 needless. So this patch removes it. As a bonus, this cleanup make wall_jiffies can be removed easily, since now wall_jiffies is always synced with jiffies. (This patch does not really remove wall_jiffies. It would be another cleanup patch) Signed-off-by: Atsushi Nemoto Cc: Martin Schwidefsky Cc: "Eric W. Biederman" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: john stultz Cc: Andi Kleen Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Richard Henderson Cc: Ivan Kokshaysky Acked-by: Russell King Cc: Ian Molton Cc: Mikael Starvik Acked-by: David Howells Cc: Yoshinori Sato Cc: Hirokazu Takata Acked-by: Ralf Baechle Cc: Kyle McMartin Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Chris Zankel Acked-by: "Luck, Tony" Cc: Geert Uytterhoeven Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/time.c | 2 +- arch/arm/kernel/time.c | 2 +- arch/arm26/kernel/time.c | 2 +- arch/avr32/kernel/time.c | 2 +- arch/cris/arch-v10/kernel/time.c | 2 +- arch/cris/arch-v32/kernel/time.c | 2 +- arch/frv/kernel/time.c | 2 +- arch/h8300/kernel/time.c | 2 +- arch/ia64/kernel/time.c | 2 +- arch/m32r/kernel/time.c | 2 +- arch/m68k/kernel/time.c | 2 +- arch/m68k/sun3/sun3ints.c | 2 +- arch/m68knommu/kernel/time.c | 2 +- arch/mips/au1000/common/time.c | 6 +++--- arch/mips/gt64120/common/time.c | 2 +- arch/mips/kernel/time.c | 2 +- arch/mips/momentum/ocelot_g/gt-irq.c | 2 +- arch/mips/sgi-ip27/ip27-timer.c | 2 +- arch/parisc/kernel/time.c | 2 +- arch/powerpc/kernel/time.c | 2 +- arch/ppc/kernel/time.c | 2 +- arch/s390/kernel/time.c | 9 ++++----- arch/sh/kernel/time.c | 2 +- arch/sh64/kernel/time.c | 2 +- arch/sparc/kernel/pcic.c | 2 +- arch/sparc/kernel/time.c | 2 +- arch/sparc64/kernel/time.c | 4 ++-- arch/um/kernel/time.c | 2 +- arch/v850/kernel/time.c | 2 +- arch/x86_64/kernel/time.c | 8 ++++---- arch/xtensa/kernel/time.c | 2 +- include/asm-arm/arch-clps711x/time.h | 2 +- include/asm-arm/arch-l7200/time.h | 2 +- include/asm-i386/mach-default/do_timer.h | 2 +- include/asm-i386/mach-visws/do_timer.h | 2 +- include/asm-i386/mach-voyager/do_timer.h | 2 +- include/linux/sched.h | 2 +- kernel/timer.c | 19 ++++++------------- 38 files changed, 52 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index b191cc759737..7c1e44420a78 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -132,7 +132,7 @@ irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs) nticks = delta >> FIX_SHIFT; while (nticks > 0) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index d4dceb5f06e9..f7d5165796ef 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -337,7 +337,7 @@ void timer_tick(struct pt_regs *regs) profile_tick(CPU_PROFILING, regs); do_leds(); do_set_rtc(); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c index db63d75d0715..80adbd005fc5 100644 --- a/arch/arm26/kernel/time.c +++ b/arch/arm26/kernel/time.c @@ -194,7 +194,7 @@ EXPORT_SYMBOL(do_settimeofday); static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index b0e6b5855a38..3e56b9f4358a 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -148,7 +148,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * Call the generic timer interrupt handler */ write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); write_sequnlock(&xtime_lock); /* diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 9c22b76e129a..ebacf1457d91 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -227,7 +227,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) /* call the real timer interrupt handler */ - do_timer(regs); + do_timer(1); cris_do_profile(regs); /* Save profiling information */ diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c index 50f3f93293d6..be0a01657d4f 100644 --- a/arch/cris/arch-v32/kernel/time.c +++ b/arch/cris/arch-v32/kernel/time.c @@ -219,7 +219,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; /* call the real timer interrupt handler */ - do_timer(regs); + do_timer(1); /* * If we have an externally synchronized Linux clock, then update diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 3d0284bccb94..7e55884135ed 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -70,7 +70,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) */ write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING, regs); diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c index 688a5100604c..e569d17b4ae6 100644 --- a/arch/h8300/kernel/time.c +++ b/arch/h8300/kernel/time.c @@ -41,7 +41,7 @@ static void timer_interrupt(int irq, void *dummy, struct pt_regs * regs) /* may need to kick the hardware timer */ platform_timer_eoi(); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 6928ef0d64d8..16262687a103 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -78,7 +78,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) * xtime_lock. */ write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); local_cpu_data->itm_next = new_itm; write_sequnlock(&xtime_lock); } else diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c index ded0be07a476..7a896893cd28 100644 --- a/arch/m32r/kernel/time.c +++ b/arch/m32r/kernel/time.c @@ -202,7 +202,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) #ifndef CONFIG_SMP profile_tick(CPU_PROFILING, regs); #endif - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 98e4b1adfa29..1072e4946a4a 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -40,7 +40,7 @@ static inline int set_rtc_mmss(unsigned long nowtime) */ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index f18b9d3ef16d..dc4ea7e074a6 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -65,7 +65,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id, struct pt_regs *fp) #ifdef CONFIG_SUN3 intersil_clear(); #endif - do_timer(fp); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(fp)); #endif diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index 1db987272220..db1e1ce0a349 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -51,7 +51,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c index 7fbea1bf7b48..0a067f3113a5 100644 --- a/arch/mips/au1000/common/time.c +++ b/arch/mips/au1000/common/time.c @@ -96,7 +96,7 @@ void mips_timer_interrupt(struct pt_regs *regs) timerlo = count; kstat_this_cpu.irqs[irq]++; - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif @@ -137,7 +137,7 @@ irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs) } while (time_elapsed > 0) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif @@ -156,7 +156,7 @@ irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs) if (jiffie_drift >= 999) { jiffie_drift -= 999; - do_timer(regs); /* increment jiffies by one */ + do_timer(1); /* increment jiffies by one */ #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/mips/gt64120/common/time.c b/arch/mips/gt64120/common/time.c index d837b26fbe51..7feca49350d1 100644 --- a/arch/mips/gt64120/common/time.c +++ b/arch/mips/gt64120/common/time.c @@ -34,7 +34,7 @@ static void gt64120_irq(int irq, void *dev_id, struct pt_regs *regs) if (irq_src & 0x00000800) { /* Check for timer interrupt */ handled = 1; irq_src &= ~0x00000800; - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 170cb67f4ede..6ab8d975a974 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -434,7 +434,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) /* * call the generic timer interrupt handling */ - do_timer(regs); + do_timer(1); /* * If we have an externally synchronized Linux clock, then update diff --git a/arch/mips/momentum/ocelot_g/gt-irq.c b/arch/mips/momentum/ocelot_g/gt-irq.c index 9fb2493fff02..6cd87cf0195a 100644 --- a/arch/mips/momentum/ocelot_g/gt-irq.c +++ b/arch/mips/momentum/ocelot_g/gt-irq.c @@ -133,7 +133,7 @@ static irqreturn_t gt64240_p0int_irq(int irq, void *dev, struct pt_regs *regs) MV_WRITE(TIMER_COUNTER_0_3_INTERRUPT_CAUSE, 0x0); /* handle the timer call */ - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index b029ba79c27a..c62a3a9ef867 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -111,7 +111,7 @@ again: kstat_this_cpu.irqs[irq]++; /* kstat only for bootcpu? */ if (cpu == 0) - do_timer(regs); + do_timer(1); update_process_times(user_mode(regs)); diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 5facc9bff4ef..700df10924dd 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -79,7 +79,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) #endif if (cpu == 0) { write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); write_sequnlock(&xtime_lock); } } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7a3c3f791ade..71f71da98e7d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -693,7 +693,7 @@ void timer_interrupt(struct pt_regs * regs) tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy; if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) { tb_last_jiffy = tb_next_jiffy; - do_timer(regs); + do_timer(1); timer_recalc_offset(tb_last_jiffy); timer_check_rtc(); } diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c index 6ab8cc7226ab..1e1f31554767 100644 --- a/arch/ppc/kernel/time.c +++ b/arch/ppc/kernel/time.c @@ -153,7 +153,7 @@ void timer_interrupt(struct pt_regs * regs) /* We are in an interrupt, no need to save/restore flags */ write_seqlock(&xtime_lock); tb_last_stamp = jiffy_stamp; - do_timer(regs); + do_timer(1); /* * update the rtc when needed, this should be performed on the diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 1981c6199fa2..abab42e9f5f8 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(do_settimeofday); void account_ticks(struct pt_regs *regs) { __u64 tmp; - __u32 ticks, xticks; + __u32 ticks; /* Calculate how many ticks have passed. */ if (S390_lowcore.int_clock < S390_lowcore.jiffy_timer) { @@ -204,6 +204,7 @@ void account_ticks(struct pt_regs *regs) */ write_seqlock(&xtime_lock); if (S390_lowcore.jiffy_timer > xtime_cc) { + __u32 xticks; tmp = S390_lowcore.jiffy_timer - xtime_cc; if (tmp >= 2*CLK_TICKS_PER_JIFFY) { xticks = __div(tmp, CLK_TICKS_PER_JIFFY); @@ -212,13 +213,11 @@ void account_ticks(struct pt_regs *regs) xticks = 1; xtime_cc += CLK_TICKS_PER_JIFFY; } - while (xticks--) - do_timer(regs); + do_timer(xticks); } write_sequnlock(&xtime_lock); #else - for (xticks = ticks; xticks > 0; xticks--) - do_timer(regs); + do_timer(ticks); #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 149d9713eddf..f664a196c4f5 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -117,7 +117,7 @@ static long last_rtc_update; */ void handle_timer_tick(struct pt_regs *regs) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c index b8162e59030e..3b61e06f9d72 100644 --- a/arch/sh64/kernel/time.c +++ b/arch/sh64/kernel/time.c @@ -298,7 +298,7 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs) asm ("getcon cr62, %0" : "=r" (current_ctc)); ctc_last_interrupt = (unsigned long) current_ctc; - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index bfd31aac2df3..e19b1bad9bc5 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -712,7 +712,7 @@ static irqreturn_t pcic_timer_handler (int irq, void *h, struct pt_regs *regs) { write_seqlock(&xtime_lock); /* Dummy, to show that we remember */ pcic_clear_clock_irq(); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index 845081b01267..6f84fa1b58e5 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -128,7 +128,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) #endif clear_clock_irq(); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index b0b4feeec098..ca1193482f07 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -465,7 +465,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) profile_tick(CPU_PROFILING, regs); update_process_times(user_mode(regs)); #endif - do_timer(regs); + do_timer(1); /* Guarantee that the following sequences execute * uninterrupted. @@ -496,7 +496,7 @@ void timer_tick_interrupt(struct pt_regs *regs) { write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); timer_check_rtc(); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 820affbf3e16..a92965f8f9cd 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -93,7 +93,7 @@ irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs) write_seqlock_irqsave(&xtime_lock, flags); - do_timer(regs); + do_timer(1); nsecs = get_time(); xtime.tv_sec = nsecs / NSEC_PER_SEC; diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c index a0b46695f186..f4d1a4d3cdc2 100644 --- a/arch/v850/kernel/time.c +++ b/arch/v850/kernel/time.c @@ -51,7 +51,7 @@ static irqreturn_t timer_interrupt (int irq, void *dummy, struct pt_regs *regs) if (mach_tick) mach_tick (); - do_timer (regs); + do_timer (1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 1c255ee76e7c..7ea3bf2a858c 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -415,16 +415,16 @@ void main_timer_handler(struct pt_regs *regs) (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; } - if (lost > 0) { + if (lost > 0) handle_lost_ticks(lost, regs); - jiffies += lost; - } + else + lost = 0; /* * Do the timer stuff. */ - do_timer(regs); + do_timer(lost + 1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 412ab32de391..241db201f40e 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -175,7 +175,7 @@ again: last_ccount_stamp = next; next += CCOUNT_PER_JIFFY; - do_timer (regs); /* Linux handler in kernel/timer.c */ + do_timer (1); /* Linux handler in kernel/timer.c */ if (ntp_synced() && xtime.tv_sec - last_rtc_update >= 659 && diff --git a/include/asm-arm/arch-clps711x/time.h b/include/asm-arm/arch-clps711x/time.h index 9cb27cd4e6ae..0e4a3901d3b3 100644 --- a/include/asm-arm/arch-clps711x/time.h +++ b/include/asm-arm/arch-clps711x/time.h @@ -29,7 +29,7 @@ static irqreturn_t p720t_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { do_leds(); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/include/asm-arm/arch-l7200/time.h b/include/asm-arm/arch-l7200/time.h index 7b98b533e63a..c69cb508735f 100644 --- a/include/asm-arm/arch-l7200/time.h +++ b/include/asm-arm/arch-l7200/time.h @@ -45,7 +45,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/include/asm-i386/mach-default/do_timer.h b/include/asm-i386/mach-default/do_timer.h index 6312c3e79814..4182c347ef85 100644 --- a/include/asm-i386/mach-default/do_timer.h +++ b/include/asm-i386/mach-default/do_timer.h @@ -16,7 +16,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode_vm(regs)); #endif diff --git a/include/asm-i386/mach-visws/do_timer.h b/include/asm-i386/mach-visws/do_timer.h index 95568e6ca91c..8db618c5a72b 100644 --- a/include/asm-i386/mach-visws/do_timer.h +++ b/include/asm-i386/mach-visws/do_timer.h @@ -9,7 +9,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs) /* Clear the interrupt */ co_cpu_write(CO_CPU_STAT,co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR); - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode_vm(regs)); #endif diff --git a/include/asm-i386/mach-voyager/do_timer.h b/include/asm-i386/mach-voyager/do_timer.h index eaf518098981..099fe9f5c1b2 100644 --- a/include/asm-i386/mach-voyager/do_timer.h +++ b/include/asm-i386/mach-voyager/do_timer.h @@ -3,7 +3,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs) { - do_timer(regs); + do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode_vm(regs)); #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index ed2af8671589..503dea61ff99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1206,7 +1206,7 @@ extern void switch_uid(struct user_struct *); #include -extern void do_timer(struct pt_regs *); +extern void do_timer(unsigned long ticks); extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state)); extern int FASTCALL(wake_up_process(struct task_struct * tsk)); diff --git a/kernel/timer.c b/kernel/timer.c index a2cb1ecb1b28..4f55622b0d38 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1222,10 +1222,8 @@ static inline void calc_load(unsigned long ticks) unsigned long active_tasks; /* fixed-point */ static int count = LOAD_FREQ; - count -= ticks; - if (count < 0) { - count += LOAD_FREQ; - active_tasks = count_active_tasks(); + active_tasks = count_active_tasks(); + for (count -= ticks; count < 0; count += LOAD_FREQ) { CALC_LOAD(avenrun[0], EXP_1, active_tasks); CALC_LOAD(avenrun[1], EXP_5, active_tasks); CALC_LOAD(avenrun[2], EXP_15, active_tasks); @@ -1270,11 +1268,8 @@ void run_local_timers(void) * Called by the timer interrupt. xtime_lock must already be taken * by the timer IRQ! */ -static inline void update_times(void) +static inline void update_times(unsigned long ticks) { - unsigned long ticks; - - ticks = jiffies - wall_jiffies; wall_jiffies += ticks; update_wall_time(); calc_load(ticks); @@ -1286,12 +1281,10 @@ static inline void update_times(void) * jiffies is defined in the linker script... */ -void do_timer(struct pt_regs *regs) +void do_timer(unsigned long ticks) { - jiffies_64++; - /* prevent loading jiffies before storing new jiffies_64 value. */ - barrier(); - update_times(); + jiffies_64 += ticks; + update_times(ticks); } #ifdef __ARCH_WANT_SYS_ALARM -- cgit v1.2.3-71-gd317 From 5785c95baede8459d70c4aa0f7becb6e8b5fde4b Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 29 Sep 2006 02:00:43 -0700 Subject: [PATCH] tty: make termios_sem a mutex [akpm@osdl.org: fix] Cc: Alan Cox Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 23 ++++++++++++----------- drivers/char/tty_ioctl.c | 17 +++++++++-------- include/linux/tty.h | 2 +- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index b4f37c65b95c..142427c6e8f3 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -618,9 +618,9 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags); static void tty_set_termios_ldisc(struct tty_struct *tty, int num) { - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); tty->termios->c_line = num; - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); } /* @@ -1338,9 +1338,9 @@ static void do_tty_hangup(void *data) */ if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); *tty->termios = tty->driver->init_termios; - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); } /* Defer ldisc switch */ @@ -2750,9 +2750,9 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg) { int err; - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); err = copy_to_user(arg, &tty->winsize, sizeof(*arg)); - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); return err ? -EFAULT: 0; } @@ -2782,14 +2782,15 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) return -EFAULT; - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg))) goto done; #ifdef CONFIG_VT if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) { - if (vc_lock_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row)) { - up(&tty->termios_sem); + if (vc_lock_resize(tty->driver_data, tmp_ws.ws_col, + tmp_ws.ws_row)) { + mutex_unlock(&tty->termios_mutex); return -ENXIO; } } @@ -2801,7 +2802,7 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, tty->winsize = tmp_ws; real_tty->winsize = tmp_ws; done: - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); return 0; } @@ -3576,7 +3577,7 @@ static void initialize_tty_struct(struct tty_struct *tty) tty_buffer_init(tty); INIT_WORK(&tty->buf.work, flush_to_ldisc, tty); init_MUTEX(&tty->buf.pty_sem); - init_MUTEX(&tty->termios_sem); + mutex_init(&tty->termios_mutex); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup, tty); diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index 4ad47d321bd4..4d540619ac84 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -131,7 +132,7 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios /* FIXME: we need to decide on some locking/ordering semantics for the set_termios notification eventually */ - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); *tty->termios = *new_termios; unset_locked_termios(tty->termios, &old_termios, tty->termios_locked); @@ -176,7 +177,7 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios (ld->set_termios)(tty, &old_termios); tty_ldisc_deref(ld); } - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); } /** @@ -284,13 +285,13 @@ static int get_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb) { struct sgttyb tmp; - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); tmp.sg_ispeed = 0; tmp.sg_ospeed = 0; tmp.sg_erase = tty->termios->c_cc[VERASE]; tmp.sg_kill = tty->termios->c_cc[VKILL]; tmp.sg_flags = get_sgflags(tty); - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); return copy_to_user(sgttyb, &tmp, sizeof(tmp)) ? -EFAULT : 0; } @@ -345,12 +346,12 @@ static int set_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb) if (copy_from_user(&tmp, sgttyb, sizeof(tmp))) return -EFAULT; - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); termios = *tty->termios; termios.c_cc[VERASE] = tmp.sg_erase; termios.c_cc[VKILL] = tmp.sg_kill; set_sgflags(&termios, tmp.sg_flags); - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); change_termios(tty, &termios); return 0; } @@ -592,11 +593,11 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file, case TIOCSSOFTCAR: if (get_user(arg, (unsigned int __user *) arg)) return -EFAULT; - down(&tty->termios_sem); + mutex_lock(&tty->termios_mutex); tty->termios->c_cflag = ((tty->termios->c_cflag & ~CLOCAL) | (arg ? CLOCAL : 0)); - up(&tty->termios_sem); + mutex_unlock(&tty->termios_mutex); return 0; default: return -ENOIOCTLCMD; diff --git a/include/linux/tty.h b/include/linux/tty.h index d1dec3d0c814..ea4c2605f8da 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -174,7 +174,7 @@ struct tty_struct { struct tty_driver *driver; int index; struct tty_ldisc ldisc; - struct semaphore termios_sem; + struct mutex termios_mutex; struct termios *termios, *termios_locked; char name[64]; int pgrp; -- cgit v1.2.3-71-gd317 From 416bc51292f977b43b010c6dd937522b90062390 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 29 Sep 2006 02:00:45 -0700 Subject: [PATCH] Use decimal for PTRACE_ATTACH and PTRACE_DETACH. It is sure confusing that linux/ptrace.h has: #define PTRACE_SINGLESTEP 9 #define PTRACE_ATTACH 0x10 #define PTRACE_DETACH 0x11 #define PTRACE_SYSCALL 24 All the low-numbered constants are in decimal, but the last two in hex. It sure makes it likely that someone will look at this and think that 9, 10, 11 are used, and that 16 and 17 are not used. How about we use the same notation for all the numbers [0,24] in the same short list? Signed-off-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 8b2749a259dc..eeb1976ef7bf 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -16,8 +16,8 @@ #define PTRACE_KILL 8 #define PTRACE_SINGLESTEP 9 -#define PTRACE_ATTACH 0x10 -#define PTRACE_DETACH 0x11 +#define PTRACE_ATTACH 16 +#define PTRACE_DETACH 17 #define PTRACE_SYSCALL 24 -- cgit v1.2.3-71-gd317 From 57a6f51c4281aa3119975473c70dce0480d322bd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:00:49 -0700 Subject: [PATCH] introduce is_rt_policy() helper Imho, makes the code a bit easier to read. Signed-off-by: Oleg Nesterov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Steven Rostedt Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++-- kernel/sched.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 503dea61ff99..fbc69cc3923d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -504,8 +504,8 @@ struct signal_struct { #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) #define rt_task(p) rt_prio((p)->prio) #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) -#define has_rt_policy(p) \ - unlikely((p)->policy != SCHED_NORMAL && (p)->policy != SCHED_BATCH) +#define is_rt_policy(p) ((p) != SCHED_NORMAL && (p) != SCHED_BATCH) +#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) /* * Some day this will be a full-fledged user tracking system.. diff --git a/kernel/sched.c b/kernel/sched.c index f9b3c6a414f1..c3c718aea618 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4109,8 +4109,7 @@ recheck: (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) return -EINVAL; - if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) - != (param->sched_priority == 0)) + if (is_rt_policy(policy) != (param->sched_priority != 0)) return -EINVAL; /* @@ -4134,7 +4133,7 @@ recheck: !rlim_rtprio) return -EPERM; /* can't increase priority */ - if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) && + if (is_rt_policy(policy) && param->sched_priority > p->rt_priority && param->sched_priority > rlim_rtprio) return -EPERM; -- cgit v1.2.3-71-gd317 From 9f50b93f066f8dc339de9b0eb78a22a75e6c8f8f Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 29 Sep 2006 02:00:59 -0700 Subject: [PATCH] Make spinlock/rwlock annotations more accurate by using parameters, not types The lock annotations used on spinlocks and rwlocks currently use __{acquires,releases}(spinlock_t) and __{acquires,releases}(rwlock_t), respectively. This loses the information of which lock actually got acquired or released, and assumes a different type for the parameter of __acquires and __releases than the rest of the kernel. While the current implementations of __acquires and __releases throw away their argument, this will not always remain the case. Change this to use the lock parameter instead, to preserve this information and increase consistency in usage of __acquires and __releases. Signed-off-by: Josh Triplett Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spinlock_api_smp.h | 50 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index b2c4f8299464..8828b8155e9c 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -19,41 +19,41 @@ int in_lock_functions(unsigned long addr); #define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) -void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t); +void __lockfunc _spin_lock(spinlock_t *lock) __acquires(lock); void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) - __acquires(spinlock_t); -void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t); -void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t); -void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t); -void __lockfunc _read_lock_bh(rwlock_t *lock) __acquires(rwlock_t); -void __lockfunc _write_lock_bh(rwlock_t *lock) __acquires(rwlock_t); -void __lockfunc _spin_lock_irq(spinlock_t *lock) __acquires(spinlock_t); -void __lockfunc _read_lock_irq(rwlock_t *lock) __acquires(rwlock_t); -void __lockfunc _write_lock_irq(rwlock_t *lock) __acquires(rwlock_t); + __acquires(lock); +void __lockfunc _read_lock(rwlock_t *lock) __acquires(lock); +void __lockfunc _write_lock(rwlock_t *lock) __acquires(lock); +void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(lock); +void __lockfunc _read_lock_bh(rwlock_t *lock) __acquires(lock); +void __lockfunc _write_lock_bh(rwlock_t *lock) __acquires(lock); +void __lockfunc _spin_lock_irq(spinlock_t *lock) __acquires(lock); +void __lockfunc _read_lock_irq(rwlock_t *lock) __acquires(lock); +void __lockfunc _write_lock_irq(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) - __acquires(spinlock_t); + __acquires(lock); unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) - __acquires(rwlock_t); + __acquires(lock); unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) - __acquires(rwlock_t); + __acquires(lock); int __lockfunc _spin_trylock(spinlock_t *lock); int __lockfunc _read_trylock(rwlock_t *lock); int __lockfunc _write_trylock(rwlock_t *lock); int __lockfunc _spin_trylock_bh(spinlock_t *lock); -void __lockfunc _spin_unlock(spinlock_t *lock) __releases(spinlock_t); -void __lockfunc _read_unlock(rwlock_t *lock) __releases(rwlock_t); -void __lockfunc _write_unlock(rwlock_t *lock) __releases(rwlock_t); -void __lockfunc _spin_unlock_bh(spinlock_t *lock) __releases(spinlock_t); -void __lockfunc _read_unlock_bh(rwlock_t *lock) __releases(rwlock_t); -void __lockfunc _write_unlock_bh(rwlock_t *lock) __releases(rwlock_t); -void __lockfunc _spin_unlock_irq(spinlock_t *lock) __releases(spinlock_t); -void __lockfunc _read_unlock_irq(rwlock_t *lock) __releases(rwlock_t); -void __lockfunc _write_unlock_irq(rwlock_t *lock) __releases(rwlock_t); +void __lockfunc _spin_unlock(spinlock_t *lock) __releases(lock); +void __lockfunc _read_unlock(rwlock_t *lock) __releases(lock); +void __lockfunc _write_unlock(rwlock_t *lock) __releases(lock); +void __lockfunc _spin_unlock_bh(spinlock_t *lock) __releases(lock); +void __lockfunc _read_unlock_bh(rwlock_t *lock) __releases(lock); +void __lockfunc _write_unlock_bh(rwlock_t *lock) __releases(lock); +void __lockfunc _spin_unlock_irq(spinlock_t *lock) __releases(lock); +void __lockfunc _read_unlock_irq(rwlock_t *lock) __releases(lock); +void __lockfunc _write_unlock_irq(rwlock_t *lock) __releases(lock); void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) - __releases(spinlock_t); + __releases(lock); void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) - __releases(rwlock_t); + __releases(lock); void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) - __releases(rwlock_t); + __releases(lock); #endif /* __LINUX_SPINLOCK_API_SMP_H */ -- cgit v1.2.3-71-gd317 From 303912e2a32aa73785b4c4dee15466d944a38a46 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 29 Sep 2006 02:01:00 -0700 Subject: [PATCH] Replace _spin_trylock with spin_trylock in the IRQ variants to use __cond_lock spin_trylock_irq and spin_trylock_irqsave use _spin_trylock, which does not use the __cond_lock wrapper annotation and thus does not affect the lock context; change them to use spin_trylock instead, which does use __cond_lock. Signed-off-by: Josh Triplett Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 31473db92d3b..456e74f0e129 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -241,14 +241,14 @@ do { \ #define spin_trylock_irq(lock) \ ({ \ local_irq_disable(); \ - _spin_trylock(lock) ? \ + spin_trylock(lock) ? \ 1 : ({ local_irq_enable(); 0; }); \ }) #define spin_trylock_irqsave(lock, flags) \ ({ \ local_irq_save(flags); \ - _spin_trylock(lock) ? \ + spin_trylock(lock) ? \ 1 : ({ local_irq_restore(flags); 0; }); \ }) -- cgit v1.2.3-71-gd317 From dcc8e559ee5ae03fa6bdb8611d76d86d0083e793 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 29 Sep 2006 02:01:03 -0700 Subject: [PATCH] Pass a lock expression to __cond_lock, like __acquire and __release Currently, __acquire and __release take a lock expression, but __cond_lock takes only a condition, not the lock acquired if the expression evaluates to true. Change __cond_lock to accept a lock expression, and change all the callers to pass in a lock expression. Signed-off-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 4 ++-- include/linux/spinlock.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 060b96112ec6..0780de440220 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -14,7 +14,7 @@ # define __releases(x) __attribute__((context(1,0))) # define __acquire(x) __context__(1) # define __release(x) __context__(-1) -# define __cond_lock(x) ((x) ? ({ __context__(1); 1; }) : 0) +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) extern void __chk_user_ptr(void __user *); extern void __chk_io_ptr(void __iomem *); #else @@ -31,7 +31,7 @@ extern void __chk_io_ptr(void __iomem *); # define __releases(x) # define __acquire(x) (void)0 # define __release(x) (void)0 -# define __cond_lock(x) (x) +# define __cond_lock(x,c) (c) #endif #ifdef __KERNEL__ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 456e74f0e129..b800d2d68b32 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -167,9 +167,9 @@ do { \ * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various * methods are defined as nops in the case they are not required. */ -#define spin_trylock(lock) __cond_lock(_spin_trylock(lock)) -#define read_trylock(lock) __cond_lock(_read_trylock(lock)) -#define write_trylock(lock) __cond_lock(_write_trylock(lock)) +#define spin_trylock(lock) __cond_lock(lock, _spin_trylock(lock)) +#define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) +#define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) #define spin_lock(lock) _spin_lock(lock) @@ -236,7 +236,7 @@ do { \ _write_unlock_irqrestore(lock, flags) #define write_unlock_bh(lock) _write_unlock_bh(lock) -#define spin_trylock_bh(lock) __cond_lock(_spin_trylock_bh(lock)) +#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) #define spin_trylock_irq(lock) \ ({ \ @@ -264,7 +264,7 @@ do { \ */ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(_atomic_dec_and_lock(atomic, lock)) + __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) /** * spin_can_lock - would spin_trylock() succeed? -- cgit v1.2.3-71-gd317 From 368bdb3d616fa352971f45b423ae6344715e620b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 29 Sep 2006 02:01:05 -0700 Subject: [PATCH] cramfs: make cramfs_uncompress_exit() return void It always returns 0, so relying on it is useless. The only caller isn't checking return value. In general, un-, de-, -free functions should return void. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cramfs/uncompress.c | 3 +-- include/linux/cramfs_fs.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index 8def89f2c438..fc3ccb74626f 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c @@ -68,11 +68,10 @@ int cramfs_uncompress_init(void) return 0; } -int cramfs_uncompress_exit(void) +void cramfs_uncompress_exit(void) { if (!--initialized) { zlib_inflateEnd(&stream); vfree(stream.workspace); } - return 0; } diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h index a41f38428c37..1dba681e428d 100644 --- a/include/linux/cramfs_fs.h +++ b/include/linux/cramfs_fs.h @@ -87,6 +87,6 @@ struct cramfs_super { /* Uncompression interfaces to the underlying zlib */ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen); int cramfs_uncompress_init(void); -int cramfs_uncompress_exit(void); +void cramfs_uncompress_exit(void); #endif -- cgit v1.2.3-71-gd317 From e8106b941ceab68cc5ff713df7b1276484554584 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 29 Sep 2006 02:01:08 -0700 Subject: [PATCH] lockdep: core, add enable/disable_irq_irqsave/irqrestore() APIs Introduce the disable_irq_nosync_lockdep_irqsave() and enable_irq_lockdep_irqrestore() APIs. These are needed for NE2000; basically NE2000 calls disable_irq and enable_irq as locking against the IRQ handler, but both in cases where interrupts are on and off. This means that lockdep needs to track the old state of the virtual irq flags on disable_irq, and restore these at enable_irq time. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/8390.c | 6 +++--- include/linux/interrupt.h | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/8390.c b/drivers/net/8390.c index 5b6b05ed8f3c..9d34056147ad 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -299,7 +299,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) * Slow phase with lock held. */ - disable_irq_nosync_lockdep(dev->irq); + disable_irq_nosync_lockdep_irqsave(dev->irq, &flags); spin_lock(&ei_local->page_lock); @@ -338,7 +338,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); outb_p(ENISR_ALL, e8390_base + EN0_IMR); spin_unlock(&ei_local->page_lock); - enable_irq_lockdep(dev->irq); + enable_irq_lockdep_irqrestore(dev->irq, &flags); ei_local->stat.tx_errors++; return 1; } @@ -379,7 +379,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) outb_p(ENISR_ALL, e8390_base + EN0_IMR); spin_unlock(&ei_local->page_lock); - enable_irq_lockdep(dev->irq); + enable_irq_lockdep_irqrestore(dev->irq, &flags); dev_kfree_skb (skb); ei_local->stat.tx_bytes += send_length; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d5afee95fd43..1f97e3d92639 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -123,6 +123,14 @@ static inline void disable_irq_nosync_lockdep(unsigned int irq) #endif } +static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) +{ + disable_irq_nosync(irq); +#ifdef CONFIG_LOCKDEP + local_irq_save(*flags); +#endif +} + static inline void disable_irq_lockdep(unsigned int irq) { disable_irq(irq); @@ -139,6 +147,14 @@ static inline void enable_irq_lockdep(unsigned int irq) enable_irq(irq); } +static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) +{ +#ifdef CONFIG_LOCKDEP + local_irq_restore(*flags); +#endif + enable_irq(irq); +} + /* IRQ wakeup (PM) control: */ extern int set_irq_wake(unsigned int irq, unsigned int on); -- cgit v1.2.3-71-gd317 From 55a101f8f71a3d3dbda7b5c77083ffe47552f831 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:01:10 -0700 Subject: [PATCH] kill PF_DEAD flag After the previous change (->flags & PF_DEAD) <=> (->state == EXIT_DEAD), we don't need PF_DEAD any longer. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/exit.c | 6 ++---- kernel/sched.c | 16 ++++++++-------- mm/oom_kill.c | 4 ++-- 4 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index fbc69cc3923d..9763de334f09 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1061,7 +1061,6 @@ static inline void put_task_struct(struct task_struct *t) /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_DEAD 0x00000008 /* Dead */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ diff --git a/kernel/exit.c b/kernel/exit.c index 3d759c98fb11..9dd5f1336da2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -953,10 +953,8 @@ fastcall NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); - BUG_ON(tsk->flags & PF_DEAD); - tsk->flags |= PF_DEAD; + /* causes final put_task_struct in finish_task_switch(). */ tsk->state = EXIT_DEAD; schedule(); @@ -972,7 +970,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) { if (comp) complete(comp); - + do_exit(code); } diff --git a/kernel/sched.c b/kernel/sched.c index e1646b044b69..a9405d7cc6ab 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1755,27 +1755,27 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; - unsigned long prev_task_flags; + long prev_state; rq->prev_mm = NULL; /* * A task struct has one reference for the use as "current". - * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and - * calls schedule one last time. The schedule call will never return, - * and the scheduled task must drop that reference. - * The test for EXIT_ZOMBIE must occur while the runqueue locks are + * If a task dies, then it sets EXIT_DEAD in tsk->state and calls + * schedule one last time. The schedule call will never return, and + * the scheduled task must drop that reference. + * The test for EXIT_DEAD must occur while the runqueue locks are * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. * Manfred Spraul */ - prev_task_flags = prev->flags; + prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_task_flags & PF_DEAD)) { + if (unlikely(prev_state == EXIT_DEAD)) { /* * Remove function-return probe instances associated with this * task and put them back on the free list. @@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->flags & PF_DEAD); + BUG_ON(p->state == EXIT_DEAD); get_task_struct(p); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f3dd79c1c367..202f186a753a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -226,8 +226,8 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || p->flags & PF_EXITING; if (releasing) { - /* PF_DEAD tasks have already released their mm */ - if (p->flags & PF_DEAD) + /* TASK_DEAD tasks have already released their mm */ + if (p->state == EXIT_DEAD) continue; if (p->flags & PF_EXITING && p == current) { chosen = p; -- cgit v1.2.3-71-gd317 From c394cc9fbb367f87faa2228ec2eabacd2d4701c6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:01:11 -0700 Subject: [PATCH] introduce TASK_DEAD state I am not sure about this patch, I am asking Ingo to take a decision. task_struct->state == EXIT_DEAD is a very special case, to avoid a confusion it makes sense to introduce a new state, TASK_DEAD, while EXIT_DEAD should live only in ->exit_state as documented in sched.h. Note that this state is not visible to user-space, get_task_state() masks off unsuitable states. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/exit.c | 2 +- kernel/sched.c | 8 ++++---- mm/oom_kill.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9763de334f09..a06fc89cf6e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -148,6 +148,7 @@ extern unsigned long weighted_cpuload(const int cpu); #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 +#define TASK_DEAD 128 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) diff --git a/kernel/exit.c b/kernel/exit.c index 9dd5f1336da2..2e4c13cba95a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -955,7 +955,7 @@ fastcall NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ - tsk->state = EXIT_DEAD; + tsk->state = TASK_DEAD; schedule(); BUG(); diff --git a/kernel/sched.c b/kernel/sched.c index a9405d7cc6ab..74f169ac0773 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1761,10 +1761,10 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) /* * A task struct has one reference for the use as "current". - * If a task dies, then it sets EXIT_DEAD in tsk->state and calls + * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. - * The test for EXIT_DEAD must occur while the runqueue locks are + * The test for TASK_DEAD must occur while the runqueue locks are * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. @@ -1775,7 +1775,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_state == EXIT_DEAD)) { + if (unlikely(prev_state == TASK_DEAD)) { /* * Remove function-return probe instances associated with this * task and put them back on the free list. @@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->state == EXIT_DEAD); + BUG_ON(p->state == TASK_DEAD); get_task_struct(p); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 202f186a753a..21f0a7e8e514 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -227,7 +227,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) p->flags & PF_EXITING; if (releasing) { /* TASK_DEAD tasks have already released their mm */ - if (p->state == EXIT_DEAD) + if (p->state == TASK_DEAD) continue; if (p->flags & PF_EXITING && p == current) { chosen = p; -- cgit v1.2.3-71-gd317 From 38837fc75acb7fa9b0e111b0241fe4fe76c5d4b3 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Fri, 29 Sep 2006 02:01:16 -0700 Subject: [PATCH] cpuset: top_cpuset tracks hotplug changes to node_online_map Change the list of memory nodes allowed to tasks in the top (root) nodeset to dynamically track what cpus are online, using a call to a cpuset hook from the memory hotplug code. Make this top cpus file read-only. On systems that have cpusets configured in their kernel, but that aren't actively using cpusets (for some distros, this covers the majority of systems) all tasks end up in the top cpuset. If that system does support memory hotplug, then these tasks cannot make use of memory nodes that are added after system boot, because the memory nodes are not allowed in the top cpuset. This is a surprising regression over earlier kernels that didn't have cpusets enabled. One key motivation for this change is to remain consistent with the behaviour for the top_cpuset's 'cpus', which is also read-only, and which automatically tracks the cpu_online_map. This change also has the minor benefit that it fixes a long standing, little noticed, minor bug in cpusets. The cpuset performance tweak to short circuit the cpuset_zone_allowed() check on systems with just a single cpuset (see 'number_of_cpusets', in linux/cpuset.h) meant that simply changing the 'mems' of the top_cpuset had no affect, even though the change (the write system call) appeared to succeed. With the following change, that write to the 'mems' file fails -EACCES, and the 'mems' file stubbornly refuses to be changed via user space writes. Thus no one should be mislead into thinking they've changed the top_cpusets's 'mems' when in affect they haven't. In order to keep the behaviour of cpusets consistent between systems actively making use of them and systems not using them, this patch changes the behaviour of the 'mems' file in the top (root) cpuset, making it read only, and making it automatically track the value of node_online_map. Thus tasks in the top cpuset will have automatic use of hot plugged memory nodes allowed by their cpuset. [akpm@osdl.org: build fix] [bunk@stusta.de: build fix] Signed-off-by: Paul Jackson Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpusets.txt | 10 +++++----- include/linux/cpuset.h | 4 ++++ kernel/cpuset.c | 28 +++++++++++++++++++++++++--- mm/memory_hotplug.c | 3 +++ 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 76b44290c154..842f0d1ab216 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -217,11 +217,11 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs) to represent the cpuset hierarchy provides for a familiar permission and name space for cpusets, with a minimum of additional kernel code. -The cpus file in the root (top_cpuset) cpuset is read-only. -It automatically tracks the value of cpu_online_map, using a CPU -hotplug notifier. If and when memory nodes can be hotplugged, -we expect to make the mems file in the root cpuset read-only -as well, and have it track the value of node_online_map. +The cpus and mems files in the root (top_cpuset) cpuset are +read-only. The cpus file automatically tracks the value of +cpu_online_map using a CPU hotplug notifier, and the mems file +automatically tracks the value of node_online_map using the +cpuset_track_online_nodes() hook. 1.4 What are exclusive cpusets ? diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 9354722a9217..4d8adf663681 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -63,6 +63,8 @@ static inline int cpuset_do_slab_mem_spread(void) return current->flags & PF_SPREAD_SLAB; } +extern void cpuset_track_online_nodes(void); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } @@ -126,6 +128,8 @@ static inline int cpuset_do_slab_mem_spread(void) return 0; } +static inline void cpuset_track_online_nodes(void) {} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 584bb4e6c042..794af5024c2f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -912,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) int fudge; int retval; + /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ + if (cs == &top_cpuset) + return -EACCES; + trialcs = *cs; retval = nodelist_parse(buf, trialcs.mems_allowed); if (retval < 0) @@ -2042,9 +2046,8 @@ out: * (of no affect) on systems that are actively using CPU hotplug * but making no active use of cpusets. * - * This handles CPU hotplug (cpuhp) events. If someday Memory - * Nodes can be hotplugged (dynamically changing node_online_map) - * then we should handle that too, perhaps in a similar way. + * This routine ensures that top_cpuset.cpus_allowed tracks + * cpu_online_map on each CPU hotplug (cpuhp) event. */ #ifdef CONFIG_HOTPLUG_CPU @@ -2063,6 +2066,25 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, } #endif +/* + * Keep top_cpuset.mems_allowed tracking node_online_map. + * Call this routine anytime after you change node_online_map. + * See also the previous routine cpuset_handle_cpuhp(). + */ + +#ifdef CONFIG_MEMORY_HOTPLUG +void cpuset_track_online_nodes() +{ + mutex_lock(&manage_mutex); + mutex_lock(&callback_mutex); + + top_cpuset.mems_allowed = node_online_map; + + mutex_unlock(&callback_mutex); + mutex_unlock(&manage_mutex); +} +#endif + /** * cpuset_init_smp - initialize cpus_allowed * diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c37319542b70..9576ed920c0a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -283,6 +284,8 @@ int add_memory(int nid, u64 start, u64 size) /* we online node here. we can't roll back from here. */ node_set_online(nid); + cpuset_track_online_nodes(); + if (new_pgdat) { ret = register_one_node(nid); /* -- cgit v1.2.3-71-gd317 From 2d1d43f6a43b703587e759145f69467e7c6553a7 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Fri, 29 Sep 2006 02:01:25 -0700 Subject: [PATCH] call mm/page-writeback.c:set_ratelimit() when new pages are hot-added ratelimit_pages in page-writeback.c is recalculated (in set_ratelimit()) every time a CPU is hot-added/removed. But this value is not recalculated when new pages are hot-added. This patch fixes that problem by calling set_ratelimit() when new pages are hot-added. [akpm@osdl.org: cleanups] Signed-off-by: Chandra Seetharaman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 + mm/memory_hotplug.c | 2 ++ mm/page-writeback.c | 6 +++--- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 56a23a0e7f2e..9d4074ecd0cd 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -117,6 +117,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); void set_page_dirty_balance(struct page *page); +void writeback_set_ratelimit(void); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9576ed920c0a..2053bb165a21 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -192,6 +193,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) if (need_zonelists_rebuild) build_all_zonelists(); vm_total_pages = nr_free_pagecache_pages(); + writeback_set_ratelimit(); return 0; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index efd2705e4986..488b7088557c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -501,7 +501,7 @@ void laptop_sync_completion(void) * will write six megabyte chunks, max. */ -static void set_ratelimit(void) +void writeback_set_ratelimit(void) { ratelimit_pages = vm_total_pages / (num_online_cpus() * 32); if (ratelimit_pages < 16) @@ -513,7 +513,7 @@ static void set_ratelimit(void) static int __cpuinit ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { - set_ratelimit(); + writeback_set_ratelimit(); return 0; } @@ -546,7 +546,7 @@ void __init page_writeback_init(void) vm_dirty_ratio = 1; } mod_timer(&wb_timer, jiffies + dirty_writeback_interval); - set_ratelimit(); + writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); } -- cgit v1.2.3-71-gd317 From 04b1db9fd7eea63c9663072feece616ea41b0a79 Mon Sep 17 00:00:00 2001 From: "Ian S. Nelson" Date: Fri, 29 Sep 2006 02:01:31 -0700 Subject: [PATCH] /sys/modules: allow full length section names I've been using systemtap for some debugging and I noticed that it can't probe a lot of modules. Turns out it's kind of silly, the sections section of /sys/module is limited to 32byte filenames and many of the actual sections are a a bit longer than that. [akpm@osdl.org: rewrite to use dymanic allocation] Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 3 ++- include/linux/module.h | 4 ++-- kernel/module.c | 26 ++++++++++++++++++++------ 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/CREDITS b/CREDITS index 8feb2bb49e35..66e82466dde8 100644 --- a/CREDITS +++ b/CREDITS @@ -2478,7 +2478,8 @@ S: Derbyshire DE4 3RL S: United Kingdom N: Ian S. Nelson -E: ian.nelson@echostar.com +E: nelsonis@earthlink.net +P: 1024D/00D3D983 3EFD 7B86 B888 D7E2 29B6 9E97 576F 1B97 00D3 D983 D: Minor mmap and ide hacks S: 1370 Atlantis Ave. S: Lafayette CO, 80026 diff --git a/include/linux/module.h b/include/linux/module.h index d4486cc2e7fe..2c599175c583 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -232,17 +232,17 @@ enum module_state }; /* Similar stuff for section attributes. */ -#define MODULE_SECT_NAME_LEN 32 struct module_sect_attr { struct module_attribute mattr; - char name[MODULE_SECT_NAME_LEN]; + char *name; unsigned long address; }; struct module_sect_attrs { struct attribute_group grp; + int nsections; struct module_sect_attr attrs[0]; }; diff --git a/kernel/module.c b/kernel/module.c index b7fe6e840963..05625d5dc758 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -933,6 +933,15 @@ static ssize_t module_sect_show(struct module_attribute *mattr, return sprintf(buf, "0x%lx\n", sattr->address); } +static void free_sect_attrs(struct module_sect_attrs *sect_attrs) +{ + int section; + + for (section = 0; section < sect_attrs->nsections; section++) + kfree(sect_attrs->attrs[section].name); + kfree(sect_attrs); +} + static void add_sect_attrs(struct module *mod, unsigned int nsect, char *secstrings, Elf_Shdr *sechdrs) { @@ -949,21 +958,26 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, + nloaded * sizeof(sect_attrs->attrs[0]), sizeof(sect_attrs->grp.attrs[0])); size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); - if (! (sect_attrs = kmalloc(size[0] + size[1], GFP_KERNEL))) + sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); + if (sect_attrs == NULL) return; /* Setup section attributes. */ sect_attrs->grp.name = "sections"; sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; + sect_attrs->nsections = 0; sattr = §_attrs->attrs[0]; gattr = §_attrs->grp.attrs[0]; for (i = 0; i < nsect; i++) { if (! (sechdrs[i].sh_flags & SHF_ALLOC)) continue; sattr->address = sechdrs[i].sh_addr; - strlcpy(sattr->name, secstrings + sechdrs[i].sh_name, - MODULE_SECT_NAME_LEN); + sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, + GFP_KERNEL); + if (sattr->name == NULL) + goto out; + sect_attrs->nsections++; sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; @@ -979,7 +993,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, mod->sect_attrs = sect_attrs; return; out: - kfree(sect_attrs); + free_sect_attrs(sect_attrs); } static void remove_sect_attrs(struct module *mod) @@ -989,13 +1003,13 @@ static void remove_sect_attrs(struct module *mod) &mod->sect_attrs->grp); /* We are positive that no one is using any sect attrs * at this point. Deallocate immediately. */ - kfree(mod->sect_attrs); + free_sect_attrs(mod->sect_attrs); mod->sect_attrs = NULL; } } - #else + static inline void add_sect_attrs(struct module *mod, unsigned int nsect, char *sectstrings, Elf_Shdr *sechdrs) { -- cgit v1.2.3-71-gd317 From f0c8bd164e1a0585d7e46896553136b4f488bd19 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Sep 2006 02:01:34 -0700 Subject: [PATCH] Generic infrastructure for acls The patches solve the following problem: We want to grant access to devices based on who is logged in from where, etc. This includes switching back and forth between multiple user sessions, etc. Using ACLs to define device access for logged-in users gives us all the flexibility we need in order to fully solve the problem. Device special files nowadays usually live on tmpfs, hence tmpfs ACLs. Different distros have come up with solutions that solve the problem to different degrees: SUSE uses a resource manager which tracks login sessions and sets ACLs on device inodes as appropriate. RedHat uses pam_console, which changes the primary file ownership to the logged-in user. Others use a set of groups that users must be in in order to be granted the appropriate accesses. The freedesktop.org project plans to implement a combination of a console-tracker and a HAL-device-list based solution to grant access to devices to users, and more distros will likely follow this approach. These patches have first been posted here on 2 February 2005, and again on 8 January 2006. We have been shipping them in SLES9 and SLES10 with no problems reported. The previous submission is archived here: http://lkml.org/lkml/2006/1/8/229 http://lkml.org/lkml/2006/1/8/230 http://lkml.org/lkml/2006/1/8/231 This patch: Add some infrastructure for access control lists on in-memory filesystems such as tmpfs. Signed-off-by: Andreas Gruenbacher Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 4 + fs/Makefile | 1 + fs/generic_acl.c | 197 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/generic_acl.h | 36 ++++++++ 4 files changed, 238 insertions(+) create mode 100644 fs/generic_acl.c create mode 100644 include/linux/generic_acl.h (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index d311198bba43..deb9eec9f6ee 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1940,6 +1940,10 @@ config 9P_FS If unsure, say N. +config GENERIC_ACL + bool + select FS_POSIX_ACL + endmenu menu "Partition Types" diff --git a/fs/Makefile b/fs/Makefile index 89135428a539..46b8cfe497b2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ +obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/generic_acl.c b/fs/generic_acl.c new file mode 100644 index 000000000000..9ccb78947171 --- /dev/null +++ b/fs/generic_acl.c @@ -0,0 +1,197 @@ +/* + * fs/generic_acl.c + * + * (C) 2005 Andreas Gruenbacher + * + * This file is released under the GPL. + */ + +#include +#include +#include + +/** + * generic_acl_list - Generic xattr_handler->list() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +size_t +generic_acl_list(struct inode *inode, struct generic_acl_operations *ops, + int type, char *list, size_t list_size) +{ + struct posix_acl *acl; + const char *name; + size_t size; + + acl = ops->getacl(inode, type); + if (!acl) + return 0; + posix_acl_release(acl); + + switch(type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + + default: + return 0; + } + size = strlen(name) + 1; + if (list && size <= list_size) + memcpy(list, name, size); + return size; +} + +/** + * generic_acl_get - Generic xattr_handler->get() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +int +generic_acl_get(struct inode *inode, struct generic_acl_operations *ops, + int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + acl = ops->getacl(inode, type); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +/** + * generic_acl_set - Generic xattr_handler->set() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +int +generic_acl_set(struct inode *inode, struct generic_acl_operations *ops, + int type, const void *value, size_t size) +{ + struct posix_acl *acl = NULL; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + return -EPERM; + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (acl) { + mode_t mode; + + error = posix_acl_valid(acl); + if (error) + goto failed; + switch(type) { + case ACL_TYPE_ACCESS: + mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + goto failed; + inode->i_mode = mode; + if (error == 0) { + posix_acl_release(acl); + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) { + error = -EINVAL; + goto failed; + } + break; + } + } + ops->setacl(inode, type, acl); + error = 0; +failed: + posix_acl_release(acl); + return error; +} + +/** + * generic_acl_init - Take care of acl inheritance at @inode create time + * @ops: Filesystem specific getacl and setacl callbacks + * + * Files created inside a directory with a default ACL inherit the + * directory's default ACL. + */ +int +generic_acl_init(struct inode *inode, struct inode *dir, + struct generic_acl_operations *ops) +{ + struct posix_acl *acl = NULL; + mode_t mode = inode->i_mode; + int error; + + inode->i_mode = mode & ~current->fs->umask; + if (!S_ISLNK(inode->i_mode)) + acl = ops->getacl(dir, ACL_TYPE_DEFAULT); + if (acl) { + struct posix_acl *clone; + + if (S_ISDIR(inode->i_mode)) { + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + ops->setacl(inode, ACL_TYPE_DEFAULT, clone); + posix_acl_release(clone); + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) + ops->setacl(inode, ACL_TYPE_ACCESS, clone); + } + posix_acl_release(clone); + } + error = 0; + +cleanup: + posix_acl_release(acl); + return error; +} + +/** + * generic_acl_chmod - change the access acl of @inode upon chmod() + * @ops: FIlesystem specific getacl and setacl callbacks + * + * A chmod also changes the permissions of the owner, group/mask, and + * other ACL entries. + */ +int +generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) +{ + struct posix_acl *acl, *clone; + int error = 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + acl = ops->getacl(inode, ACL_TYPE_ACCESS); + if (acl) { + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + ops->setacl(inode, ACL_TYPE_ACCESS, clone); + posix_acl_release(clone); + } + return error; +} diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h new file mode 100644 index 000000000000..80764f40be75 --- /dev/null +++ b/include/linux/generic_acl.h @@ -0,0 +1,36 @@ +/* + * fs/generic_acl.c + * + * (C) 2005 Andreas Gruenbacher + * + * This file is released under the GPL. + */ + +#ifndef GENERIC_ACL_H +#define GENERIC_ACL_H + +#include +#include + +/** + * struct generic_acl_operations - filesystem operations + * + * Filesystems must make these operations available to the generic + * operations. + */ +struct generic_acl_operations { + struct posix_acl *(*getacl)(struct inode *, int); + void (*setacl)(struct inode *, int, struct posix_acl *); +}; + +size_t generic_acl_list(struct inode *, struct generic_acl_operations *, int, + char *, size_t); +int generic_acl_get(struct inode *, struct generic_acl_operations *, int, + void *, size_t); +int generic_acl_set(struct inode *, struct generic_acl_operations *, int, + const void *, size_t); +int generic_acl_init(struct inode *, struct inode *, + struct generic_acl_operations *); +int generic_acl_chmod(struct inode *, struct generic_acl_operations *); + +#endif -- cgit v1.2.3-71-gd317 From 39f0247d3823e4e0bf8f6838a10362864b1e1053 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Sep 2006 02:01:35 -0700 Subject: [PATCH] Access Control Lists for tmpfs Add access control lists for tmpfs. Signed-off-by: Andreas Gruenbacher Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 13 ++++ include/linux/shmem_fs.h | 24 ++++++ mm/Makefile | 1 + mm/shmem.c | 99 +++++++++++++++++++++++- mm/shmem_acl.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 332 insertions(+), 2 deletions(-) create mode 100644 mm/shmem_acl.c (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index deb9eec9f6ee..4fd9efac29ab 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -881,6 +881,19 @@ config TMPFS See for details. +config TMPFS_POSIX_ACL + bool "Tmpfs POSIX Access Control Lists" + depends on TMPFS + select GENERIC_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + + If you don't know what Access Control Lists are, say N. + config HUGETLBFS bool "HugeTLB file system support" depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index c057f0b32318..f3c51899117f 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -19,6 +19,10 @@ struct shmem_inode_info { swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ struct list_head swaplist; /* chain of maybes on swap */ struct inode vfs_inode; +#ifdef CONFIG_TMPFS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif }; struct shmem_sb_info { @@ -36,4 +40,24 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) return container_of(inode, struct shmem_inode_info, vfs_inode); } +#ifdef CONFIG_TMPFS_POSIX_ACL +int shmem_permission(struct inode *, int, struct nameidata *); +int shmem_acl_init(struct inode *, struct inode *); +void shmem_acl_destroy_inode(struct inode *); + +extern struct xattr_handler shmem_xattr_acl_access_handler; +extern struct xattr_handler shmem_xattr_acl_default_handler; + +extern struct generic_acl_operations shmem_acl_ops; + +#else +static inline int shmem_acl_init(struct inode *inode, struct inode *dir) +{ + return 0; +} +static inline void shmem_acl_destroy_inode(struct inode *inode) +{ +} +#endif /* CONFIG_TMPFS_POSIX_ACL */ + #endif diff --git a/mm/Makefile b/mm/Makefile index 60c56c0b5e10..6200c6d6afd2 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SHMEM) += shmem.o +obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o diff --git a/mm/shmem.c b/mm/shmem.c index eda907c3a86a..b96de69f236b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -177,6 +179,7 @@ static const struct address_space_operations shmem_aops; static struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; +static struct inode_operations shmem_special_inode_operations; static struct vm_operations_struct shmem_vm_ops; static struct backing_dev_info shmem_backing_dev_info __read_mostly = { @@ -637,7 +640,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) struct page *page = NULL; int error; - if (attr->ia_valid & ATTR_SIZE) { + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { if (attr->ia_size < inode->i_size) { /* * If truncating down to a partial page, then @@ -670,6 +673,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (!error) error = inode_setattr(inode, attr); +#ifdef CONFIG_TMPFS_POSIX_ACL + if (!error && (attr->ia_valid & ATTR_MODE)) + error = generic_acl_chmod(inode, &shmem_acl_ops); +#endif if (page) page_cache_release(page); return error; @@ -1362,6 +1369,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) switch (mode & S_IFMT) { default: + inode->i_op = &shmem_special_inode_operations; init_special_inode(inode, mode, dev); break; case S_IFREG: @@ -1682,7 +1690,11 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) iput(inode); return error; } - error = 0; + } + error = shmem_acl_init(inode, dir); + if (error) { + iput(inode); + return error; } if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; @@ -1897,6 +1909,53 @@ static struct inode_operations shmem_symlink_inode_operations = { .put_link = shmem_put_link, }; +#ifdef CONFIG_TMPFS_POSIX_ACL +/** + * Superblocks without xattr inode operations will get security.* xattr + * support from the VFS "for free". As soon as we have any other xattrs + * like ACLs, we also need to implement the security.* handlers at + * filesystem level, though. + */ + +static size_t shmem_xattr_security_list(struct inode *inode, char *list, + size_t list_len, const char *name, + size_t name_len) +{ + return security_inode_listsecurity(inode, list, list_len); +} + +static int shmem_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return security_inode_getsecurity(inode, name, buffer, size, + -EOPNOTSUPP); +} + +static int shmem_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return security_inode_setsecurity(inode, name, value, size, flags); +} + +struct xattr_handler shmem_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = shmem_xattr_security_list, + .get = shmem_xattr_security_get, + .set = shmem_xattr_security_set, +}; + +static struct xattr_handler *shmem_xattr_handlers[] = { + &shmem_xattr_acl_access_handler, + &shmem_xattr_acl_default_handler, + &shmem_xattr_security_handler, + NULL +}; +#endif + static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes, int *policy, nodemask_t *policy_nodes) @@ -2094,6 +2153,10 @@ static int shmem_fill_super(struct super_block *sb, sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; +#ifdef CONFIG_TMPFS_POSIX_ACL + sb->s_xattr = shmem_xattr_handlers; + sb->s_flags |= MS_POSIXACL; +#endif inode = shmem_get_inode(sb, S_IFDIR | mode, 0); if (!inode) @@ -2130,6 +2193,7 @@ static void shmem_destroy_inode(struct inode *inode) /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } + shmem_acl_destroy_inode(inode); kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } @@ -2141,6 +2205,10 @@ static void init_once(void *foo, struct kmem_cache *cachep, if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&p->vfs_inode); +#ifdef CONFIG_TMPFS_POSIX_ACL + p->i_acl = NULL; + p->i_default_acl = NULL; +#endif } } @@ -2184,6 +2252,14 @@ static struct inode_operations shmem_inode_operations = { .truncate = shmem_truncate, .setattr = shmem_notify_change, .truncate_range = shmem_truncate_range, +#ifdef CONFIG_TMPFS_POSIX_ACL + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, + .permission = shmem_permission, +#endif + }; static struct inode_operations shmem_dir_inode_operations = { @@ -2198,6 +2274,25 @@ static struct inode_operations shmem_dir_inode_operations = { .mknod = shmem_mknod, .rename = shmem_rename, #endif +#ifdef CONFIG_TMPFS_POSIX_ACL + .setattr = shmem_notify_change, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, + .permission = shmem_permission, +#endif +}; + +static struct inode_operations shmem_special_inode_operations = { +#ifdef CONFIG_TMPFS_POSIX_ACL + .setattr = shmem_notify_change, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, + .permission = shmem_permission, +#endif }; static struct super_operations shmem_ops = { diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c new file mode 100644 index 000000000000..c946bf468718 --- /dev/null +++ b/mm/shmem_acl.c @@ -0,0 +1,197 @@ +/* + * mm/shmem_acl.c + * + * (C) 2005 Andreas Gruenbacher + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include + +/** + * shmem_get_acl - generic_acl_operations->getacl() operation + */ +static struct posix_acl * +shmem_get_acl(struct inode *inode, int type) +{ + struct posix_acl *acl = NULL; + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = posix_acl_dup(SHMEM_I(inode)->i_acl); + break; + + case ACL_TYPE_DEFAULT: + acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl); + break; + } + spin_unlock(&inode->i_lock); + + return acl; +} + +/** + * shmem_get_acl - generic_acl_operations->setacl() operation + */ +static void +shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *free = NULL; + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + free = SHMEM_I(inode)->i_acl; + SHMEM_I(inode)->i_acl = posix_acl_dup(acl); + break; + + case ACL_TYPE_DEFAULT: + free = SHMEM_I(inode)->i_default_acl; + SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl); + break; + } + spin_unlock(&inode->i_lock); + posix_acl_release(free); +} + +struct generic_acl_operations shmem_acl_ops = { + .getacl = shmem_get_acl, + .setacl = shmem_set_acl, +}; + +/** + * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access, + * shmem_xattr_acl_access_handler - plumbing code to implement the + * system.posix_acl_access xattr using the generic acl functions. + */ + +static size_t +shmem_list_acl_access(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, + list, list_size); +} + +static int +shmem_get_acl_access(struct inode *inode, const char *name, void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer, + size); +} + +static int +shmem_set_acl_access(struct inode *inode, const char *name, const void *value, + size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value, + size); +} + +struct xattr_handler shmem_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = shmem_list_acl_access, + .get = shmem_get_acl_access, + .set = shmem_set_acl_access, +}; + +/** + * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default, + * shmem_xattr_acl_default_handler - plumbing code to implement the + * system.posix_acl_default xattr using the generic acl functions. + */ + +static size_t +shmem_list_acl_default(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, + list, list_size); +} + +static int +shmem_get_acl_default(struct inode *inode, const char *name, void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer, + size); +} + +static int +shmem_set_acl_default(struct inode *inode, const char *name, const void *value, + size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value, + size); +} + +struct xattr_handler shmem_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = shmem_list_acl_default, + .get = shmem_get_acl_default, + .set = shmem_set_acl_default, +}; + +/** + * shmem_acl_init - Inizialize the acl(s) of a new inode + */ +int +shmem_acl_init(struct inode *inode, struct inode *dir) +{ + return generic_acl_init(inode, dir, &shmem_acl_ops); +} + +/** + * shmem_acl_destroy_inode - destroy acls hanging off the in-memory inode + * + * This is done before destroying the actual inode. + */ + +void +shmem_acl_destroy_inode(struct inode *inode) +{ + if (SHMEM_I(inode)->i_acl) + posix_acl_release(SHMEM_I(inode)->i_acl); + SHMEM_I(inode)->i_acl = NULL; + if (SHMEM_I(inode)->i_default_acl) + posix_acl_release(SHMEM_I(inode)->i_default_acl); + SHMEM_I(inode)->i_default_acl = NULL; +} + +/** + * shmem_check_acl - check_acl() callback for generic_permission() + */ +static int +shmem_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS); + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + return -EAGAIN; +} + +/** + * shmem_permission - permission() inode operation + */ +int +shmem_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, shmem_check_acl); +} -- cgit v1.2.3-71-gd317 From 34596dc9e59d7bece16fe5aba08116b49465da26 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Sep 2006 01:47:55 +0200 Subject: [PATCH] Define vsyscall cache as blob to make clearer that user space shouldn't use it Signed-off-by: Andi Kleen --- arch/x86_64/kernel/vsyscall.c | 8 ++++---- include/linux/getcpu.h | 12 +++++++----- kernel/sys.c | 8 ++++---- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index ac48c3857ddb..07c086382059 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -155,8 +155,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) We do this here because otherwise user space would do it on its own in a likely inferior way (no access to jiffies). If you don't like it pass NULL. */ - if (tcache && tcache->t0 == (j = __jiffies)) { - p = tcache->t1; + if (tcache && tcache->blob[0] == (j = __jiffies)) { + p = tcache->blob[1]; } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ rdtscp(dummy, dummy, p); @@ -165,8 +165,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } if (tcache) { - tcache->t0 = j; - tcache->t1 = p; + tcache->blob[0] = j; + tcache->blob[1] = p; } if (cpu) *cpu = p & 0xfff; diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h index 031ed3780e45..c7372d7a97be 100644 --- a/include/linux/getcpu.h +++ b/include/linux/getcpu.h @@ -1,16 +1,18 @@ #ifndef _LINUX_GETCPU_H #define _LINUX_GETCPU_H 1 -/* Cache for getcpu() to speed it up. Results might be upto a jiffie +/* Cache for getcpu() to speed it up. Results might be a short time out of date, but will be faster. + User programs should not refer to the contents of this structure. - It is only a cache for vgetcpu(). It might change in future kernels. + I repeat they should not refer to it. If they do they will break + in future kernels. + + It is only a private cache for vgetcpu(). It will change in future kernels. The user program must store this information per thread (__thread) If you want 100% accurate information pass NULL instead. */ struct getcpu_cache { - unsigned long t0; - unsigned long t1; - unsigned long res[4]; + unsigned long blob[128 / sizeof(long)]; }; #endif diff --git a/kernel/sys.c b/kernel/sys.c index 8647061c084a..b88806c66244 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2083,12 +2083,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, * padding */ unsigned long t0, t1; - get_user(t0, &cache->t0); - get_user(t1, &cache->t1); + get_user(t0, &cache->blob[0]); + get_user(t1, &cache->blob[1]); t0++; t1++; - put_user(t0, &cache->t0); - put_user(t1, &cache->t1); + put_user(t0, &cache->blob[0]); + put_user(t1, &cache->blob[1]); } return err ? -EFAULT : 0; } -- cgit v1.2.3-71-gd317 From 95d4e6be25a68cd9fbe8c0d356b585504d8db1c7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 29 Sep 2006 17:05:05 -0700 Subject: [NetLabel]: audit fixups due to delayed feedback Fix some issues Steve Grubb had with the way NetLabel was using the audit subsystem. This should make NetLabel more consistent with other kernel generated audit messages specifying configuration changes. Signed-off-by: Paul Moore Acked-by: Steve Grubb Signed-off-by: David S. Miller --- include/linux/audit.h | 11 +++-- include/net/cipso_ipv4.h | 4 +- include/net/netlabel.h | 8 +++- net/ipv4/cipso_ipv4.c | 4 +- net/netlabel/netlabel_cipso_v4.c | 48 +++++++++++++--------- net/netlabel/netlabel_domainhash.c | 82 ++++++++++++++++++++------------------ net/netlabel/netlabel_domainhash.h | 8 ++-- net/netlabel/netlabel_mgmt.c | 27 +++++++++---- net/netlabel/netlabel_unlabeled.c | 34 +++++++++++----- net/netlabel/netlabel_user.c | 66 ++++-------------------------- net/netlabel/netlabel_user.h | 16 +++++++- 11 files changed, 157 insertions(+), 151 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 42719d07612a..c3aa09751814 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -95,12 +95,11 @@ #define AUDIT_MAC_POLICY_LOAD 1403 /* Policy file load */ #define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ #define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ -#define AUDIT_MAC_UNLBL_ACCEPT 1406 /* NetLabel: allow unlabeled traffic */ -#define AUDIT_MAC_UNLBL_DENY 1407 /* NetLabel: deny unlabeled traffic */ -#define AUDIT_MAC_CIPSOV4_ADD 1408 /* NetLabel: add CIPSOv4 DOI entry */ -#define AUDIT_MAC_CIPSOV4_DEL 1409 /* NetLabel: del CIPSOv4 DOI entry */ -#define AUDIT_MAC_MAP_ADD 1410 /* NetLabel: add LSM domain mapping */ -#define AUDIT_MAC_MAP_DEL 1411 /* NetLabel: del LSM domain mapping */ +#define AUDIT_MAC_UNLBL_ALLOW 1406 /* NetLabel: allow unlabeled traffic */ +#define AUDIT_MAC_CIPSOV4_ADD 1407 /* NetLabel: add CIPSOv4 DOI entry */ +#define AUDIT_MAC_CIPSOV4_DEL 1408 /* NetLabel: del CIPSOv4 DOI entry */ +#define AUDIT_MAC_MAP_ADD 1409 /* NetLabel: add LSM domain mapping */ +#define AUDIT_MAC_MAP_DEL 1410 /* NetLabel: del LSM domain mapping */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 5d6ae1b2b196..718b4d9c891f 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -129,7 +129,7 @@ extern int cipso_v4_rbm_strictvalid; #ifdef CONFIG_NETLABEL int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); int cipso_v4_doi_walk(u32 *skip_cnt, @@ -145,7 +145,7 @@ static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } static inline int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)) { return 0; diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 190bfdbbdba6..c63a58058e21 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -92,11 +92,17 @@ * */ +/* NetLabel audit information */ +struct netlbl_audit { + u32 secid; + uid_t loginuid; +}; + /* Domain mapping definition struct */ struct netlbl_dom_map; /* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain, u32 audit_secid); +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); /* LSM security attributes */ struct netlbl_lsm_cache { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c4e469ff842d..a8e2e879a647 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -485,7 +485,7 @@ doi_add_failure_rlock: * */ int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)) { struct cipso_v4_doi *doi_def; @@ -506,7 +506,7 @@ int cipso_v4_doi_remove(u32 doi, list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) netlbl_domhsh_remove(dom_iter->domain, - audit_secid); + audit_info); cipso_v4_cache_invalidate(); rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 09986ca962a6..a6ce1d6d5c59 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -384,11 +384,15 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) u32 doi; const char *type_str = "(unknown)"; struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_CIPSOV4_A_DOI] || !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (type) { case CIPSO_V4_MAP_STD: @@ -401,13 +405,14 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) break; } - if (ret_val == 0) { - doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, - NETLINK_CB(skb).sid); - audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str); - audit_log_end(audit_buf); - } + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, + type_str, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); return ret_val; } @@ -668,20 +673,25 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) int ret_val = -EINVAL; u32 doi = 0; struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; - if (info->attrs[NLBL_CIPSOV4_A_DOI]) { - doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - ret_val = cipso_v4_doi_remove(doi, - NETLINK_CB(skb).sid, - netlbl_cipsov4_doi_free); - } + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) + return -EINVAL; - if (ret_val == 0) { - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, - NETLINK_CB(skb).sid); - audit_log_format(audit_buf, " doi=%u", doi); - audit_log_end(audit_buf); - } + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = cipso_v4_doi_remove(doi, + &audit_info, + netlbl_cipsov4_doi_free); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u res=%u", + doi, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index d64e2ae3b129..af4371d3b459 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -188,7 +188,7 @@ int netlbl_domhsh_init(u32 size) /** * netlbl_domhsh_add - Adds a entry to the domain hash table * @entry: the entry to add - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Adds a new entry to the domain hash table and handles any updates to the @@ -196,7 +196,8 @@ int netlbl_domhsh_init(u32 size) * negative on failure. * */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { int ret_val; u32 bkt; @@ -241,26 +242,26 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) spin_unlock(&netlbl_domhsh_def_lock); } else ret_val = -EINVAL; - if (ret_val == 0) { - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, - audit_secid); - audit_log_format(audit_buf, " domain=%s", audit_domain); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: - audit_log_format(audit_buf, - " protocol=cipsov4 doi=%u", - entry->type_def.cipsov4->doi); - break; - } - audit_log_end(audit_buf); + + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + entry->type_def.cipsov4->doi); + break; } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + rcu_read_unlock(); if (ret_val != 0) { @@ -279,7 +280,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) /** * netlbl_domhsh_add_default - Adds the default entry to the domain hash table * @entry: the entry to add - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Adds a new default entry to the domain hash table and handles any updates @@ -287,15 +288,16 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) * negative on failure. * */ -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { - return netlbl_domhsh_add(entry, audit_secid); + return netlbl_domhsh_add(entry, audit_info); } /** * netlbl_domhsh_remove - Removes an entry from the domain hash table * @domain: the domain to remove - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes an entry from the domain hash table and handles any updates to the @@ -303,7 +305,7 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) * negative on failure. * */ -int netlbl_domhsh_remove(const char *domain, u32 audit_secid) +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) { int ret_val = -ENOENT; struct netlbl_dom_map *entry; @@ -345,18 +347,20 @@ int netlbl_domhsh_remove(const char *domain, u32 audit_secid) ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_def_lock); } - if (ret_val == 0) { - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, - audit_secid); - audit_log_format(audit_buf, " domain=%s", audit_domain); - audit_log_end(audit_buf); + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); + audit_log_format(audit_buf, + " nlbl_domain=%s res=%u", + audit_domain, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + + if (ret_val == 0) call_rcu(&entry->rcu, netlbl_domhsh_free_entry); - } remove_return: rcu_read_unlock(); @@ -365,7 +369,7 @@ remove_return: /** * netlbl_domhsh_remove_default - Removes the default entry from the table - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes/resets the default entry for the domain hash table and handles any @@ -373,9 +377,9 @@ remove_return: * success, non-zero on failure. * */ -int netlbl_domhsh_remove_default(u32 audit_secid) +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) { - return netlbl_domhsh_remove(NULL, audit_secid); + return netlbl_domhsh_remove(NULL, audit_info); } /** diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index d50f13cacdca..3689956c3436 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -57,9 +57,11 @@ struct netlbl_dom_map { int netlbl_domhsh_init(u32 size); /* Manipulate the domain hash table */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid); -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid); -int netlbl_domhsh_remove_default(u32 audit_secid); +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 0ac314f18ad1..53c9079ad2c3 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -87,11 +87,14 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) struct netlbl_dom_map *entry = NULL; size_t tmp_size; u32 tmp_val; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN] || !info->attrs[NLBL_MGMT_A_PROTOCOL]) goto add_failure; + netlbl_netlink_auditinfo(skb, &audit_info); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; @@ -108,7 +111,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -125,7 +128,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto add_failure; } - ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add(entry, &audit_info); rcu_read_unlock(); break; default: @@ -156,12 +159,15 @@ add_failure: static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) { char *domain; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN]) return -EINVAL; + netlbl_netlink_auditinfo(skb, &audit_info); + domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); - return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid); + return netlbl_domhsh_remove(domain, &audit_info); } /** @@ -264,10 +270,13 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) int ret_val = -EINVAL; struct netlbl_dom_map *entry = NULL; u32 tmp_val; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) goto adddef_failure; + netlbl_netlink_auditinfo(skb, &audit_info); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; @@ -277,8 +286,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry, - NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -295,8 +303,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto adddef_failure; } - ret_val = netlbl_domhsh_add_default(entry, - NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); rcu_read_unlock(); break; default: @@ -324,7 +331,11 @@ adddef_failure: */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid); + struct netlbl_audit audit_info; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_domhsh_remove_default(&audit_info); } /** diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index ab36675fee8c..1833ad233b39 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -70,18 +70,25 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag * @value: desired value - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Set the value of the unlabeled accept flag to @value. * */ -static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) +static void netlbl_unlabel_acceptflg_set(u8 value, + struct netlbl_audit *audit_info) { + struct audit_buffer *audit_buf; + u8 old_val; + + old_val = atomic_read(&netlabel_unlabel_accept_flg); atomic_set(&netlabel_unlabel_accept_flg, value); - netlbl_audit_nomsg((value ? - AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY), - audit_secid); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, + audit_info); + audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); + audit_log_end(audit_buf); } /* @@ -101,12 +108,13 @@ static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { u8 value; + struct netlbl_audit audit_info; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { - netlbl_unlabel_acceptflg_set(value, - NETLINK_CB(skb).sid); + netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } } @@ -250,19 +258,23 @@ int netlbl_unlabel_defconf(void) { int ret_val; struct netlbl_dom_map *entry; - u32 secid; + struct netlbl_audit audit_info; - security_task_getsecid(current, &secid); + /* Only the kernel is allowed to call this function and the only time + * it is called is at bootup before the audit subsystem is reporting + * messages so don't worry to much about these values. */ + security_task_getsecid(current, &audit_info.secid); + audit_info.loginuid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->type = NETLBL_NLTYPE_UNLABELED; - ret_val = netlbl_domhsh_add_default(entry, secid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; - netlbl_unlabel_acceptflg_set(1, secid); + netlbl_unlabel_acceptflg_set(1, &audit_info); return 0; } diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index c2343af584cb..98a416381e61 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -85,7 +85,7 @@ int netlbl_netlink_init(void) /** * netlbl_audit_start_common - Start an audit message * @type: audit message type - * @secid: LSM context ID + * @audit_info: NetLabel audit information * * Description: * Start an audit message using the type specified in @type and fill the audit @@ -93,14 +93,11 @@ int netlbl_netlink_init(void) * a pointer to the audit buffer on success, NULL on failure. * */ -struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info) { struct audit_context *audit_ctx = current->audit_context; struct audit_buffer *audit_buf; - uid_t audit_loginuid; - const char *audit_tty; - char audit_comm[sizeof(current->comm)]; - struct vm_area_struct *vma; char *secctx; u32 secctx_len; @@ -108,60 +105,13 @@ struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) if (audit_buf == NULL) return NULL; - audit_loginuid = audit_get_loginuid(audit_ctx); - if (current->signal && - current->signal->tty && - current->signal->tty->name) - audit_tty = current->signal->tty->name; - else - audit_tty = "(none)"; - get_task_comm(audit_comm, current); + audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid); - audit_log_format(audit_buf, - "netlabel: auid=%u uid=%u tty=%s pid=%d", - audit_loginuid, - current->uid, - audit_tty, - current->pid); - audit_log_format(audit_buf, " comm="); - audit_log_untrustedstring(audit_buf, audit_comm); - if (current->mm) { - down_read(¤t->mm->mmap_sem); - vma = current->mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { - audit_log_d_path(audit_buf, - " exe=", - vma->vm_file->f_dentry, - vma->vm_file->f_vfsmnt); - break; - } - vma = vma->vm_next; - } - up_read(¤t->mm->mmap_sem); - } - - if (secid != 0 && - security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) + if (audit_info->secid != 0 && + security_secid_to_secctx(audit_info->secid, + &secctx, + &secctx_len) == 0) audit_log_format(audit_buf, " subj=%s", secctx); return audit_buf; } - -/** - * netlbl_audit_nomsg - Send an audit message without additional text - * @type: audit message type - * @secid: LSM context ID - * - * Description: - * Send an audit message with only the common NetLabel audit fields. - * - */ -void netlbl_audit_nomsg(int type, u32 secid) -{ - struct audit_buffer *audit_buf; - - audit_buf = netlbl_audit_start_common(type, secid); - audit_log_end(audit_buf); -} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index ab840acfc964..47967ef32964 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -72,13 +72,25 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, NETLBL_PROTO_VERSION); } +/** + * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg + * @skb: the packet + * @audit_info: NetLabel audit information + */ +static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, + struct netlbl_audit *audit_info) +{ + audit_info->secid = NETLINK_CB(skb).sid; + audit_info->loginuid = NETLINK_CB(skb).loginuid; +} + /* NetLabel NETLINK I/O functions */ int netlbl_netlink_init(void); /* NetLabel Audit Functions */ -struct audit_buffer *netlbl_audit_start_common(int type, u32 secid); -void netlbl_audit_nomsg(int type, u32 secid); +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info); #endif -- cgit v1.2.3-71-gd317 From f9317a40c4e09e20ef01601fc9f5de9e6acb5b96 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 29 Sep 2006 17:06:23 -0700 Subject: [BNX2]: Disable MSI on 5706 if AMD 8132 bridge is present. MSI is defined to be 32-bit write. The 5706 does 64-bit MSI writes with byte enables disabled on the unused 32-bit word. This is legal but causes problems on the AMD 8132 which will eventually stop responding after a while. Without this patch, the MSI test done by the driver during open will pass, but MSI will eventually stop working after a few MSIs are written by the device. AMD believes this incompatibility is unique to the 5706, and prefers to locally disable MSI rather than globally disabling it using pci_msi_quirk. Update version to 1.4.45. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 32 ++++++++++++++++++++++++++++++-- include/linux/pci_ids.h | 1 + 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 7fcf015021ec..6b4edb63c4c4 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -56,8 +56,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.4.44" -#define DRV_MODULE_RELDATE "August 10, 2006" +#define DRV_MODULE_VERSION "1.4.45" +#define DRV_MODULE_RELDATE "September 29, 2006" #define RUN_AT(x) (jiffies + (x)) @@ -5805,6 +5805,34 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->cmd_ticks_int = bp->cmd_ticks; } + /* Disable MSI on 5706 if AMD 8132 bridge is found. + * + * MSI is defined to be 32-bit write. The 5706 does 64-bit MSI writes + * with byte enables disabled on the unused 32-bit word. This is legal + * but causes problems on the AMD 8132 which will eventually stop + * responding after a while. + * + * AMD believes this incompatibility is unique to the 5706, and + * prefers to locally disable MSI rather than globally disabling it + * using pci_msi_quirk. + */ + if (CHIP_NUM(bp) == CHIP_NUM_5706 && disable_msi == 0) { + struct pci_dev *amd_8132 = NULL; + + while ((amd_8132 = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_8132_BRIDGE, + amd_8132))) { + u8 rev; + + pci_read_config_byte(amd_8132, PCI_REVISION_ID, &rev); + if (rev >= 0x10 && rev <= 0x13) { + disable_msi = 1; + pci_dev_put(amd_8132); + break; + } + } + } + bp->autoneg = AUTONEG_SPEED | AUTONEG_FLOW_CTRL; bp->req_line_speed = 0; if (bp->phy_flags & PHY_SERDES_FLAG) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b7e85ff045ea..c9ffbc3843d5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -507,6 +507,7 @@ #define PCI_DEVICE_ID_AMD_8151_0 0x7454 #define PCI_DEVICE_ID_AMD_8131_BRIDGE 0x7450 #define PCI_DEVICE_ID_AMD_8131_APIC 0x7451 +#define PCI_DEVICE_ID_AMD_8132_BRIDGE 0x7458 #define PCI_DEVICE_ID_AMD_CS5536_ISA 0x2090 #define PCI_DEVICE_ID_AMD_CS5536_FLASH 0x2091 #define PCI_DEVICE_ID_AMD_CS5536_AUDIO 0x2093 -- cgit v1.2.3-71-gd317 From 1c9d3e72a7164c590437f2ab6c2c4f6da91f1703 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Fri, 29 Sep 2006 17:13:24 -0700 Subject: [ATM]: [lec] header indent, comment and whitespace cleanup Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- include/linux/atmlec.h | 119 ++++++++++++++++++---------------- net/atm/lec.h | 172 +++++++++++++++++++++++++++---------------------- net/atm/lec_arpc.h | 148 ++++++++++++++++++++++-------------------- 3 files changed, 235 insertions(+), 204 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmlec.h b/include/linux/atmlec.h index f267f2442766..6f5a1bab8f50 100644 --- a/include/linux/atmlec.h +++ b/include/linux/atmlec.h @@ -1,9 +1,7 @@ /* - * - * ATM Lan Emulation Daemon vs. driver interface - * - * mkiiskila@yahoo.com + * ATM Lan Emulation Daemon driver interface * + * Marko Kiiskila */ #ifndef _ATMLEC_H_ @@ -13,76 +11,87 @@ #include #include #include + /* ATM lec daemon control socket */ -#define ATMLEC_CTRL _IO('a',ATMIOC_LANE) -#define ATMLEC_DATA _IO('a',ATMIOC_LANE+1) -#define ATMLEC_MCAST _IO('a',ATMIOC_LANE+2) +#define ATMLEC_CTRL _IO('a', ATMIOC_LANE) +#define ATMLEC_DATA _IO('a', ATMIOC_LANE+1) +#define ATMLEC_MCAST _IO('a', ATMIOC_LANE+2) /* Maximum number of LEC interfaces (tweakable) */ #define MAX_LEC_ITF 48 -/* From the total of MAX_LEC_ITF, last NUM_TR_DEVS are reserved for Token Ring. +/* + * From the total of MAX_LEC_ITF, last NUM_TR_DEVS are reserved for Token Ring. * E.g. if MAX_LEC_ITF = 48 and NUM_TR_DEVS = 8, then lec0-lec39 are for * Ethernet ELANs and lec40-lec47 are for Token Ring ELANS. */ #define NUM_TR_DEVS 8 -typedef enum { - l_set_mac_addr, l_del_mac_addr, - l_svc_setup, - l_addr_delete, l_topology_change, - l_flush_complete, l_arp_update, - l_narp_req, /* LANE2 mandates the use of this */ - l_config, l_flush_tran_id, - l_set_lecid, l_arp_xmt, - l_rdesc_arp_xmt, - l_associate_req, - l_should_bridge /* should we bridge this MAC? */ +typedef enum { + l_set_mac_addr, + l_del_mac_addr, + l_svc_setup, + l_addr_delete, + l_topology_change, + l_flush_complete, + l_arp_update, + l_narp_req, /* LANE2 mandates the use of this */ + l_config, + l_flush_tran_id, + l_set_lecid, + l_arp_xmt, + l_rdesc_arp_xmt, + l_associate_req, + l_should_bridge /* should we bridge this MAC? */ } atmlec_msg_type; #define ATMLEC_MSG_TYPE_MAX l_should_bridge struct atmlec_config_msg { - unsigned int maximum_unknown_frame_count; - unsigned int max_unknown_frame_time; - unsigned short max_retry_count; - unsigned int aging_time; - unsigned int forward_delay_time; - unsigned int arp_response_time; - unsigned int flush_timeout; - unsigned int path_switching_delay; - unsigned int lane_version; /* LANE2: 1 for LANEv1, 2 for LANEv2 */ - int mtu; - int is_proxy; + unsigned int maximum_unknown_frame_count; + unsigned int max_unknown_frame_time; + unsigned short max_retry_count; + unsigned int aging_time; + unsigned int forward_delay_time; + unsigned int arp_response_time; + unsigned int flush_timeout; + unsigned int path_switching_delay; + unsigned int lane_version; /* LANE2: 1 for LANEv1, 2 for LANEv2 */ + int mtu; + int is_proxy; }; - + struct atmlec_msg { - atmlec_msg_type type; - int sizeoftlvs; /* LANE2: if != 0, tlvs follow */ - union { - struct { - unsigned char mac_addr[ETH_ALEN]; - unsigned char atm_addr[ATM_ESA_LEN]; - unsigned int flag;/* Topology_change flag, - remoteflag, permanent flag, - lecid, transaction id */ - unsigned int targetless_le_arp; /* LANE2 */ - unsigned int no_source_le_narp; /* LANE2 */ - } normal; - struct atmlec_config_msg config; - struct { - uint16_t lec_id; /* requestor lec_id */ - uint32_t tran_id; /* transaction id */ - unsigned char mac_addr[ETH_ALEN]; /* dst mac addr */ - unsigned char atm_addr[ATM_ESA_LEN]; /* reqestor ATM addr */ - } proxy; - /* For mapping LE_ARP requests to responses. Filled by */ - } content; /* zeppelin, returned by kernel. Used only when proxying */ + atmlec_msg_type type; + int sizeoftlvs; /* LANE2: if != 0, tlvs follow */ + union { + struct { + unsigned char mac_addr[ETH_ALEN]; + unsigned char atm_addr[ATM_ESA_LEN]; + unsigned int flag; /* + * Topology_change flag, + * remoteflag, permanent flag, + * lecid, transaction id + */ + unsigned int targetless_le_arp; /* LANE2 */ + unsigned int no_source_le_narp; /* LANE2 */ + } normal; + struct atmlec_config_msg config; + struct { + uint16_t lec_id; /* requestor lec_id */ + uint32_t tran_id; /* transaction id */ + unsigned char mac_addr[ETH_ALEN]; /* dst mac addr */ + unsigned char atm_addr[ATM_ESA_LEN]; /* reqestor ATM addr */ + } proxy; /* + * For mapping LE_ARP requests to responses. Filled by + * zeppelin, returned by kernel. Used only when proxying + */ + } content; } __ATM_API_ALIGN; struct atmlec_ioc { - int dev_num; - unsigned char atm_addr[ATM_ESA_LEN]; - unsigned char receive; /* 1= receive vcc, 0 = send vcc */ + int dev_num; + unsigned char atm_addr[ATM_ESA_LEN]; + unsigned char receive; /* 1= receive vcc, 0 = send vcc */ }; #endif /* _ATMLEC_H_ */ diff --git a/net/atm/lec.h b/net/atm/lec.h index c22a8bfa1f81..c700472f64df 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -1,14 +1,13 @@ /* - * * Lan Emulation client header file * - * Marko Kiiskila mkiiskila@yahoo.com - * + * Marko Kiiskila */ #ifndef _LEC_H_ #define _LEC_H_ +#include #include #include #include @@ -16,18 +15,18 @@ #define LEC_HEADER_LEN 16 struct lecdatahdr_8023 { - unsigned short le_header; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - unsigned short h_type; + unsigned short le_header; + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_type; }; struct lecdatahdr_8025 { - unsigned short le_header; - unsigned char ac_pad; - unsigned char fc; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + unsigned short le_header; + unsigned char ac_pad; + unsigned char fc; + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; }; #define LEC_MINIMUM_8023_SIZE 62 @@ -44,17 +43,18 @@ struct lecdatahdr_8025 { * */ struct lane2_ops { - int (*resolve)(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); - int (*associate_req)(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); - void (*associate_indicator)(struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs); + int (*resolve) (struct net_device *dev, u8 *dst_mac, int force, + u8 **tlvs, u32 *sizeoftlvs); + int (*associate_req) (struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs); + void (*associate_indicator) (struct net_device *dev, u8 *mac_addr, + u8 *tlvs, u32 sizeoftlvs); }; /* * ATM LAN Emulation supports both LLC & Dix Ethernet EtherType * frames. + * * 1. Dix Ethernet EtherType frames encoded by placing EtherType * field in h_type field. Data follows immediatelly after header. * 2. LLC Data frames whose total length, including LLC field and data, @@ -70,72 +70,88 @@ struct lane2_ops { #define LEC_ARP_TABLE_SIZE 16 struct lec_priv { - struct net_device_stats stats; - unsigned short lecid; /* Lecid of this client */ - struct lec_arp_table *lec_arp_empty_ones; - /* Used for storing VCC's that don't have a MAC address attached yet */ - struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE]; - /* Actual LE ARP table */ - struct lec_arp_table *lec_no_forward; - /* Used for storing VCC's (and forward packets from) which are to - age out by not using them to forward packets. - This is because to some LE clients there will be 2 VCCs. Only - one of them gets used. */ - struct lec_arp_table *mcast_fwds; - /* With LANEv2 it is possible that BUS (or a special multicast server) - establishes multiple Multicast Forward VCCs to us. This list - collects all those VCCs. LANEv1 client has only one item in this - list. These entries are not aged out. */ - spinlock_t lec_arp_lock; - struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ - struct atm_vcc *lecd; - struct timer_list lec_arp_timer; - /* C10 */ - unsigned int maximum_unknown_frame_count; -/* Within the period of time defined by this variable, the client will send - no more than C10 frames to BUS for a given unicast destination. (C11) */ - unsigned long max_unknown_frame_time; -/* If no traffic has been sent in this vcc for this period of time, - vcc will be torn down (C12)*/ - unsigned long vcc_timeout_period; -/* An LE Client MUST not retry an LE_ARP_REQUEST for a - given frame's LAN Destination more than maximum retry count times, - after the first LEC_ARP_REQUEST (C13)*/ - unsigned short max_retry_count; -/* Max time the client will maintain an entry in its arp cache in - absence of a verification of that relationship (C17)*/ - unsigned long aging_time; -/* Max time the client will maintain an entry in cache when - topology change flag is true (C18) */ - unsigned long forward_delay_time; -/* Topology change flag (C19)*/ - int topology_change; -/* Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE - cycle to take (C20)*/ - unsigned long arp_response_time; -/* Time limit ot wait to receive an LE_FLUSH_RESPONSE after the - LE_FLUSH_REQUEST has been sent before taking recover action. (C21)*/ - unsigned long flush_timeout; -/* The time since sending a frame to the bus after which the - LE Client may assume that the frame has been either discarded or - delivered to the recipient (C22) */ - unsigned long path_switching_delay; + struct net_device_stats stats; + unsigned short lecid; /* Lecid of this client */ + struct lec_arp_table *lec_arp_empty_ones; + /* Used for storing VCC's that don't have a MAC address attached yet */ + struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE]; + /* Actual LE ARP table */ + struct lec_arp_table *lec_no_forward; + /* + * Used for storing VCC's (and forward packets from) which are to + * age out by not using them to forward packets. + * This is because to some LE clients there will be 2 VCCs. Only + * one of them gets used. + */ + struct lec_arp_table *mcast_fwds; + /* + * With LANEv2 it is possible that BUS (or a special multicast server) + * establishes multiple Multicast Forward VCCs to us. This list + * collects all those VCCs. LANEv1 client has only one item in this + * list. These entries are not aged out. + */ + spinlock_t lec_arp_lock; + struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ + struct atm_vcc *lecd; + struct timer_list lec_arp_timer; /* C10 */ + unsigned int maximum_unknown_frame_count; + /* + * Within the period of time defined by this variable, the client will send + * no more than C10 frames to BUS for a given unicast destination. (C11) + */ + unsigned long max_unknown_frame_time; + /* + * If no traffic has been sent in this vcc for this period of time, + * vcc will be torn down (C12) + */ + unsigned long vcc_timeout_period; + /* + * An LE Client MUST not retry an LE_ARP_REQUEST for a + * given frame's LAN Destination more than maximum retry count times, + * after the first LEC_ARP_REQUEST (C13) + */ + unsigned short max_retry_count; + /* + * Max time the client will maintain an entry in its arp cache in + * absence of a verification of that relationship (C17) + */ + unsigned long aging_time; + /* + * Max time the client will maintain an entry in cache when + * topology change flag is true (C18) + */ + unsigned long forward_delay_time; /* Topology change flag (C19) */ + int topology_change; + /* + * Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE + * cycle to take (C20) + */ + unsigned long arp_response_time; + /* + * Time limit ot wait to receive an LE_FLUSH_RESPONSE after the + * LE_FLUSH_REQUEST has been sent before taking recover action. (C21) + */ + unsigned long flush_timeout; + /* The time since sending a frame to the bus after which the + * LE Client may assume that the frame has been either discarded or + * delivered to the recipient (C22) + */ + unsigned long path_switching_delay; - u8 *tlvs; /* LANE2: TLVs are new */ - u32 sizeoftlvs; /* The size of the tlv array in bytes */ - int lane_version; /* LANE2 */ - int itfnum; /* e.g. 2 for lec2, 5 for lec5 */ - struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ - int is_proxy; /* bridge between ATM and Ethernet */ - int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */ + u8 *tlvs; /* LANE2: TLVs are new */ + u32 sizeoftlvs; /* The size of the tlv array in bytes */ + int lane_version; /* LANE2 */ + int itfnum; /* e.g. 2 for lec2, 5 for lec5 */ + struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ + int is_proxy; /* bridge between ATM and Ethernet */ + int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */ }; struct lec_vcc_priv { - void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); + void (*old_pop) (struct atm_vcc *vcc, struct sk_buff *skb); int xoff; }; #define LEC_VCC_PRIV(vcc) ((struct lec_vcc_priv *)((vcc)->user_back)) -#endif /* _LEC_H_ */ - +#endif /* _LEC_H_ */ diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h index 397448094648..0230ca148c77 100644 --- a/net/atm/lec_arpc.h +++ b/net/atm/lec_arpc.h @@ -1,92 +1,98 @@ /* * Lec arp cache - * Marko Kiiskila mkiiskila@yahoo.com * + * Marko Kiiskila */ -#ifndef _LEC_ARP_H -#define _LEC_ARP_H +#ifndef _LEC_ARP_H_ +#define _LEC_ARP_H_ #include #include #include #include struct lec_arp_table { - struct lec_arp_table *next; /* Linked entry list */ - unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ - unsigned char mac_addr[ETH_ALEN]; /* Mac address */ - int is_rdesc; /* Mac address is a route descriptor */ - struct atm_vcc *vcc; /* Vcc this entry is attached */ - struct atm_vcc *recv_vcc; /* Vcc we receive data from */ - void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); - /* Push that leads to daemon */ - void (*old_recv_push)(struct atm_vcc *vcc, struct sk_buff *skb); - /* Push that leads to daemon */ - void (*old_close)(struct atm_vcc *vcc); - /* We want to see when this - * vcc gets closed */ - unsigned long last_used; /* For expiry */ - unsigned long timestamp; /* Used for various timestamping - * things: - * 1. FLUSH started - * (status=ESI_FLUSH_PENDING) - * 2. Counting to - * max_unknown_frame_time - * (status=ESI_ARP_PENDING|| - * status=ESI_VC_PENDING) - */ - unsigned char no_tries; /* No of times arp retry has been - tried */ - unsigned char status; /* Status of this entry */ - unsigned short flags; /* Flags for this entry */ - unsigned short packets_flooded; /* Data packets flooded */ - unsigned long flush_tran_id; /* Transaction id in flush protocol */ - struct timer_list timer; /* Arping timer */ - struct lec_priv *priv; /* Pointer back */ + struct lec_arp_table *next; /* Linked entry list */ + unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ + unsigned char mac_addr[ETH_ALEN]; /* Mac address */ + int is_rdesc; /* Mac address is a route descriptor */ + struct atm_vcc *vcc; /* Vcc this entry is attached */ + struct atm_vcc *recv_vcc; /* Vcc we receive data from */ - u8 *tlvs; /* LANE2: Each MAC address can have TLVs */ - u32 sizeoftlvs; /* associated with it. sizeoftlvs tells the */ - /* the length of the tlvs array */ - struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ + void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb); + /* Push that leads to daemon */ + + void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb); + /* Push that leads to daemon */ + + void (*old_close) (struct atm_vcc *vcc); + /* We want to see when this vcc gets closed */ + + unsigned long last_used; /* For expiry */ + unsigned long timestamp; /* Used for various timestamping things: + * 1. FLUSH started + * (status=ESI_FLUSH_PENDING) + * 2. Counting to + * max_unknown_frame_time + * (status=ESI_ARP_PENDING|| + * status=ESI_VC_PENDING) + */ + unsigned char no_tries; /* No of times arp retry has been tried */ + unsigned char status; /* Status of this entry */ + unsigned short flags; /* Flags for this entry */ + unsigned short packets_flooded; /* Data packets flooded */ + unsigned long flush_tran_id; /* Transaction id in flush protocol */ + struct timer_list timer; /* Arping timer */ + struct lec_priv *priv; /* Pointer back */ + u8 *tlvs; + u32 sizeoftlvs; /* + * LANE2: Each MAC address can have TLVs + * associated with it. sizeoftlvs tells the + * the length of the tlvs array + */ + struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ }; -struct tlv { /* LANE2: Template tlv struct for accessing */ - /* the tlvs in the lec_arp_table->tlvs array*/ - u32 type; - u8 length; - u8 value[255]; +/* + * LANE2: Template tlv struct for accessing + * the tlvs in the lec_arp_table->tlvs array + */ +struct tlv { + u32 type; + u8 length; + u8 value[255]; }; /* Status fields */ -#define ESI_UNKNOWN 0 /* - * Next packet sent to this mac address - * causes ARP-request to be sent - */ -#define ESI_ARP_PENDING 1 /* - * There is no ATM address associated with this - * 48-bit address. The LE-ARP protocol is in - * progress. - */ -#define ESI_VC_PENDING 2 /* - * There is a valid ATM address associated with - * this 48-bit address but there is no VC set - * up to that ATM address. The signaling - * protocol is in process. - */ -#define ESI_FLUSH_PENDING 4 /* - * The LEC has been notified of the FLUSH_START - * status and it is assumed that the flush - * protocol is in process. - */ -#define ESI_FORWARD_DIRECT 5 /* - * Either the Path Switching Delay (C22) has - * elapsed or the LEC has notified the Mapping - * that the flush protocol has completed. In - * either case, it is safe to forward packets - * to this address via the data direct VC. - */ +#define ESI_UNKNOWN 0 /* + * Next packet sent to this mac address + * causes ARP-request to be sent + */ +#define ESI_ARP_PENDING 1 /* + * There is no ATM address associated with this + * 48-bit address. The LE-ARP protocol is in + * progress. + */ +#define ESI_VC_PENDING 2 /* + * There is a valid ATM address associated with + * this 48-bit address but there is no VC set + * up to that ATM address. The signaling + * protocol is in process. + */ +#define ESI_FLUSH_PENDING 4 /* + * The LEC has been notified of the FLUSH_START + * status and it is assumed that the flush + * protocol is in process. + */ +#define ESI_FORWARD_DIRECT 5 /* + * Either the Path Switching Delay (C22) has + * elapsed or the LEC has notified the Mapping + * that the flush protocol has completed. In + * either case, it is safe to forward packets + * to this address via the data direct VC. + */ /* Flag values */ #define LEC_REMOTE_FLAG 0x0001 #define LEC_PERMANENT_FLAG 0x0002 -#endif +#endif /* _LEC_ARP_H_ */ -- cgit v1.2.3-71-gd317 From 4aff5e2333c9a1609662f2091f55c3f6fffdad36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Aug 2006 08:44:47 +0200 Subject: [PATCH] Split struct request ->flags into two parts Right now ->flags is a bit of a mess: some are request types, and others are just modifiers. Clean this up by splitting it into ->cmd_type and ->cmd_flags. This allows introduction of generic Linux block message types, useful for sending generic Linux commands to block devices. Signed-off-by: Jens Axboe --- block/as-iosched.c | 2 +- block/elevator.c | 26 +++--- block/ll_rw_blk.c | 101 ++++++++-------------- block/scsi_ioctl.c | 6 +- drivers/block/floppy.c | 4 +- drivers/block/nbd.c | 8 +- drivers/block/paride/pd.c | 2 +- drivers/block/pktcdvd.c | 6 +- drivers/block/xd.c | 2 +- drivers/cdrom/cdrom.c | 2 +- drivers/cdrom/cdu31a.c | 4 +- drivers/ide/ide-cd.c | 69 +++++++-------- drivers/ide/ide-disk.c | 5 +- drivers/ide/ide-dma.c | 2 +- drivers/ide/ide-floppy.c | 14 ++-- drivers/ide/ide-io.c | 36 ++++---- drivers/ide/ide-lib.c | 5 +- drivers/ide/ide-tape.c | 8 +- drivers/ide/ide-taskfile.c | 8 +- drivers/ide/ide.c | 4 +- drivers/ide/legacy/hd.c | 2 +- drivers/md/dm-emc.c | 3 +- drivers/message/i2o/i2o_block.c | 7 +- drivers/mmc/mmc_queue.c | 6 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/s390/block/dasd_diag.c | 2 +- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_fba.c | 2 +- drivers/scsi/aic7xxx_old.c | 4 +- drivers/scsi/ide-scsi.c | 14 ++-- drivers/scsi/pluto.c | 6 +- drivers/scsi/scsi_lib.c | 37 +++++---- drivers/scsi/sd.c | 5 +- drivers/scsi/sun3_NCR5380.c | 2 +- drivers/scsi/sun3_scsi.c | 2 +- drivers/scsi/sun3_scsi_vme.c | 2 +- include/linux/blkdev.h | 180 ++++++++++++++++++++++------------------ include/linux/blktrace_api.h | 2 +- include/scsi/scsi_tcq.h | 2 +- 39 files changed, 295 insertions(+), 301 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index 5da56d48fbd3..ad1cc4077819 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1335,7 +1335,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) arq->state = AS_RQ_NEW; if (rq_data_dir(arq->request) == READ - || (arq->request->flags & REQ_RW_SYNC)) + || (arq->request->cmd_flags & REQ_RW_SYNC)) arq->is_sync = 1; else arq->is_sync = 0; diff --git a/block/elevator.c b/block/elevator.c index 9b72dc7c8a5c..4ac97b642042 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -242,7 +242,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) + if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) break; if (rq->sector >= boundary) { if (pos->sector < boundary) @@ -313,7 +313,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) e->ops->elevator_deactivate_req_fn(q, rq); } - rq->flags &= ~REQ_STARTED; + rq->cmd_flags &= ~REQ_STARTED; elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); } @@ -344,13 +344,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) switch (where) { case ELEVATOR_INSERT_FRONT: - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_BACK: - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; elv_drain_elevator(q); list_add_tail(&rq->queuelist, &q->queue_head); /* @@ -369,7 +369,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); - rq->flags |= REQ_SORTED; + rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (q->last_merge == NULL && rq_mergeable(rq)) q->last_merge = rq; @@ -387,7 +387,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) * insertion; otherwise, requests should be requeued * in ordseq order. */ - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; if (q->ordseq == 0) { list_add(&rq->queuelist, &q->queue_head); @@ -429,9 +429,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { if (q->ordcolor) - rq->flags |= REQ_ORDERED_COLOR; + rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { /* * toggle ordered color */ @@ -452,7 +452,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } - } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) + } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) where = ELEVATOR_INSERT_BACK; if (plug) @@ -493,7 +493,7 @@ struct request *elv_next_request(request_queue_t *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { - if (!(rq->flags & REQ_STARTED)) { + if (!(rq->cmd_flags & REQ_STARTED)) { elevator_t *e = q->elevator; /* @@ -510,7 +510,7 @@ struct request *elv_next_request(request_queue_t *q) * it, a request that has been delayed should * not be passed by new incoming requests */ - rq->flags |= REQ_STARTED; + rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } @@ -519,7 +519,7 @@ struct request *elv_next_request(request_queue_t *q) q->boundary_rq = NULL; } - if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) + if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) break; ret = q->prep_rq_fn(q, rq); @@ -541,7 +541,7 @@ struct request *elv_next_request(request_queue_t *q) nr_bytes = rq->data_len; blkdev_dequeue_request(rq); - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; end_that_request_chunk(rq, 0, nr_bytes); end_that_request_last(rq, 0); } else { diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 51dc0edf76e0..9b91bb70c5ed 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -382,8 +382,8 @@ unsigned blk_ordered_req_seq(struct request *rq) if (rq == &q->post_flush_rq) return QUEUE_ORDSEQ_POSTFLUSH; - if ((rq->flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) + if ((rq->cmd_flags & REQ_ORDERED_COLOR) == + (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) return QUEUE_ORDSEQ_DRAIN; else return QUEUE_ORDSEQ_DONE; @@ -446,8 +446,8 @@ static void queue_flush(request_queue_t *q, unsigned which) end_io = post_flush_end_io; } + rq->cmd_flags = REQ_HARDBARRIER; rq_init(q, rq); - rq->flags = REQ_HARDBARRIER; rq->elevator_private = NULL; rq->rq_disk = q->bar_rq.rq_disk; rq->rl = NULL; @@ -471,9 +471,11 @@ static inline struct request *start_ordered(request_queue_t *q, blkdev_dequeue_request(rq); q->orig_bar_rq = rq; rq = &q->bar_rq; + rq->cmd_flags = 0; rq_init(q, rq); - rq->flags = bio_data_dir(q->orig_bar_rq->bio); - rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; + if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) + rq->cmd_flags |= REQ_RW; + rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; rq->elevator_private = NULL; rq->rl = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); @@ -1124,7 +1126,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq) } list_del_init(&rq->queuelist); - rq->flags &= ~REQ_QUEUED; + rq->cmd_flags &= ~REQ_QUEUED; rq->tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) @@ -1160,7 +1162,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq) struct blk_queue_tag *bqt = q->queue_tags; int tag; - if (unlikely((rq->flags & REQ_QUEUED))) { + if (unlikely((rq->cmd_flags & REQ_QUEUED))) { printk(KERN_ERR "%s: request %p for device [%s] already tagged %d", __FUNCTION__, rq, @@ -1174,7 +1176,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq) __set_bit(tag, bqt->tag_map); - rq->flags |= REQ_QUEUED; + rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; blkdev_dequeue_request(rq); @@ -1210,65 +1212,31 @@ void blk_queue_invalidate_tags(request_queue_t *q) printk(KERN_ERR "%s: bad tag found on list\n", __FUNCTION__); list_del_init(&rq->queuelist); - rq->flags &= ~REQ_QUEUED; + rq->cmd_flags &= ~REQ_QUEUED; } else blk_queue_end_tag(q, rq); - rq->flags &= ~REQ_STARTED; + rq->cmd_flags &= ~REQ_STARTED; __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); } } EXPORT_SYMBOL(blk_queue_invalidate_tags); -static const char * const rq_flags[] = { - "REQ_RW", - "REQ_FAILFAST", - "REQ_SORTED", - "REQ_SOFTBARRIER", - "REQ_HARDBARRIER", - "REQ_FUA", - "REQ_CMD", - "REQ_NOMERGE", - "REQ_STARTED", - "REQ_DONTPREP", - "REQ_QUEUED", - "REQ_ELVPRIV", - "REQ_PC", - "REQ_BLOCK_PC", - "REQ_SENSE", - "REQ_FAILED", - "REQ_QUIET", - "REQ_SPECIAL", - "REQ_DRIVE_CMD", - "REQ_DRIVE_TASK", - "REQ_DRIVE_TASKFILE", - "REQ_PREEMPT", - "REQ_PM_SUSPEND", - "REQ_PM_RESUME", - "REQ_PM_SHUTDOWN", - "REQ_ORDERED_COLOR", -}; - void blk_dump_rq_flags(struct request *rq, char *msg) { int bit; - printk("%s: dev %s: flags = ", msg, - rq->rq_disk ? rq->rq_disk->disk_name : "?"); - bit = 0; - do { - if (rq->flags & (1 << bit)) - printk("%s ", rq_flags[bit]); - bit++; - } while (bit < __REQ_NR_BITS); + printk("%s: dev %s: type=%x, flags=%x\n", msg, + rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, + rq->cmd_flags); printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); - if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { + if (blk_pc_request(rq)) { printk("cdb: "); for (bit = 0; bit < sizeof(rq->cmd); bit++) printk("%02x ", rq->cmd[bit]); @@ -1441,7 +1409,7 @@ static inline int ll_new_mergeable(request_queue_t *q, int nr_phys_segs = bio_phys_segments(q, bio); if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1464,7 +1432,7 @@ static inline int ll_new_hw_segment(request_queue_t *q, if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1491,7 +1459,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, max_sectors = q->max_sectors; if (req->nr_sectors + bio_sectors(bio) > max_sectors) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1530,7 +1498,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req, if (req->nr_sectors + bio_sectors(bio) > max_sectors) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -2029,7 +1997,7 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(request_queue_t *q, struct request *rq) { - if (rq->flags & REQ_ELVPRIV) + if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); } @@ -2044,17 +2012,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, return NULL; /* - * first three bits are identical in rq->flags and bio->bi_rw, + * first three bits are identical in rq->cmd_flags and bio->bi_rw, * see bio.h and blkdev.h */ - rq->flags = rw; + rq->cmd_flags = rw; if (priv) { if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { mempool_free(rq, q->rq.rq_pool); return NULL; } - rq->flags |= REQ_ELVPRIV; + rq->cmd_flags |= REQ_ELVPRIV; } return rq; @@ -2351,7 +2319,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq, * must not attempt merges on this) and that it acts as a soft * barrier */ - rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = data; @@ -2558,7 +2527,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; rq->rq_disk = bd_disk; - rq->flags |= REQ_NOMERGE; + rq->cmd_flags |= REQ_NOMERGE; rq->end_io = done; WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); @@ -2728,7 +2697,7 @@ void __blk_put_request(request_queue_t *q, struct request *req) */ if (rl) { int rw = rq_data_dir(req); - int priv = req->flags & REQ_ELVPRIV; + int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); @@ -2890,22 +2859,22 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq) static void init_request_from_bio(struct request *req, struct bio *bio) { - req->flags |= REQ_CMD; + req->cmd_type = REQ_TYPE_FS; /* * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) */ if (bio_rw_ahead(bio) || bio_failfast(bio)) - req->flags |= REQ_FAILFAST; + req->cmd_flags |= REQ_FAILFAST; /* * REQ_BARRIER implies no merging, but lets make it explicit */ if (unlikely(bio_barrier(bio))) - req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (bio_sync(bio)) - req->flags |= REQ_RW_SYNC; + req->cmd_flags |= REQ_RW_SYNC; req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; @@ -3306,7 +3275,7 @@ static int __end_that_request_first(struct request *req, int uptodate, req->errors = 0; if (!uptodate) { - if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) + if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) printk("end_request: I/O error, dev %s, sector %llu\n", req->rq_disk ? req->rq_disk->disk_name : "?", (unsigned long long)req->sector); @@ -3569,8 +3538,8 @@ EXPORT_SYMBOL(end_request); void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) { - /* first two bits are identical in rq->flags and bio->bi_rw */ - rq->flags |= (bio->bi_rw & 3); + /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ + rq->cmd_flags |= (bio->bi_rw & 3); rq->nr_phys_segments = bio_phys_segments(q, bio); rq->nr_hw_segments = bio_hw_segments(q, bio); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index b33eda26e205..2dc326421a24 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q, rq->sense = sense; rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; bio = rq->bio; /* @@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, memset(sense, 0, sizeof(sense)); rq->sense = sense; rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; blk_execute_rq(q, disk, rq, 0); @@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c int err; rq = blk_get_request(q, WRITE, __GFP_WAIT); - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; rq->data_len = 0; rq->timeout = BLK_DEFAULT_TIMEOUT; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ad1d7065a1b2..629c5769d994 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q) if (usage_count == 0) { printk("warning: usage count=0, current_req=%p exiting\n", current_req); - printk("sect=%ld flags=%lx\n", (long)current_req->sector, - current_req->flags); + printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector, + current_req->cmd_type, current_req->cmd_flags); return; } if (test_bit(0, &fdc_busy)) { diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index bdbade9a5cf5..9d1035e8d9d8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q) struct nbd_device *lo; blkdev_dequeue_request(req); - dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n", - req->rq_disk->disk_name, req, req->flags); + dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", + req->rq_disk->disk_name, req, req->cmd_type); - if (!(req->flags & REQ_CMD)) + if (!blk_fs_request(req)) goto error_out; lo = req->rq_disk->private_data; @@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, switch (cmd) { case NBD_DISCONNECT: printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); - sreq.flags = REQ_SPECIAL; + sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; /* * Set these to sane values in case server implementation diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 2403721f9db1..12ff1a274d91 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -437,7 +437,7 @@ static char *pd_buf; /* buffer for request in progress */ static enum action do_pd_io_start(void) { - if (pd_req->flags & REQ_SPECIAL) { + if (blk_special_request(pd_req)) { phase = pd_special; return pd_special(); } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 451b996bba91..42891d2b054e 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -365,16 +365,16 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->sense = sense; memset(sense, 0, sizeof(sense)); rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); rq->ref_count++; - rq->flags |= REQ_NOMERGE; rq->waiting = &wait; rq->end_io = blk_end_sync_rq; elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); diff --git a/drivers/block/xd.c b/drivers/block/xd.c index e828e4cbd3e1..ebf3025721d1 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q) int res = 0; int retry; - if (!(req->flags & REQ_CMD)) { + if (!blk_fs_request(req)) { end_request(req, 0); continue; } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d239cf8b20bd..b38c84a7a8e3 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, rq->cmd[9] = 0xf8; rq->cmd_len = 12; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->timeout = 60 * HZ; bio = rq->bio; diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c index 37bdb0163f0d..ccd91c1a84bd 100644 --- a/drivers/cdrom/cdu31a.c +++ b/drivers/cdrom/cdu31a.c @@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q) } /* WTF??? */ - if (!(req->flags & REQ_CMD)) + if (!blk_fs_request(req)) { + end_request(req, 0); continue; + } if (rq_data_dir(req) == WRITE) { end_request(req, 0); continue; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 654d4cd09847..69bbb6206a00 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq, { int log = 0; - if (!sense || !rq || (rq->flags & REQ_QUIET)) + if (!sense || !rq || (rq->cmd_flags & REQ_QUIET)) return 0; switch (sense->sense_key) { @@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq) struct cdrom_info *cd = drive->driver_data; ide_init_drive_cmd(rq); - rq->flags = REQ_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->rq_disk = cd->disk; } @@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, rq->cmd[0] = GPCMD_REQUEST_SENSE; rq->cmd[4] = rq->data_len = 18; - rq->flags = REQ_SENSE; + rq->cmd_type = REQ_TYPE_SENSE; /* NOTE! Save the failed command in "rq->buffer" */ rq->buffer = (void *) failed_command; @@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate) struct request *rq = HWGROUP(drive)->rq; int nsectors = rq->hard_cur_sectors; - if ((rq->flags & REQ_SENSE) && uptodate) { + if (blk_sense_request(rq) && uptodate) { /* - * For REQ_SENSE, "rq->buffer" points to the original failed - * request + * For REQ_TYPE_SENSE, "rq->buffer" points to the original + * failed request */ struct request *failed = (struct request *) rq->buffer; struct cdrom_info *info = drive->driver_data; @@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) return 1; } - if (rq->flags & REQ_SENSE) { + if (blk_sense_request(rq)) { /* We got an error trying to get sense info from the drive (probably while trying to recover from a former error). Just give up. */ - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); ide_error(drive, "request sense failure", stat); return 1; - } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) { + } else if (blk_pc_request(rq)) { /* All other functions, except for READ. */ unsigned long flags; @@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) * if we have an error, pass back CHECK_CONDITION as the * scsi status byte */ - if ((rq->flags & REQ_BLOCK_PC) && !rq->errors) + if (!rq->errors) rq->errors = SAM_STAT_CHECK_CONDITION; /* Check for tray open. */ @@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) cdrom_saw_media_change (drive); /*printk("%s: media changed\n",drive->name);*/ return 0; - } else if (!(rq->flags & REQ_QUIET)) { + } else if (!(rq->cmd_flags & REQ_QUIET)) { /* Otherwise, print an error. */ ide_dump_status(drive, "packet command error", stat); } - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; /* * instead of playing games with moving completions around, @@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive) wait = ATAPI_WAIT_PC; break; default: - if (!(rq->flags & REQ_QUIET)) + if (!(rq->cmd_flags & REQ_QUIET)) printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]); wait = 0; break; @@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive) if (rq->current_nr_sectors > 0) { printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n", drive->name, rq->current_nr_sectors); - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); } else cdrom_end_request(drive, 1); @@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive) printk ("%s: cdrom_pc_intr: data underrun %d\n", drive->name, pc->buflen); */ - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); } return ide_stopped; @@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive) rq->data += thislen; rq->data_len -= thislen; - if (rq->flags & REQ_SENSE) + if (blk_sense_request(rq)) rq->sense_len += thislen; } else { confused: @@ -1517,7 +1517,7 @@ confused: "appears confused (ireason = 0x%02x). " "Trying to recover by ending request.\n", drive->name, ireason); - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); return ide_stopped; } @@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive) struct cdrom_info *info = drive->driver_data; info->dma = 0; - rq->flags &= ~REQ_FAILED; + rq->cmd_flags &= ~REQ_FAILED; len = rq->data_len; /* Start sending the command to the drive. */ @@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq) { struct request_sense sense; int retries = 10; - unsigned int flags = rq->flags; + unsigned int flags = rq->cmd_flags; if (rq->sense == NULL) rq->sense = &sense; @@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq) do { int error; unsigned long time = jiffies; - rq->flags = flags; + rq->cmd_flags = flags; error = ide_do_drive_cmd(drive, rq, ide_wait); time = jiffies - time; /* FIXME: we should probably abort/retry or something * in case of failure */ - if (rq->flags & REQ_FAILED) { + if (rq->cmd_flags & REQ_FAILED) { /* The request failed. Retry if it was due to a unit attention status (usually means media was changed). */ @@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq) } /* End of retry loop. */ - } while ((rq->flags & REQ_FAILED) && retries >= 0); + } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0); /* Return an error if the command failed. */ - return (rq->flags & REQ_FAILED) ? -EIO : 0; + return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0; } /* @@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; info->dma = 0; @@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block) } info->last_block = block; return action; - } else if (rq->flags & (REQ_PC | REQ_SENSE)) { + } else if (rq->cmd_type == REQ_TYPE_SENSE) { return cdrom_do_packet_command(drive); - } else if (rq->flags & REQ_BLOCK_PC) { + } else if (blk_pc_request(rq)) { return cdrom_do_block_pc(drive, rq); - } else if (rq->flags & REQ_SPECIAL) { + } else if (blk_special_request(rq)) { /* * right now this can only be a reset... */ @@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) req.sense = sense; req.cmd[0] = GPCMD_TEST_UNIT_READY; - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; #if ! STANDARD_ATAPI /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to @@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, req.cmd[0] = GPCMD_READ_CDVD_CAPACITY; req.data = (char *)&capbuf; req.data_len = sizeof(capbuf); - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; stat = cdrom_queue_packet_command(drive, &req); if (stat == 0) { @@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag, req.sense = sense; req.data = buf; req.data_len = buflen; - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; req.cmd[6] = trackno; req.cmd[7] = (buflen >> 8); @@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi, req.timeout = cgc->timeout; if (cgc->quiet) - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; req.sense = cgc->sense; cgc->stat = cdrom_queue_packet_command(drive, &req); @@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi) int ret; cdrom_prepare_request(drive, &req); - req.flags = REQ_SPECIAL | REQ_QUIET; + req.cmd_type = REQ_TYPE_SPECIAL; + req.cmd_flags = REQ_QUIET; ret = ide_do_drive_cmd(drive, &req, ide_wait); /* @@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq) static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq) { - if (rq->flags & REQ_CMD) + if (blk_fs_request(rq)) return ide_cdrom_prep_fs(q, rq); - else if (rq->flags & REQ_BLOCK_PC) + else if (blk_pc_request(rq)) return ide_cdrom_prep_pc(rq); return 0; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7cf3eb023521..0a05a377d66a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq) rq->cmd[0] = WIN_FLUSH_CACHE; - rq->flags |= REQ_DRIVE_TASK; + rq->cmd_type = REQ_TYPE_ATA_TASK; + rq->cmd_flags |= REQ_SOFTBARRIER; rq->buffer = rq->cmd; } @@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg) if (drive->special.b.set_multmode) return -EBUSY; ide_init_drive_cmd (&rq); - rq.flags = REQ_DRIVE_CMD; + rq.cmd_type = REQ_TYPE_ATA_CMD; drive->mult_req = arg; drive->special.b.set_multmode = 1; (void) ide_do_drive_cmd (drive, &rq, ide_wait); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 7c3a13e1cf64..c3546fe9af63 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq) ide_hwif_t *hwif = HWIF(drive); struct scatterlist *sg = hwif->sg_table; - BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256); + BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256); ide_map_sg(drive, rq); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index adbe9f76a505..0edc32204915 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs) /* Why does this happen? */ if (!rq) return 0; - if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) { + if (!blk_special_request(rq)) { /* our real local end request function */ ide_end_request(drive, uptodate, nsecs); return 0; @@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc ide_init_drive_cmd(rq); rq->buffer = (char *) pc; - rq->flags = REQ_SPECIAL; //rq->cmd = IDEFLOPPY_PC_RQ; + rq->cmd_type = REQ_TYPE_SPECIAL; rq->rq_disk = floppy->disk; (void) ide_do_drive_cmd(drive, rq, ide_preempt); } @@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t pc->callback = &idefloppy_rw_callback; pc->rq = rq; pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; - if (rq->flags & REQ_RW) + if (rq->cmd_flags & REQ_RW) set_bit(PC_WRITING, &pc->flags); pc->buffer = NULL; pc->request_transfer = pc->buffer_size = blocks * floppy->block_size; @@ -1303,7 +1303,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request idefloppy_do_end_request(drive, 0, 0); return ide_stopped; } - if (rq->flags & REQ_CMD) { + if (blk_fs_request(rq)) { if (((long)rq->sector % floppy->bs_factor) || (rq->nr_sectors % floppy->bs_factor)) { printk("%s: unsupported r/w request size\n", @@ -1313,9 +1313,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request } pc = idefloppy_next_pc_storage(drive); idefloppy_create_rw_cmd(floppy, pc, rq, block); - } else if (rq->flags & REQ_SPECIAL) { + } else if (blk_special_request(rq)) { pc = (idefloppy_pc_t *) rq->buffer; - } else if (rq->flags & REQ_BLOCK_PC) { + } else if (blk_pc_request(rq)) { pc = idefloppy_next_pc_storage(drive); if (idefloppy_blockpc_cmd(floppy, pc, rq)) { idefloppy_do_end_request(drive, 0, 0); @@ -1343,7 +1343,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc) ide_init_drive_cmd (&rq); rq.buffer = (char *) pc; - rq.flags = REQ_SPECIAL; // rq.cmd = IDEFLOPPY_PC_RQ; + rq.cmd_type = REQ_TYPE_SPECIAL; rq.rq_disk = floppy->disk; return ide_do_drive_cmd(drive, &rq, ide_wait); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index fb6795236e76..3436b1f104eb 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, { int ret = 1; - BUG_ON(!(rq->flags & REQ_STARTED)); + BUG_ON(!blk_rq_started(rq)); /* * if failfast is set on a request, override number of sectors and @@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq, spin_lock_irqsave(&ide_lock, flags); - BUG_ON(!(rq->flags & REQ_STARTED)); + BUG_ON(!blk_rq_started(rq)); /* * if failfast is set on a request, override number of sectors and @@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) rq = HWGROUP(drive)->rq; spin_unlock_irqrestore(&ide_lock, flags); - if (rq->flags & REQ_DRIVE_CMD) { + if (rq->cmd_type == REQ_TYPE_ATA_CMD) { u8 *args = (u8 *) rq->buffer; if (rq->errors == 0) rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); @@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) args[1] = err; args[2] = hwif->INB(IDE_NSECTOR_REG); } - } else if (rq->flags & REQ_DRIVE_TASK) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) { u8 *args = (u8 *) rq->buffer; if (rq->errors == 0) rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); @@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) args[5] = hwif->INB(IDE_HCYL_REG); args[6] = hwif->INB(IDE_SELECT_REG); } - } else if (rq->flags & REQ_DRIVE_TASKFILE) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *args = (ide_task_t *) rq->special; if (rq->errors == 0) rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); @@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat) return ide_stopped; /* retry only "normal" I/O: */ - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) { + if (!blk_fs_request(rq)) { rq->errors = 1; ide_end_drive_cmd(drive, stat, err); return ide_stopped; @@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg) return ide_stopped; /* retry only "normal" I/O: */ - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) { + if (!blk_fs_request(rq)) { rq->errors = 1; ide_end_drive_cmd(drive, BUSY_STAT, 0); return ide_stopped; @@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq) if (hwif->sg_mapped) /* needed by ide-scsi */ return; - if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) { + if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) { hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } else { sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); @@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { ide_hwif_t *hwif = HWIF(drive); - if (rq->flags & REQ_DRIVE_TASKFILE) { + if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *args = rq->special; if (!args) @@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, if (args->tf_out_flags.all != 0) return flagged_taskfile(drive, args); return do_rw_taskfile(drive, args); - } else if (rq->flags & REQ_DRIVE_TASK) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) { u8 *args = rq->buffer; u8 sel; @@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, hwif->OUTB(sel, IDE_SELECT_REG); ide_cmd(drive, args[0], args[2], &drive_cmd_intr); return ide_started; - } else if (rq->flags & REQ_DRIVE_CMD) { + } else if (rq->cmd_type == REQ_TYPE_ATA_CMD) { u8 *args = rq->buffer; if (!args) @@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) ide_startstop_t startstop; sector_t block; - BUG_ON(!(rq->flags & REQ_STARTED)); + BUG_ON(!blk_rq_started(rq)); #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", @@ -1013,9 +1013,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (!drive->special.all) { ide_driver_t *drv; - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) - return execute_drive_cmd(drive, rq); - else if (rq->flags & REQ_DRIVE_TASKFILE) + if (rq->cmd_type == REQ_TYPE_ATA_CMD || + rq->cmd_type == REQ_TYPE_ATA_TASK || + rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { struct request_pm_state *pm = rq->end_io_data; @@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) * We count how many times we loop here to make sure we service * all drives in the hwgroup without looping for ever */ - if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) { + if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) { drive = drive->next ? drive->next : hwgroup->drive; if (loops++ < 4 && !blk_queue_plugged(drive->queue)) goto again; @@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs) void ide_init_drive_cmd (struct request *rq) { memset(rq, 0, sizeof(*rq)); - rq->flags = REQ_DRIVE_CMD; + rq->cmd_type = REQ_TYPE_ATA_CMD; rq->ref_count = 1; } @@ -1727,7 +1727,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio hwgroup->rq = NULL; if (action == ide_preempt || action == ide_head_wait) { where = ELEVATOR_INSERT_FRONT; - rq->flags |= REQ_PREEMPT; + rq->cmd_flags |= REQ_PREEMPT; } __elv_add_request(drive->queue, rq, where, 0); ide_do_request(hwgroup, IDE_NO_IRQ); diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 1feff23487d4..850ef63cc986 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive) spin_unlock(&ide_lock); if (!rq) return; - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { + if (rq->cmd_type == REQ_TYPE_ATA_CMD || + rq->cmd_type == REQ_TYPE_ATA_TASK) { char *args = rq->buffer; if (args) { opcode = args[0]; found = 1; } - } else if (rq->flags & REQ_DRIVE_TASKFILE) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *args = rq->special; if (args) { task_struct_t *tf = (task_struct_t *) args->tfRegister; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 7067ab997927..643e4b9ac651 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc) static void idetape_init_rq(struct request *rq, u8 cmd) { memset(rq, 0, sizeof(*rq)); - rq->flags = REQ_SPECIAL; + rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[0] = cmd; } @@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, rq->sector, rq->nr_sectors, rq->current_nr_sectors); #endif /* IDETAPE_DEBUG_LOG */ - if ((rq->flags & REQ_SPECIAL) == 0) { + if (!blk_special_request(rq)) { /* * We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " - "request queue (%ld)\n", drive->name, rq->flags); + "request queue (%d)\n", drive->name, rq->cmd_type); ide_end_request(drive, 0, 0); return ide_stopped; } @@ -2768,7 +2768,7 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq) idetape_tape_t *tape = drive->driver_data; #if IDETAPE_DEBUG_BUGS - if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) { + if (rq == NULL || !blk_special_request(rq)) { printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n"); return; } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 97a9244312fc..1d0470c1f957 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq, static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) { - if (rq->flags & REQ_DRIVE_TASKFILE) { + if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *task = rq->special; if (task->tf_out_flags.all) { @@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long struct request rq; memset(&rq, 0, sizeof(rq)); - rq.flags = REQ_DRIVE_TASKFILE; + rq.cmd_type = REQ_TYPE_ATA_TASKFILE; rq.buffer = buf; /* @@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors; if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE) - rq.flags |= REQ_RW; + rq.cmd_flags |= REQ_RW; } rq.special = args; @@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf) struct request rq; ide_init_drive_cmd(&rq); - rq.flags = REQ_DRIVE_TASK; + rq.cmd_type = REQ_TYPE_ATA_TASK; rq.buffer = buf; return ide_do_drive_cmd(drive, &rq, ide_wait); } diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 9c8468de1a75..9384a3fdde6c 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1217,7 +1217,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rq, 0, sizeof(rq)); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); - rq.flags = REQ_PM_SUSPEND; + rq.cmd_type = REQ_TYPE_PM_SUSPEND; rq.special = &args; rq.end_io_data = &rqpm; rqpm.pm_step = ide_pm_state_start_suspend; @@ -1238,7 +1238,7 @@ static int generic_ide_resume(struct device *dev) memset(&rq, 0, sizeof(rq)); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); - rq.flags = REQ_PM_RESUME; + rq.cmd_type = REQ_TYPE_PM_RESUME; rq.special = &args; rq.end_io_data = &rqpm; rqpm.pm_step = ide_pm_state_start_resume; diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c index aebecd8f51cc..4ab931145673 100644 --- a/drivers/ide/legacy/hd.c +++ b/drivers/ide/legacy/hd.c @@ -626,7 +626,7 @@ repeat: req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ", cyl, head, sec, nsect, req->buffer); #endif - if (req->flags & REQ_CMD) { + if (blk_fs_request(req)) { switch (rq_data_dir(req)) { case READ: hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr); diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c index 2a374ccb30dd..2b2d45d7baaa 100644 --- a/drivers/md/dm-emc.c +++ b/drivers/md/dm-emc.c @@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h, memset(&rq->cmd, 0, BLK_MAX_CDB); rq->timeout = EMC_FAILOVER_TIMEOUT; - rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE); + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; return rq; } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 1ddc2fb429d5..eaba81bf2eca 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req) } /* request is already processed by us, so return */ - if (req->flags & REQ_SPECIAL) { + if (blk_special_request(req)) { osm_debug("REQ_SPECIAL already set!\n"); - req->flags |= REQ_DONTPREP; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; } @@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req) ireq = req->special; /* do not come back here */ - req->flags |= REQ_DONTPREP | REQ_SPECIAL; + req->cmd_type = REQ_TYPE_SPECIAL; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; }; diff --git a/drivers/mmc/mmc_queue.c b/drivers/mmc/mmc_queue.c index 74f8cdeeff0f..4ccdd82b680f 100644 --- a/drivers/mmc/mmc_queue.c +++ b/drivers/mmc/mmc_queue.c @@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) struct mmc_queue *mq = q->queuedata; int ret = BLKPREP_KILL; - if (req->flags & REQ_SPECIAL) { + if (blk_special_request(req)) { /* * Special commands already have the command * blocks already setup in req->special. @@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) BUG_ON(!req->special); ret = BLKPREP_OK; - } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { + } else if (blk_fs_request(req) || blk_pc_request(req)) { /* * Block I/O requests need translating according * to the protocol. @@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) } if (ret == BLKPREP_OK) - req->flags |= REQ_DONTPREP; + req->cmd_flags |= REQ_DONTPREP; return ret; } diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 458d3c8ae1ee..6baf5fe14230 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, nsect = req->current_nr_sectors; buf = req->buffer; - if (!(req->flags & REQ_CMD)) + if (!blk_fs_request(req)) return 0; if (block + nsect > get_capacity(req->rq_disk)) diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 9d051e5687ea..222a8a71a5e8 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req) } cqr->retries = DIAG_MAX_RETRIES; cqr->buildclk = get_clock(); - if (req->flags & REQ_FAILFAST) + if (req->cmd_flags & REQ_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = DIAG_TIMEOUT; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index b7a7fac3f7c3..5ecea3e4fdef 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) recid++; } } - if (req->flags & REQ_FAILFAST) + if (req->cmd_flags & REQ_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = 5 * 60 * HZ; /* 5 minutes */ diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index e85015be109b..80926c548228 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) recid++; } } - if (req->flags & REQ_FAILFAST) + if (req->cmd_flags & REQ_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = 5 * 60 * HZ; /* 5 minutes */ diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 5dcef48d414f..10353379a074 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) aic_dev->r_total++; ptr = aic_dev->r_bins; } - if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER) + if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) { aic_dev->barrier_total++; if(scb->tag_action == MSG_ORDERED_Q_TAG) @@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd, /* We always force TEST_UNIT_READY to untagged */ if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) { - if (req->flags & REQ_HARDBARRIER) + if (req->cmd_flags & REQ_HARDBARRIER) { if(sdptr->ordered_tags) { diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 94d1de55607f..65b19695ebe2 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co pc->buffer = buf; pc->c[0] = REQUEST_SENSE; pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE; - rq->flags = REQ_SENSE; + rq->cmd_type = REQ_TYPE_SENSE; pc->timeout = jiffies + WAIT_READY; /* NOTE! Save the failed packet command in "rq->buffer" */ rq->buffer = (void *) failed_command->special; @@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs) int errors = rq->errors; unsigned long flags; - if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) { + if (!blk_special_request(rq) && !blk_sense_request(rq)) { ide_end_request(drive, uptodate, nrsecs); return 0; } ide_end_drive_cmd (drive, 0, 0); - if (rq->flags & REQ_SENSE) { + if (blk_sense_request(rq)) { idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer; if (log) { printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number); @@ -712,7 +712,7 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); #endif /* IDESCSI_DEBUG_LOG */ - if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) { + if (blk_sense_request(rq) || blk_special_request(rq)) { return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special); } blk_dump_rq_flags(rq, "ide-scsi: unsup command"); @@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd, ide_init_drive_cmd (rq); rq->special = (char *) pc; - rq->flags = REQ_SPECIAL; + rq->cmd_type = REQ_TYPE_SPECIAL; spin_unlock_irq(host->host_lock); rq->rq_disk = scsi->disk; (void) ide_do_drive_cmd (drive, rq, ide_end); @@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd) */ printk (KERN_ERR "ide-scsi: cmd aborted!\n"); - if (scsi->pc->rq->flags & REQ_SENSE) + if (blk_sense_request(scsi->pc->rq)) kfree(scsi->pc->buffer); kfree(scsi->pc->rq); kfree(scsi->pc); @@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd) /* kill current request */ blkdev_dequeue_request(req); end_that_request_last(req, 0); - if (req->flags & REQ_SENSE) + if (blk_sense_request(req)) kfree(scsi->pc->buffer); kfree(scsi->pc); scsi->pc = NULL; diff --git a/drivers/scsi/pluto.c b/drivers/scsi/pluto.c index 0bd9c60e6455..aa60a5f1fbc3 100644 --- a/drivers/scsi/pluto.c +++ b/drivers/scsi/pluto.c @@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt) static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt) { - SCpnt->request->rq_status = RQ_SCSI_DONE; PLND(("Detect done %08lx\n", (long)SCpnt)) if (atomic_dec_and_test (&fcss)) up(&fc_sem); @@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt) SCpnt->cmd_len = COMMAND_SIZE(INQUIRY); - SCpnt->request->rq_status = RQ_SCSI_BUSY; + SCpnt->request->cmd_flags &= ~REQ_STARTED; SCpnt->done = pluto_detect_done; SCpnt->request_bufflen = 256; @@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt) for (retry = 0; retry < 5; retry++) { for (i = 0; i < fcscount; i++) { if (!fcs[i].fc) break; - if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) { + if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) { + fcs[i].cmd.request->cmd_flags |= REQ_STARTED; disable_irq(fcs[i].fc->irq); PLND(("queuecommand %d %d\n", retry, i)) fcp_scsi_queuecommand (&(fcs[i].cmd), diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d6743b959a72..71084728eb42 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req) { struct scsi_cmnd *cmd = req->special; - req->flags &= ~REQ_DONTPREP; + req->cmd_flags &= ~REQ_DONTPREP; req->special = NULL; scsi_put_command(cmd); @@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req->sense_len = 0; req->retries = retries; req->timeout = timeout; - req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT; /* * head injection *required* here otherwise quiesce won't work @@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, write, gfp); if (!req) goto free_sense; - req->flags |= REQ_BLOCK_PC | REQ_QUIET; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= REQ_QUIET; if (use_sg) err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp); @@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) break; } } - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "Device not ready: "); scsi_print_sense_hdr("", &sshdr); @@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_end_request(cmd, 0, this_count, 1); return; case VOLUME_OVERFLOW: - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "Volume overflow, CDB: "); __scsi_print_command(cmd->cmnd); @@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) return; } if (result) { - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "SCSI error: return code = 0x%08x\n", result); @@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd) /* * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer */ - if ((req->flags & REQ_BLOCK_PC) && !req->bio) { + if (blk_pc_request(req) && !req->bio) { cmd->request_bufflen = req->data_len; cmd->request_buffer = req->data; req->buffer = req->data; @@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) * these two cases differently. We differentiate by looking * at request->cmd, as this tells us the real story. */ - if (req->flags & REQ_SPECIAL && req->special) { + if (blk_special_request(req) && req->special) cmd = req->special; - } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { - - if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) { - if(specials_only == SDEV_QUIESCE || - specials_only == SDEV_BLOCK) + else if (blk_pc_request(req) || blk_fs_request(req)) { + if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){ + if (specials_only == SDEV_QUIESCE || + specials_only == SDEV_BLOCK) goto defer; sdev_printk(KERN_ERR, sdev, @@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) goto kill; } - /* * Now try and find a command block that we can use. */ @@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) * lock. We hope REQ_STARTED prevents anything untoward from * happening now. */ - if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { + if (blk_fs_request(req) || blk_pc_request(req)) { int ret; /* @@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) /* * Initialize the actual SCSI command for this request. */ - if (req->flags & REQ_BLOCK_PC) { + if (blk_pc_request(req)) { scsi_setup_blk_pc_cmnd(cmd); } else if (req->rq_disk) { struct scsi_driver *drv; @@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) /* * The request is now prepped, no need to come back here */ - req->flags |= REQ_DONTPREP; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; defer: @@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q) if (unlikely(cmd == NULL)) { printk(KERN_CRIT "impossible request in %s.\n" "please mail a stack trace to " - "linux-scsi@vger.kernel.org", + "linux-scsi@vger.kernel.org\n", __FUNCTION__); + blk_dump_rq_flags(req, "foo"); BUG(); } spin_lock(shost->host_lock); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 638cff41d436..10bc99c911fa 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) SCpnt->cmnd[0] = READ_6; SCpnt->sc_data_direction = DMA_FROM_DEVICE; } else { - printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags); -/* overkill panic("Unknown sd command %lx\n", rq->flags); */ + printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags); return 0; } @@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector) static void sd_prepare_flush(request_queue_t *q, struct request *rq) { memset(rq->cmd, 0, sizeof(rq->cmd)); - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->timeout = SD_TIMEOUT; rq->cmd[0] = SYNCHRONIZE_CACHE; rq->cmd_len = 10; diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c index 2f8073b73bf3..7f9bcef6adfa 100644 --- a/drivers/scsi/sun3_NCR5380.c +++ b/drivers/scsi/sun3_NCR5380.c @@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance) if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done != cmd)) { - if(cmd->request->flags & REQ_CMD) { + if(blk_fs_request(cmd->request)) { sun3scsi_dma_setup(d, count, rq_data_dir(cmd->request)); sun3_dma_setup_done = cmd; diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 837173415d4c..44a99aeb8180 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance) static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd, int write_flag) { - if(cmd->request->flags & REQ_CMD) + if(blk_fs_request(cmd->request)) return wanted; else return 0; diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c index 008a82ab8521..f5742b84b27a 100644 --- a/drivers/scsi/sun3_scsi_vme.c +++ b/drivers/scsi/sun3_scsi_vme.c @@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance) static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd, int write_flag) { - if(cmd->request->flags & REQ_CMD) + if(blk_fs_request(cmd->request)) return wanted; else return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfde8b3ee919..b2a412cf468f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -120,6 +120,86 @@ struct request_list { wait_queue_head_t wait[2]; }; +/* + * request command types + */ +enum rq_cmd_type_bits { + REQ_TYPE_FS = 1, /* fs request */ + REQ_TYPE_BLOCK_PC, /* scsi command */ + REQ_TYPE_SENSE, /* sense request */ + REQ_TYPE_PM_SUSPEND, /* suspend request */ + REQ_TYPE_PM_RESUME, /* resume request */ + REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ + REQ_TYPE_FLUSH, /* flush request */ + REQ_TYPE_SPECIAL, /* driver defined type */ + REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ + /* + * for ATA/ATAPI devices. this really doesn't belong here, ide should + * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver + * private REQ_LB opcodes to differentiate what type of request this is + */ + REQ_TYPE_ATA_CMD, + REQ_TYPE_ATA_TASK, + REQ_TYPE_ATA_TASKFILE, +}; + +/* + * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being + * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a + * SCSI cdb. + * + * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, + * typically to differentiate REQ_TYPE_SPECIAL requests. + * + */ +enum { + /* + * just examples for now + */ + REQ_LB_OP_EJECT = 0x40, /* eject request */ + REQ_LB_OP_FLUSH = 0x41, /* flush device */ +}; + +/* + * request type modified bits. first three bits match BIO_RW* bits, important + */ +enum rq_flag_bits { + __REQ_RW, /* not set, read. set, write */ + __REQ_FAILFAST, /* no low level driver retries */ + __REQ_SORTED, /* elevator knows about this request */ + __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ + __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_FUA, /* forced unit access */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_STARTED, /* drive already may have started this one */ + __REQ_DONTPREP, /* don't call prep for this one */ + __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ + __REQ_FAILED, /* set if the request failed */ + __REQ_QUIET, /* don't worry about errors */ + __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ + __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ + __REQ_NR_BITS, /* stops here */ +}; + +#define REQ_RW (1 << __REQ_RW) +#define REQ_FAILFAST (1 << __REQ_FAILFAST) +#define REQ_SORTED (1 << __REQ_SORTED) +#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) +#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_FUA (1 << __REQ_FUA) +#define REQ_NOMERGE (1 << __REQ_NOMERGE) +#define REQ_STARTED (1 << __REQ_STARTED) +#define REQ_DONTPREP (1 << __REQ_DONTPREP) +#define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) +#define REQ_FAILED (1 << __REQ_FAILED) +#define REQ_QUIET (1 << __REQ_QUIET) +#define REQ_PREEMPT (1 << __REQ_PREEMPT) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) +#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) + #define BLK_MAX_CDB 16 /* @@ -129,7 +209,8 @@ struct request { struct list_head queuelist; struct list_head donelist; - unsigned long flags; /* see REQ_ bits below */ + unsigned int cmd_flags; + enum rq_cmd_type_bits cmd_type; /* Maintain bio traversal state for part by part I/O submission. * hard_* are block layer internals, no driver should touch them! @@ -202,73 +283,7 @@ struct request { }; /* - * first three bits match BIO_RW* bits, important - */ -enum rq_flag_bits { - __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST, /* no low level driver retries */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_HARDBARRIER, /* may not be passed by drive either */ - __REQ_FUA, /* forced unit access */ - __REQ_CMD, /* is a regular fs rw request */ - __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - /* - * for ATA/ATAPI devices - */ - __REQ_PC, /* packet command (special) */ - __REQ_BLOCK_PC, /* queued down pc from block layer */ - __REQ_SENSE, /* sense retrival */ - - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_SPECIAL, /* driver suplied command */ - __REQ_DRIVE_CMD, - __REQ_DRIVE_TASK, - __REQ_DRIVE_TASKFILE, - __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_PM_SUSPEND, /* suspend request */ - __REQ_PM_RESUME, /* resume request */ - __REQ_PM_SHUTDOWN, /* shutdown request */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ - __REQ_NR_BITS, /* stops here */ -}; - -#define REQ_RW (1 << __REQ_RW) -#define REQ_FAILFAST (1 << __REQ_FAILFAST) -#define REQ_SORTED (1 << __REQ_SORTED) -#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) -#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) -#define REQ_FUA (1 << __REQ_FUA) -#define REQ_CMD (1 << __REQ_CMD) -#define REQ_NOMERGE (1 << __REQ_NOMERGE) -#define REQ_STARTED (1 << __REQ_STARTED) -#define REQ_DONTPREP (1 << __REQ_DONTPREP) -#define REQ_QUEUED (1 << __REQ_QUEUED) -#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) -#define REQ_PC (1 << __REQ_PC) -#define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC) -#define REQ_SENSE (1 << __REQ_SENSE) -#define REQ_FAILED (1 << __REQ_FAILED) -#define REQ_QUIET (1 << __REQ_QUIET) -#define REQ_SPECIAL (1 << __REQ_SPECIAL) -#define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD) -#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK) -#define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE) -#define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND) -#define REQ_PM_RESUME (1 << __REQ_PM_RESUME) -#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) -#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) - -/* - * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME + * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. */ struct request_pm_state @@ -490,25 +505,28 @@ enum { #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) -#define blk_fs_request(rq) ((rq)->flags & REQ_CMD) -#define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) -#define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST) -#define blk_rq_started(rq) ((rq)->flags & REQ_STARTED) +#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) +#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) +#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) +#define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) + +#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) +#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) -#define blk_pm_suspend_request(rq) ((rq)->flags & REQ_PM_SUSPEND) -#define blk_pm_resume_request(rq) ((rq)->flags & REQ_PM_RESUME) +#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) +#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) #define blk_pm_request(rq) \ - ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) + (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) -#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) -#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) -#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA) +#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) +#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) +#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) ((rq)->flags & 1) +#define rq_data_dir(rq) ((rq)->cmd_flags & 1) static inline int blk_queue_full(struct request_queue *q, int rw) { @@ -541,7 +559,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) /* * noop, requests are automagically marked as active/inactive by I/O @@ -737,7 +755,7 @@ extern void blk_put_queue(request_queue_t *); */ #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) -#define blk_rq_tagged(rq) ((rq)->flags & REQ_QUEUED) +#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) extern int blk_queue_start_tag(request_queue_t *, struct request *); extern struct request *blk_queue_find_tag(request_queue_t *, int); extern void blk_queue_end_tag(request_queue_t *, struct request *); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7520cc1ff9e2..ea48eb1b3fd3 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -148,7 +148,7 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, u32 what) { struct blk_trace *bt = q->blk_trace; - int rw = rq->flags & 0x03; + int rw = rq->cmd_flags & 0x03; if (likely(!bt)) return; diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index d04d05adfa9b..bbf66219b769 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -100,7 +100,7 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER) + if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) *msg++ = MSG_ORDERED_TAG; else *msg++ = MSG_SIMPLE_TAG; -- cgit v1.2.3-71-gd317 From 9817064b68fef7e4580c6df1ea597e106b9ff88b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Jul 2006 09:23:08 +0200 Subject: [PATCH] elevator: move the backmerging logic into the elevator core Right now, every IO scheduler implements its own backmerging (except for noop, which does no merging). That results in duplicated code for essentially the same operation, which is never a good thing. This patch moves the backmerging out of the io schedulers and into the elevator core. We save 1.6kb of text and as a bonus get backmerging for noop as well. Win-win! Signed-off-by: Jens Axboe --- block/as-iosched.c | 139 +------------------------------------------- block/cfq-iosched.c | 86 +-------------------------- block/deadline-iosched.c | 128 +---------------------------------------- block/elevator.c | 147 ++++++++++++++++++++++++++++++++++++++++++----- block/ll_rw_blk.c | 2 + include/linux/blkdev.h | 17 +----- include/linux/elevator.h | 2 + 7 files changed, 146 insertions(+), 375 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index ad1cc4077819..6db494333c3a 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -95,7 +94,6 @@ struct as_data { struct as_rq *next_arq[2]; /* next in sort order */ sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ - struct hlist_head *hash; /* request hash */ unsigned long exit_prob; /* probability a task will exit while being waited on */ @@ -161,11 +159,6 @@ struct as_rq { struct io_context *io_context; /* The submitting task */ - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - /* * expire fifo */ @@ -272,77 +265,6 @@ static void as_put_io_context(struct as_rq *arq) put_io_context(arq->io_context); } -/* - * the back merge hash support functions - */ -static const int as_hash_shift = 6; -#define AS_HASH_BLOCK(sec) ((sec) >> 3) -#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift)) -#define AS_HASH_ENTRIES (1 << as_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) - -static inline void __as_del_arq_hash(struct as_rq *arq) -{ - hlist_del_init(&arq->hash); -} - -static inline void as_del_arq_hash(struct as_rq *arq) -{ - if (!hlist_unhashed(&arq->hash)) - __as_del_arq_hash(arq); -} - -static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - - BUG_ON(!hlist_unhashed(&arq->hash)); - - hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; - - if (hlist_unhashed(&arq->hash)) { - WARN_ON(1); - return; - } - - if (&arq->hash != head->first) { - hlist_del(&arq->hash); - hlist_add_head(&arq->hash, head); - } -} - -static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) -{ - struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct as_rq *arq; - - hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) { - struct request *__rq = arq->request; - - BUG_ON(hlist_unhashed(&arq->hash)); - - if (!rq_mergeable(__rq)) { - as_del_arq_hash(arq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - /* * rb tree support functions */ @@ -1060,7 +982,6 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) ad->next_arq[data_dir] = as_find_next_arq(ad, arq); list_del_init(&arq->fifo); - as_del_arq_hash(arq); as_del_arq_rb(ad, arq); } @@ -1349,8 +1270,6 @@ static void as_add_request(request_queue_t *q, struct request *rq) } as_add_arq_rb(ad, arq); - if (rq_mergeable(arq->request)) - as_add_arq_hash(ad, arq); /* * set expire time (only used for reads) and add to fifo list @@ -1428,42 +1347,17 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio) struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; - int ret; - - /* - * see if the merge hash can satisfy a back merge - */ - __rq = as_find_arq_hash(ad, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } /* * check for front merge */ __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); - if (__rq) { - BUG_ON(rb_key != rq_rb_key(__rq)); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; - } + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - if (ret) { - if (rq_mergeable(__rq)) - as_hot_arq_hash(ad, RQ_DATA(__rq)); - } - *req = __rq; - return ret; } static void as_merged_request(request_queue_t *q, struct request *req) @@ -1471,12 +1365,6 @@ static void as_merged_request(request_queue_t *q, struct request *req) struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); - /* - * hash always needs to be repositioned, key is end sector - */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); - /* * if the merge was a front merge, we need to reposition request */ @@ -1501,13 +1389,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req, BUG_ON(!arq); BUG_ON(!anext); - /* - * reposition arq (this is the merged request) in hash, and in rbtree - * in case of a front merge - */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); - if (rq_rb_key(req) != arq->rb_key) { as_del_arq_rb(ad, arq); as_add_arq_rb(ad, arq); @@ -1591,7 +1472,6 @@ static int as_set_request(request_queue_t *q, struct request *rq, arq->request = rq; arq->state = AS_RQ_PRESCHED; arq->io_context = NULL; - INIT_HLIST_NODE(&arq->hash); INIT_LIST_HEAD(&arq->fifo); rq->elevator_private = arq; return 0; @@ -1628,7 +1508,6 @@ static void as_exit_queue(elevator_t *e) mempool_destroy(ad->arq_pool); put_io_context(ad->io_context); - kfree(ad->hash); kfree(ad); } @@ -1639,7 +1518,6 @@ static void as_exit_queue(elevator_t *e) static void *as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; - int i; if (!arq_pool) return NULL; @@ -1651,17 +1529,9 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) ad->q = q; /* Identify what queue the data belongs to */ - ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!ad->hash) { - kfree(ad); - return NULL; - } - ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, arq_pool, q->node); if (!ad->arq_pool) { - kfree(ad->hash); kfree(ad); return NULL; } @@ -1672,9 +1542,6 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) init_timer(&ad->antic_timer); INIT_WORK(&ad->antic_work, as_work_handler, q); - for (i = 0; i < AS_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&ad->hash[i]); - INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); ad->sort_list[REQ_SYNC] = RB_ROOT; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3a3aee08ec5f..1b803c0c90f1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -41,16 +41,6 @@ static DEFINE_SPINLOCK(cfq_exit_lock); #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) -/* - * for the hash of crq inside the cfqq - */ -#define CFQ_MHASH_SHIFT 6 -#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) -#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) -#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) - #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) @@ -112,11 +102,6 @@ struct cfq_data { */ struct hlist_head *cfq_hash; - /* - * global crq hash for all queues - */ - struct hlist_head *crq_hash; - mempool_t *crq_pool; int rq_in_driver; @@ -203,7 +188,6 @@ struct cfq_rq { struct rb_node rb_node; sector_t rb_key; struct request *request; - struct hlist_node hash; struct cfq_queue *cfq_queue; struct cfq_io_context *io_context; @@ -271,42 +255,6 @@ static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsi static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); -/* - * lots of deadline iosched dupes, can be abstracted later... - */ -static inline void cfq_del_crq_hash(struct cfq_rq *crq) -{ - hlist_del_init(&crq->hash); -} - -static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) -{ - const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); - - hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); -} - -static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) -{ - struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; - struct hlist_node *entry, *next; - - hlist_for_each_safe(entry, next, hash_list) { - struct cfq_rq *crq = list_entry_hash(entry); - struct request *__rq = crq->request; - - if (!rq_mergeable(__rq)) { - cfq_del_crq_hash(crq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - /* * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing @@ -677,7 +625,6 @@ static void cfq_remove_request(struct request *rq) list_del_init(&rq->queuelist); cfq_del_crq_rb(crq); - cfq_del_crq_hash(crq); } static int @@ -685,34 +632,20 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; - int ret; - - __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); - if (__rq && elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } __rq = cfq_find_rq_fmerge(cfqd, bio); if (__rq && elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - *req = __rq; - return ret; } static void cfq_merged_request(request_queue_t *q, struct request *req) { - struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(req); - cfq_del_crq_hash(crq); - cfq_add_crq_hash(cfqd, crq); - if (rq_rb_key(req) != crq->rb_key) { struct cfq_queue *cfqq = crq->cfq_queue; @@ -1825,9 +1758,6 @@ static void cfq_insert_request(request_queue_t *q, struct request *rq) list_add_tail(&rq->queuelist, &cfqq->fifo); - if (rq_mergeable(rq)) - cfq_add_crq_hash(cfqd, crq); - cfq_crq_enqueued(cfqd, cfqq, crq); } @@ -2055,7 +1985,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, RB_CLEAR_NODE(&crq->rb_node); crq->rb_key = 0; crq->request = rq; - INIT_HLIST_NODE(&crq->hash); crq->cfq_queue = cfqq; crq->io_context = cic; @@ -2221,7 +2150,6 @@ static void cfq_exit_queue(elevator_t *e) cfq_shutdown_timer_wq(cfqd); mempool_destroy(cfqd->crq_pool); - kfree(cfqd->crq_hash); kfree(cfqd->cfq_hash); kfree(cfqd); } @@ -2246,20 +2174,14 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&cfqd->empty_list); INIT_LIST_HEAD(&cfqd->cic_list); - cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); - if (!cfqd->crq_hash) - goto out_crqhash; - cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); if (!cfqd->cfq_hash) - goto out_cfqhash; + goto out_crqhash; cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool); if (!cfqd->crq_pool) goto out_crqpool; - for (i = 0; i < CFQ_MHASH_ENTRIES; i++) - INIT_HLIST_HEAD(&cfqd->crq_hash[i]); for (i = 0; i < CFQ_QHASH_ENTRIES; i++) INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); @@ -2289,8 +2211,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) return cfqd; out_crqpool: kfree(cfqd->cfq_hash); -out_cfqhash: - kfree(cfqd->crq_hash); out_crqhash: kfree(cfqd); return NULL; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c7ca9f0b6498..b66e820f544d 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -12,7 +12,6 @@ #include #include #include -#include #include /* @@ -24,13 +23,6 @@ static const int writes_starved = 2; /* max times reads can starve a write */ static const int fifo_batch = 16; /* # of sequential requests treated as one by the above parameters. For throughput. */ -static const int deadline_hash_shift = 5; -#define DL_HASH_BLOCK(sec) ((sec) >> 3) -#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) -#define DL_HASH_ENTRIES (1 << deadline_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define ON_HASH(drq) (!hlist_unhashed(&(drq)->hash)) - struct deadline_data { /* * run time data @@ -46,7 +38,6 @@ struct deadline_data { * next in sort order. read, write or both are NULL */ struct deadline_rq *next_drq[2]; - struct hlist_head *hash; /* request hash */ unsigned int batching; /* number of sequential requests made */ sector_t last_sector; /* head position */ unsigned int starved; /* times reads have starved writes */ @@ -74,11 +65,6 @@ struct deadline_rq { struct request *request; - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - /* * expire fifo */ @@ -92,69 +78,6 @@ static kmem_cache_t *drq_pool; #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) -/* - * the back merge hash support functions - */ -static inline void __deadline_del_drq_hash(struct deadline_rq *drq) -{ - hlist_del_init(&drq->hash); -} - -static inline void deadline_del_drq_hash(struct deadline_rq *drq) -{ - if (ON_HASH(drq)) - __deadline_del_drq_hash(drq); -} - -static inline void -deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct request *rq = drq->request; - - BUG_ON(ON_HASH(drq)); - - hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void -deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct request *rq = drq->request; - struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; - - if (ON_HASH(drq) && &drq->hash != head->first) { - hlist_del(&drq->hash); - hlist_add_head(&drq->hash, head); - } -} - -static struct request * -deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) -{ - struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct deadline_rq *drq; - - hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) { - struct request *__rq = drq->request; - - BUG_ON(!ON_HASH(drq)); - - if (!rq_mergeable(__rq)) { - __deadline_del_drq_hash(drq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - /* * rb tree support functions */ @@ -267,22 +190,19 @@ deadline_add_request(struct request_queue *q, struct request *rq) { struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); - const int data_dir = rq_data_dir(drq->request); deadline_add_drq_rb(dd, drq); + /* * set expire time (only used for reads) and add to fifo list */ drq->expires = jiffies + dd->fifo_expire[data_dir]; list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); - - if (rq_mergeable(rq)) - deadline_add_drq_hash(dd, drq); } /* - * remove rq from rbtree, fifo, and hash + * remove rq from rbtree and fifo. */ static void deadline_remove_request(request_queue_t *q, struct request *rq) { @@ -291,7 +211,6 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq) list_del_init(&drq->fifo); deadline_del_drq_rb(dd, drq); - deadline_del_drq_hash(drq); } static int @@ -301,19 +220,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) struct request *__rq; int ret; - /* - * see if the merge hash can satisfy a back merge - */ - __rq = deadline_find_drq_hash(dd, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } - /* * check for front merge */ @@ -333,8 +239,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; out: - if (ret) - deadline_hot_drq_hash(dd, RQ_DATA(__rq)); *req = __rq; return ret; } @@ -344,12 +248,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req) struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); - /* - * hash always needs to be repositioned, key is end sector - */ - deadline_del_drq_hash(drq); - deadline_add_drq_hash(dd, drq); - /* * if the merge was a front merge, we need to reposition request */ @@ -370,13 +268,6 @@ deadline_merged_requests(request_queue_t *q, struct request *req, BUG_ON(!drq); BUG_ON(!dnext); - /* - * reposition drq (this is the merged request) in hash, and in rbtree - * in case of a front merge - */ - deadline_del_drq_hash(drq); - deadline_add_drq_hash(dd, drq); - if (rq_rb_key(req) != drq->rb_key) { deadline_del_drq_rb(dd, drq); deadline_add_drq_rb(dd, drq); @@ -594,7 +485,6 @@ static void deadline_exit_queue(elevator_t *e) BUG_ON(!list_empty(&dd->fifo_list[WRITE])); mempool_destroy(dd->drq_pool); - kfree(dd->hash); kfree(dd); } @@ -605,7 +495,6 @@ static void deadline_exit_queue(elevator_t *e) static void *deadline_init_queue(request_queue_t *q, elevator_t *e) { struct deadline_data *dd; - int i; if (!drq_pool) return NULL; @@ -615,24 +504,13 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e) return NULL; memset(dd, 0, sizeof(*dd)); - dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!dd->hash) { - kfree(dd); - return NULL; - } - dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, drq_pool, q->node); if (!dd->drq_pool) { - kfree(dd->hash); kfree(dd); return NULL; } - for (i = 0; i < DL_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&dd->hash[i]); - INIT_LIST_HEAD(&dd->fifo_list[READ]); INIT_LIST_HEAD(&dd->fifo_list[WRITE]); dd->sort_list[READ] = RB_ROOT; @@ -667,8 +545,6 @@ deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, RB_CLEAR_NODE(&drq->rb_node); drq->request = rq; - INIT_HLIST_NODE(&drq->hash); - INIT_LIST_HEAD(&drq->fifo); rq->elevator_private = drq; diff --git a/block/elevator.c b/block/elevator.c index 4ac97b642042..cff1102dac9d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,12 +33,23 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); +/* + * Merge hash stuff. + */ +static const int elv_hash_shift = 6; +#define ELV_HASH_BLOCK(sec) ((sec) >> 3) +#define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) +#define ELV_HASH_ENTRIES (1 << elv_hash_shift) +#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) + /* * can we safely merge with this request? */ @@ -153,25 +164,41 @@ static struct kobj_type elv_ktype; static elevator_t *elevator_alloc(struct elevator_type *e) { - elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); - if (eq) { - memset(eq, 0, sizeof(*eq)); - eq->ops = &e->ops; - eq->elevator_type = e; - kobject_init(&eq->kobj); - snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - eq->kobj.ktype = &elv_ktype; - mutex_init(&eq->sysfs_lock); - } else { - elevator_put(e); - } + elevator_t *eq; + int i; + + eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); + if (unlikely(!eq)) + goto err; + + memset(eq, 0, sizeof(*eq)); + eq->ops = &e->ops; + eq->elevator_type = e; + kobject_init(&eq->kobj); + snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); + eq->kobj.ktype = &elv_ktype; + mutex_init(&eq->sysfs_lock); + + eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL); + if (!eq->hash) + goto err; + + for (i = 0; i < ELV_HASH_ENTRIES; i++) + INIT_HLIST_HEAD(&eq->hash[i]); + return eq; +err: + kfree(eq); + elevator_put(e); + return NULL; } static void elevator_release(struct kobject *kobj) { elevator_t *e = container_of(kobj, elevator_t, kobj); + elevator_put(e->elevator_type); + kfree(e->hash); kfree(e); } @@ -223,6 +250,53 @@ void elevator_exit(elevator_t *e) kobject_put(&e->kobj); } +static inline void __elv_rqhash_del(struct request *rq) +{ + hlist_del_init(&rq->hash); +} + +static void elv_rqhash_del(request_queue_t *q, struct request *rq) +{ + if (ELV_ON_HASH(rq)) + __elv_rqhash_del(rq); +} + +static void elv_rqhash_add(request_queue_t *q, struct request *rq) +{ + elevator_t *e = q->elevator; + + BUG_ON(ELV_ON_HASH(rq)); + hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); +} + +static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) +{ + __elv_rqhash_del(rq); + elv_rqhash_add(q, rq); +} + +static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) +{ + elevator_t *e = q->elevator; + struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; + struct hlist_node *entry, *next; + struct request *rq; + + hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { + BUG_ON(!ELV_ON_HASH(rq)); + + if (unlikely(!rq_mergeable(rq))) { + __elv_rqhash_del(rq); + continue; + } + + if (rq_hash_key(rq) == offset) + return rq; + } + + return NULL; +} + /* * Insert rq into dispatch queue of q. Queue lock must be held on * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be @@ -235,6 +309,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) if (q->last_merge == rq) q->last_merge = NULL; + + elv_rqhash_del(q, rq); + q->nr_sorted--; boundary = q->end_sector; @@ -258,11 +335,32 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_add(&rq->queuelist, entry); } +/* + * This should be in elevator.h, but that requires pulling in rq and q + */ +void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) +{ + if (q->last_merge == rq) + q->last_merge = NULL; + + elv_rqhash_del(q, rq); + + q->nr_sorted--; + + q->end_sector = rq_end_sector(rq); + q->boundary_rq = rq; + list_add_tail(&rq->queuelist, &q->queue_head); +} + int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; + struct request *__rq; int ret; + /* + * First try one-hit cache. + */ if (q->last_merge) { ret = elv_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { @@ -271,6 +369,15 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) } } + /* + * See if our hash lookup can find a potential backmerge. + */ + __rq = elv_rqhash_find(q, bio->bi_sector); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } + if (e->ops->elevator_merge_fn) return e->ops->elevator_merge_fn(q, req, bio); @@ -284,6 +391,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq) if (e->ops->elevator_merged_fn) e->ops->elevator_merged_fn(q, rq); + elv_rqhash_reposition(q, rq); + q->last_merge = rq; } @@ -294,8 +403,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); - q->nr_sorted--; + elv_rqhash_reposition(q, rq); + elv_rqhash_del(q, next); + + q->nr_sorted--; q->last_merge = rq; } @@ -371,8 +483,12 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) BUG_ON(!blk_fs_request(rq)); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; - if (q->last_merge == NULL && rq_mergeable(rq)) - q->last_merge = rq; + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } + /* * Some ioscheds (cfq) run q->request_fn directly, so * rq cannot be accessed after calling @@ -557,6 +673,7 @@ struct request *elv_next_request(request_queue_t *q) void elv_dequeue_request(request_queue_t *q, struct request *rq) { BUG_ON(list_empty(&rq->queuelist)); + BUG_ON(ELV_ON_HASH(rq)); list_del_init(&rq->queuelist); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 9b91bb70c5ed..9cbf7b550c78 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -281,6 +281,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); + INIT_HLIST_NODE(&rq->hash); rq->errors = 0; rq->rq_status = RQ_ACTIVE; @@ -2700,6 +2701,7 @@ void __blk_put_request(request_queue_t *q, struct request *req) int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); + BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); freed_request(q, rw, priv); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b2a412cf468f..8f5486964671 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -229,6 +229,8 @@ struct request { struct bio *bio; struct bio *biotail; + struct hlist_node hash; /* merge hash */ + void *elevator_private; void *completion_data; @@ -696,21 +698,6 @@ static inline void blkdev_dequeue_request(struct request *req) elv_dequeue_request(req->q, req); } -/* - * This should be in elevator.h, but that requires pulling in rq and q - */ -static inline void elv_dispatch_add_tail(struct request_queue *q, - struct request *rq) -{ - if (q->last_merge == rq) - q->last_merge = NULL; - q->nr_sorted--; - - q->end_sector = rq_end_sector(rq); - q->boundary_rq = rq; - list_add_tail(&rq->queuelist, &q->queue_head); -} - /* * Access functions for manipulating queue properties */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1713ace808bf..2c270e90b33e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -82,12 +82,14 @@ struct elevator_queue struct kobject kobj; struct elevator_type *elevator_type; struct mutex sysfs_lock; + struct hlist_head *hash; }; /* * block elevator interface */ extern void elv_dispatch_sort(request_queue_t *, struct request *); +extern void elv_dispatch_add_tail(request_queue_t *, struct request *); extern void elv_add_request(request_queue_t *, struct request *, int, int); extern void __elv_add_request(request_queue_t *, struct request *, int, int); extern void elv_insert(request_queue_t *, struct request *, int); -- cgit v1.2.3-71-gd317 From 10fd48f2376db52f08bf0420d2c4f580e39269e1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Jul 2006 21:15:52 +0200 Subject: [PATCH] rbtree: fixed reversed RB_EMPTY_NODE and rb_next/prev The conditions got reserved. Also make rb_next() and rb_prev() check for the empty condition. Signed-off-by: Jens Axboe --- block/as-iosched.c | 4 ++-- include/linux/rbtree.h | 2 +- lib/rbtree.c | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index 6db494333c3a..9d0f15a54c64 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -336,7 +336,7 @@ static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) { - if (!RB_EMPTY_NODE(&arq->rb_node)) { + if (RB_EMPTY_NODE(&arq->rb_node)) { WARN_ON(1); return; } @@ -1039,7 +1039,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) struct request *rq = arq->request; const int data_dir = arq->is_sync; - BUG_ON(!RB_EMPTY_NODE(&arq->rb_node)); + BUG_ON(RB_EMPTY_NODE(&arq->rb_node)); as_antic_stop(ad); ad->antic_status = ANTIC_OFF; diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 8d5382e62c08..344bc3495ddb 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -133,7 +133,7 @@ static inline void rb_set_color(struct rb_node *rb, int color) #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -#define RB_EMPTY_NODE(node) (rb_parent(node) != node) +#define RB_EMPTY_NODE(node) (rb_parent(node) == node) #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) extern void rb_insert_color(struct rb_node *, struct rb_root *); diff --git a/lib/rbtree.c b/lib/rbtree.c index 1e55ba1c2edf..48499c2d88cc 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -322,6 +322,9 @@ struct rb_node *rb_next(struct rb_node *node) { struct rb_node *parent; + if (rb_parent(node) == node) + return NULL; + /* If we have a right-hand child, go down and then left as far as we can. */ if (node->rb_right) { @@ -348,6 +351,9 @@ struct rb_node *rb_prev(struct rb_node *node) { struct rb_node *parent; + if (rb_parent(node) == node) + return NULL; + /* If we have a left-hand child, go down and then right as far as we can. */ if (node->rb_left) { -- cgit v1.2.3-71-gd317 From 2e662b65f05d550b6799ed6bfa9963b82279e6b7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Jul 2006 11:55:04 +0200 Subject: [PATCH] elevator: abstract out the rbtree sort handling The rbtree sort/lookup/reposition logic is mostly duplicated in cfq/deadline/as, so move it to the elevator core. The io schedulers still provide the actual rb root, as we don't want to impose any sort of specific handling on the schedulers. Introduce the helpers and rb_node in struct request to help migrate the IO schedulers. Signed-off-by: Jens Axboe --- block/elevator.c | 123 +++++++++++++++++++++++++++++++++++++++++------ block/ll_rw_blk.c | 7 +-- include/linux/blkdev.h | 1 + include/linux/elevator.h | 18 ++++++- 4 files changed, 130 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/block/elevator.c b/block/elevator.c index cff1102dac9d..cbbc36ba016a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -239,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name) return ret; } +EXPORT_SYMBOL(elevator_init); + void elevator_exit(elevator_t *e) { mutex_lock(&e->sysfs_lock); @@ -250,6 +252,8 @@ void elevator_exit(elevator_t *e) kobject_put(&e->kobj); } +EXPORT_SYMBOL(elevator_exit); + static inline void __elv_rqhash_del(struct request *rq) { hlist_del_init(&rq->hash); @@ -297,10 +301,69 @@ static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) return NULL; } +/* + * RB-tree support functions for inserting/lookup/removal of requests + * in a sorted RB tree. + */ +struct request *elv_rb_add(struct rb_root *root, struct request *rq) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct request *__rq; + + while (*p) { + parent = *p; + __rq = rb_entry(parent, struct request, rb_node); + + if (rq->sector < __rq->sector) + p = &(*p)->rb_left; + else if (rq->sector > __rq->sector) + p = &(*p)->rb_right; + else + return __rq; + } + + rb_link_node(&rq->rb_node, parent, p); + rb_insert_color(&rq->rb_node, root); + return NULL; +} + +EXPORT_SYMBOL(elv_rb_add); + +void elv_rb_del(struct rb_root *root, struct request *rq) +{ + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); + rb_erase(&rq->rb_node, root); + RB_CLEAR_NODE(&rq->rb_node); +} + +EXPORT_SYMBOL(elv_rb_del); + +struct request *elv_rb_find(struct rb_root *root, sector_t sector) +{ + struct rb_node *n = root->rb_node; + struct request *rq; + + while (n) { + rq = rb_entry(n, struct request, rb_node); + + if (sector < rq->sector) + n = n->rb_left; + else if (sector > rq->sector) + n = n->rb_right; + else + return rq; + } + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_find); + /* * Insert rq into dispatch queue of q. Queue lock must be held on - * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be - * appended to the dispatch queue. To be used by specific elevators. + * entry. rq is sort insted into the dispatch queue. To be used by + * specific elevators. */ void elv_dispatch_sort(request_queue_t *q, struct request *rq) { @@ -335,8 +398,12 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_add(&rq->queuelist, entry); } +EXPORT_SYMBOL(elv_dispatch_sort); + /* - * This should be in elevator.h, but that requires pulling in rq and q + * Insert rq into dispatch queue of q. Queue lock must be held on + * entry. rq is added to the back of the dispatch queue. To be used by + * specific elevators. */ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) { @@ -352,6 +419,8 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) list_add_tail(&rq->queuelist, &q->queue_head); } +EXPORT_SYMBOL(elv_dispatch_add_tail); + int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; @@ -384,14 +453,15 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; } -void elv_merged_request(request_queue_t *q, struct request *rq) +void elv_merged_request(request_queue_t *q, struct request *rq, int type) { elevator_t *e = q->elevator; if (e->ops->elevator_merged_fn) - e->ops->elevator_merged_fn(q, rq); + e->ops->elevator_merged_fn(q, rq, type); - elv_rqhash_reposition(q, rq); + if (type == ELEVATOR_BACK_MERGE) + elv_rqhash_reposition(q, rq); q->last_merge = rq; } @@ -577,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, elv_insert(q, rq, where); } +EXPORT_SYMBOL(__elv_add_request); + void elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { @@ -587,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where, spin_unlock_irqrestore(q->queue_lock, flags); } +EXPORT_SYMBOL(elv_add_request); + static inline struct request *__elv_next_request(request_queue_t *q) { struct request *rq; @@ -670,6 +744,8 @@ struct request *elv_next_request(request_queue_t *q) return rq; } +EXPORT_SYMBOL(elv_next_request); + void elv_dequeue_request(request_queue_t *q, struct request *rq) { BUG_ON(list_empty(&rq->queuelist)); @@ -686,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq) q->in_flight++; } +EXPORT_SYMBOL(elv_dequeue_request); + int elv_queue_empty(request_queue_t *q) { elevator_t *e = q->elevator; @@ -699,6 +777,8 @@ int elv_queue_empty(request_queue_t *q) return 1; } +EXPORT_SYMBOL(elv_queue_empty); + struct request *elv_latter_request(request_queue_t *q, struct request *rq) { elevator_t *e = q->elevator; @@ -1025,11 +1105,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) return len; } -EXPORT_SYMBOL(elv_dispatch_sort); -EXPORT_SYMBOL(elv_add_request); -EXPORT_SYMBOL(__elv_add_request); -EXPORT_SYMBOL(elv_next_request); -EXPORT_SYMBOL(elv_dequeue_request); -EXPORT_SYMBOL(elv_queue_empty); -EXPORT_SYMBOL(elevator_exit); -EXPORT_SYMBOL(elevator_init); +struct request *elv_rb_former_request(request_queue_t *q, struct request *rq) +{ + struct rb_node *rbprev = rb_prev(&rq->rb_node); + + if (rbprev) + return rb_entry_rq(rbprev); + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_former_request); + +struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq) +{ + struct rb_node *rbnext = rb_next(&rq->rb_node); + + if (rbnext) + return rb_entry_rq(rbnext); + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_latter_request); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 9cbf7b550c78..d388486e98bb 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -281,11 +281,12 @@ static inline void rq_init(request_queue_t *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); - INIT_HLIST_NODE(&rq->hash); rq->errors = 0; rq->rq_status = RQ_ACTIVE; rq->bio = rq->biotail = NULL; + INIT_HLIST_NODE(&rq->hash); + RB_CLEAR_NODE(&rq->rb_node); rq->ioprio = 0; rq->buffer = NULL; rq->ref_count = 1; @@ -2943,7 +2944,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_back_merge(q, req)) - elv_merged_request(q, req); + elv_merged_request(q, req, el_ret); goto out; case ELEVATOR_FRONT_MERGE: @@ -2970,7 +2971,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_front_merge(q, req)) - elv_merged_request(q, req); + elv_merged_request(q, req, el_ret); goto out; /* ELV_NO_MERGE: elevator says don't/can't merge. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8f5486964671..a905c4934a55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -230,6 +230,7 @@ struct request { struct bio *biotail; struct hlist_node hash; /* merge hash */ + struct rb_node rb_node; /* sort/lookup */ void *elevator_private; void *completion_data; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c270e90b33e..95b2a04b969c 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -6,7 +6,7 @@ typedef int (elevator_merge_fn) (request_queue_t *, struct request **, typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); -typedef void (elevator_merged_fn) (request_queue_t *, struct request *); +typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int); typedef int (elevator_dispatch_fn) (request_queue_t *, int); @@ -96,7 +96,7 @@ extern void elv_insert(request_queue_t *, struct request *, int); extern int elv_merge(request_queue_t *, struct request **, struct bio *); extern void elv_merge_requests(request_queue_t *, struct request *, struct request *); -extern void elv_merged_request(request_queue_t *, struct request *); +extern void elv_merged_request(request_queue_t *, struct request *, int); extern void elv_dequeue_request(request_queue_t *, struct request *); extern void elv_requeue_request(request_queue_t *, struct request *); extern int elv_queue_empty(request_queue_t *); @@ -126,6 +126,19 @@ extern int elevator_init(request_queue_t *, char *); extern void elevator_exit(elevator_t *); extern int elv_rq_merge_ok(struct request *, struct bio *); +/* + * Helper functions. + */ +extern struct request *elv_rb_former_request(request_queue_t *, struct request *); +extern struct request *elv_rb_latter_request(request_queue_t *, struct request *); + +/* + * rb support functions. + */ +extern struct request *elv_rb_add(struct rb_root *, struct request *); +extern void elv_rb_del(struct rb_root *, struct request *); +extern struct request *elv_rb_find(struct rb_root *, sector_t); + /* * Return values from elevator merger */ @@ -151,5 +164,6 @@ enum { }; #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) +#define rb_entry_rq(node) rb_entry((node), struct request, rb_node) #endif -- cgit v1.2.3-71-gd317 From 1fbfdfcddff4df188b24d9d05271a76a85064583 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Jul 2006 21:49:15 +0200 Subject: [PATCH] elevator: introduce a way to reuse rq for internal FIFO handling The io schedulers can use this instead of having to allocate space for it themselves. Signed-off-by: Jens Axboe --- include/linux/elevator.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 95b2a04b969c..0e7b1a733919 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -166,4 +166,16 @@ enum { #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) +/* + * Hack to reuse the donelist list_head as the fifo time holder while + * the request is in the io scheduler. Saves an unsigned long in rq. + */ +#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) +#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) +#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) +#define rq_fifo_clear(rq) do { \ + list_del_init(&(rq)->queuelist); \ + INIT_LIST_HEAD(&(rq)->donelist); \ + } while (0) + #endif -- cgit v1.2.3-71-gd317 From 9e2585a8a23f3a42f815b2a638725d85a921cd65 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Jul 2006 09:26:13 +0200 Subject: [PATCH] as-iosched: remove arq->is_sync member We can track this in struct request. Signed-off-by: Jens Axboe Signed-off-by: Nick Piggin --- block/as-iosched.c | 36 ++++++++++++++---------------------- include/linux/blkdev.h | 5 +++++ 2 files changed, 19 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index c2665467950e..dca0b0563ca0 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -151,7 +151,6 @@ struct as_rq { struct io_context *io_context; /* The submitting task */ - unsigned int is_sync; enum arq_state state; }; @@ -241,7 +240,7 @@ static void as_put_io_context(struct as_rq *arq) aic = arq->io_context->aic; - if (arq->is_sync == REQ_SYNC && aic) { + if (rq_is_sync(arq->request) && aic) { spin_lock(&aic->lock); set_bit(AS_TASK_IORUNNING, &aic->state); aic->last_end_request = jiffies; @@ -254,14 +253,13 @@ static void as_put_io_context(struct as_rq *arq) /* * rb tree support functions */ -#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) +#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))]) static void as_add_arq_rb(struct as_data *ad, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); struct request *alias; - while ((unlikely(alias = elv_rb_add(ARQ_RB_ROOT(ad, arq), rq)))) { + while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) { as_move_to_dispatch(ad, RQ_DATA(alias)); as_antic_stop(ad); } @@ -269,7 +267,7 @@ static void as_add_arq_rb(struct as_data *ad, struct request *rq) static inline void as_del_arq_rb(struct as_data *ad, struct request *rq) { - elv_rb_del(ARQ_RB_ROOT(ad, RQ_DATA(rq)), rq); + elv_rb_del(RQ_RB_ROOT(ad, rq), rq); } /* @@ -300,13 +298,13 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) if (arq2 == NULL) return arq1; - data_dir = arq1->is_sync; + data_dir = rq_is_sync(arq1->request); last = ad->last_sector[data_dir]; s1 = arq1->request->sector; s2 = arq2->request->sector; - BUG_ON(data_dir != arq2->is_sync); + BUG_ON(data_dir != rq_is_sync(arq2->request)); /* * Strict one way elevator _except_ in the case where we allow @@ -377,7 +375,7 @@ static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *arq) if (rbnext) next = RQ_DATA(rb_entry_rq(rbnext)); else { - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(last); rbnext = rb_first(&ad->sort_list[data_dir]); if (rbnext && rbnext != &last->rb_node) @@ -538,8 +536,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - int data_dir = arq->is_sync; + int data_dir = rq_is_sync(rq); unsigned long thinktime = 0; sector_t seek_dist; @@ -674,7 +671,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { + if (arq && rq_is_sync(arq->request) && as_close_req(ad, aic, arq)) { /* * Found a close request that is not one of ours. * @@ -758,7 +755,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) */ static void as_update_arq(struct as_data *ad, struct as_rq *arq) { - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(arq->request); /* keep the next_arq cache up to date */ ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); @@ -835,7 +832,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq) * actually serviced. This should help devices with big TCQ windows * and writeback caches */ - if (ad->new_batch && ad->batch_data_dir == arq->is_sync) { + if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { update_write_batch(ad); ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; @@ -868,7 +865,7 @@ out: static void as_remove_queued_request(request_queue_t *q, struct request *rq) { struct as_rq *arq = RQ_DATA(rq); - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); struct as_data *ad = q->elevator->elevator_data; WARN_ON(arq->state != AS_RQ_QUEUED); @@ -941,7 +938,7 @@ static inline int as_batch_expired(struct as_data *ad) static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) { struct request *rq = arq->request; - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); @@ -1158,12 +1155,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) arq->state = AS_RQ_NEW; - if (rq_data_dir(arq->request) == READ - || (arq->request->cmd_flags & REQ_RW_SYNC)) - arq->is_sync = 1; - else - arq->is_sync = 0; - data_dir = arq->is_sync; + data_dir = rq_is_sync(rq); arq->io_context = as_get_io_context(); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a905c4934a55..55ef6efe3eb5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -531,6 +531,11 @@ enum { #define rq_data_dir(rq) ((rq)->cmd_flags & 1) +/* + * We regard a request as sync, if it's a READ or a SYNC write. + */ +#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) + static inline int blk_queue_full(struct request_queue *q, int rw) { if (rw == READ) -- cgit v1.2.3-71-gd317 From ff7d145fd911266ae42af7552edc32681c01addb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Jul 2006 14:04:37 +0200 Subject: [PATCH] Add one more pointer to struct request for IO scheduler usage Then we have enough room in the request to get rid of the dynamic allocations in CFQ/AS. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 55ef6efe3eb5..d2dc17151f6c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -232,7 +232,13 @@ struct request { struct hlist_node hash; /* merge hash */ struct rb_node rb_node; /* sort/lookup */ + /* + * two pointers are available for the IO schedulers, if they need + * more they have to dynamically allocate it. + */ void *elevator_private; + void *elevator_private2; + void *completion_data; int rq_status; /* should split this into a few status bits */ -- cgit v1.2.3-71-gd317 From c00895ab2f08df7044e58ee01c38bf0a661ea0eb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 30 Sep 2006 20:29:12 +0200 Subject: [PATCH] Remove ->waiting member from struct request As the comments indicates in blkdev.h, we can fold it into ->end_io_data usage as that is really what ->waiting is. Fixup the users of blk_end_sync_rq(). Signed-off-by: Jens Axboe --- block/elevator.c | 3 +-- block/ll_rw_blk.c | 13 ++++++------- drivers/block/DAC960.c | 2 +- drivers/block/paride/pd.c | 3 +-- drivers/block/pktcdvd.c | 2 +- drivers/ide/ide-io.c | 13 ++++++------- drivers/ide/ide-tape.c | 2 +- drivers/ide/ide.c | 4 ++-- include/linux/blkdev.h | 3 +-- 9 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/block/elevator.c b/block/elevator.c index cbbc36ba016a..924b81b08f86 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -67,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) /* * same device and no special stuff set, merge is ok */ - if (rq->rq_disk == bio->bi_bdev->bd_disk && - !rq->waiting && !rq->special) + if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special) return 1; return 0; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index d388486e98bb..3b6aad2affd8 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -291,7 +291,6 @@ static inline void rq_init(request_queue_t *q, struct request *rq) rq->buffer = NULL; rq->ref_count = 1; rq->q = q; - rq->waiting = NULL; rq->special = NULL; rq->data_len = 0; rq->data = NULL; @@ -451,6 +450,7 @@ static void queue_flush(request_queue_t *q, unsigned which) rq->cmd_flags = REQ_HARDBARRIER; rq_init(q, rq); rq->elevator_private = NULL; + rq->elevator_private2 = NULL; rq->rq_disk = q->bar_rq.rq_disk; rq->rl = NULL; rq->end_io = end_io; @@ -479,6 +479,7 @@ static inline struct request *start_ordered(request_queue_t *q, rq->cmd_flags |= REQ_RW; rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; rq->elevator_private = NULL; + rq->elevator_private2 = NULL; rq->rl = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; @@ -2569,10 +2570,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, rq->sense_len = 0; } - rq->waiting = &wait; + rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); wait_for_completion(&wait); - rq->waiting = NULL; if (rq->errors) err = -EIO; @@ -2736,9 +2736,9 @@ EXPORT_SYMBOL(blk_put_request); */ void blk_end_sync_rq(struct request *rq, int error) { - struct completion *waiting = rq->waiting; + struct completion *waiting = rq->end_io_data; - rq->waiting = NULL; + rq->end_io_data = NULL; __blk_put_request(rq->q, rq); /* @@ -2801,7 +2801,7 @@ static int attempt_merge(request_queue_t *q, struct request *req, if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk - || next->waiting || next->special) + || next->special) return 0; /* @@ -2886,7 +2886,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio) req->nr_phys_segments = bio_phys_segments(req->q, bio); req->nr_hw_segments = bio_hw_segments(req->q, bio); req->buffer = bio_data(bio); /* see ->buffer comment above */ - req->waiting = NULL; req->bio = req->biotail = bio; req->ioprio = bio_prio(bio); req->rq_disk = bio->bi_bdev->bd_disk; diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index a360215dbce7..2568640430fb 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_ Command->DmaDirection = PCI_DMA_TODEVICE; Command->CommandType = DAC960_WriteCommand; } - Command->Completion = Request->waiting; + Command->Completion = Request->end_io_data; Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 12ff1a274d91..500d2ebb41e4 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -722,11 +722,10 @@ static int pd_special_command(struct pd_unit *disk, rq.rq_status = RQ_ACTIVE; rq.rq_disk = disk->gd; rq.ref_count = 1; - rq.waiting = &wait; + rq.end_io_data = &wait; rq.end_io = blk_end_sync_rq; blk_insert_request(disk->gd->queue, &rq, 0, func); wait_for_completion(&wait); - rq.waiting = NULL; if (rq.errors) err = -EIO; blk_put_request(&rq); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 42891d2b054e..888d1aceeeff 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -375,7 +375,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); rq->ref_count++; - rq->waiting = &wait; + rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); generic_unplug_device(q); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 3436b1f104eb..a3ffb04436bd 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -141,7 +141,7 @@ enum { static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; if (drive->media != ide_disk) return; @@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; ide_task_t *args = rq->special; memset(args, 0, sizeof(*args)); @@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) } } } else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; #ifdef DEBUG_PM printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n", drive->name, rq->pm->pm_step, stat, err); @@ -933,7 +933,7 @@ done: static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; if (blk_pm_suspend_request(rq) && pm->pm_step == ide_pm_state_start_suspend) @@ -1018,7 +1018,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, rq->pm->pm_step); @@ -1718,7 +1718,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio */ if (must_wait) { rq->ref_count++; - rq->waiting = &wait; + rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; } @@ -1736,7 +1736,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio err = 0; if (must_wait) { wait_for_completion(&wait); - rq->waiting = NULL; if (rq->errors) err = -EIO; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 643e4b9ac651..66f9678d2f10 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2773,7 +2773,7 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq) return; } #endif /* IDETAPE_DEBUG_BUGS */ - rq->waiting = &wait; + rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; spin_unlock_irq(&tape->spinlock); wait_for_completion(&wait); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 9384a3fdde6c..2b1a1389c318 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1219,7 +1219,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&args, 0, sizeof(args)); rq.cmd_type = REQ_TYPE_PM_SUSPEND; rq.special = &args; - rq.end_io_data = &rqpm; + rq.data = &rqpm; rqpm.pm_step = ide_pm_state_start_suspend; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -1240,7 +1240,7 @@ static int generic_ide_resume(struct device *dev) memset(&args, 0, sizeof(args)); rq.cmd_type = REQ_TYPE_PM_RESUME; rq.special = &args; - rq.end_io_data = &rqpm; + rq.data = &rqpm; rqpm.pm_step = ide_pm_state_start_resume; rqpm.pm_state = PM_EVENT_ON; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d2dc17151f6c..604f23189097 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -266,7 +266,6 @@ struct request { request_queue_t *q; struct request_list *rl; - struct completion *waiting; void *special; char *buffer; @@ -285,7 +284,7 @@ struct request { int retries; /* - * completion callback. end_io_data should be folded in with waiting + * completion callback. */ rq_end_io_fn *end_io; void *end_io_data; -- cgit v1.2.3-71-gd317 From 49171e5c6f414d49a061b5c1c84967c2eb569822 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Aug 2006 08:59:11 +0200 Subject: [PATCH] Remove struct request_list from struct request It is always identical to &q->rq, and we only use it for detecting whether this request came out of our mempool or not. So replace it with an additional ->flags bit flag. Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 10 ++-------- include/linux/blkdev.h | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 3b6aad2affd8..f7462502bfdf 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -452,7 +452,6 @@ static void queue_flush(request_queue_t *q, unsigned which) rq->elevator_private = NULL; rq->elevator_private2 = NULL; rq->rq_disk = q->bar_rq.rq_disk; - rq->rl = NULL; rq->end_io = end_io; q->prepare_flush_fn(q, rq); @@ -480,7 +479,6 @@ static inline struct request *start_ordered(request_queue_t *q, rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; rq->elevator_private = NULL; rq->elevator_private2 = NULL; - rq->rl = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; @@ -2018,7 +2016,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, * first three bits are identical in rq->cmd_flags and bio->bi_rw, * see bio.h and blkdev.h */ - rq->cmd_flags = rw; + rq->cmd_flags = rw | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { @@ -2196,7 +2194,6 @@ rq_starved: ioc->nr_batch_requests--; rq_init(q, rq); - rq->rl = rl; blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: @@ -2681,8 +2678,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats); */ void __blk_put_request(request_queue_t *q, struct request *req) { - struct request_list *rl = req->rl; - if (unlikely(!q)) return; if (unlikely(--req->ref_count)) @@ -2691,13 +2686,12 @@ void __blk_put_request(request_queue_t *q, struct request *req) elv_completed_request(q, req); req->rq_status = RQ_INACTIVE; - req->rl = NULL; /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools */ - if (rl) { + if (req->cmd_flags & REQ_ALLOCED) { int rw = rq_data_dir(req); int priv = req->cmd_flags & REQ_ELVPRIV; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 604f23189097..d4c1dd046e27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -180,6 +180,7 @@ enum rq_flag_bits { __REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ + __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_NR_BITS, /* stops here */ }; @@ -199,6 +200,7 @@ enum rq_flag_bits { #define REQ_PREEMPT (1 << __REQ_PREEMPT) #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) +#define REQ_ALLOCED (1 << __REQ_ALLOCED) #define BLK_MAX_CDB 16 @@ -264,7 +266,6 @@ struct request { int ref_count; request_queue_t *q; - struct request_list *rl; void *special; char *buffer; -- cgit v1.2.3-71-gd317 From cdd6026217c0e4cda2efce1bdc318661bef1f66f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Jul 2006 09:32:07 +0200 Subject: [PATCH] Remove ->rq_status from struct request After Christophs SCSI change, the only usage left is RQ_ACTIVE and RQ_INACTIVE. The block layer sets RQ_INACTIVE right before freeing the request, so any check for RQ_INACTIVE in a driver is a bug and indicates use-after-free. So kill/clean the remaining users, straight forward. Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 2 -- block/ll_rw_blk.c | 3 --- drivers/block/paride/pd.c | 1 - drivers/block/swim3.c | 4 ++-- drivers/block/swim_iop.c | 4 ++-- drivers/fc4/fc.c | 1 - drivers/ide/ide-floppy.c | 3 +-- drivers/ide/ide-io.c | 1 - drivers/ide/ide-tape.c | 4 ++-- drivers/scsi/ide-scsi.c | 2 +- drivers/scsi/scsi.c | 2 +- include/linux/blkdev.h | 13 +++++-------- 12 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 5fa4c8e258a4..fda4a3940698 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) __u64 offset; int len; - if(req->rq_status == RQ_INACTIVE) return(1); - /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index f7462502bfdf..b94a396aa624 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -283,7 +283,6 @@ static inline void rq_init(request_queue_t *q, struct request *rq) INIT_LIST_HEAD(&rq->donelist); rq->errors = 0; - rq->rq_status = RQ_ACTIVE; rq->bio = rq->biotail = NULL; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); @@ -2685,8 +2684,6 @@ void __blk_put_request(request_queue_t *q, struct request *req) elv_completed_request(q, req); - req->rq_status = RQ_INACTIVE; - /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 500d2ebb41e4..38578b9dbfd1 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -719,7 +719,6 @@ static int pd_special_command(struct pd_unit *disk, memset(&rq, 0, sizeof(rq)); rq.errors = 0; - rq.rq_status = RQ_ACTIVE; rq.rq_disk = disk->gd; rq.ref_count = 1; rq.end_io_data = &wait; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index cc42e762396f..f2305ee792a1 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs) printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", req->rq_disk->disk_name, req->cmd, (long)req->sector, req->nr_sectors, req->buffer); - printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n", - req->rq_status, req->errors, req->current_nr_sectors); + printk(" errors=%d current_nr_sectors=%ld\n", + req->errors, req->current_nr_sectors); #endif if (req->sector < 0 || req->sector >= fs->total_secs) { diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c index 89e3c2f8b776..dfda796eba56 100644 --- a/drivers/block/swim_iop.c +++ b/drivers/block/swim_iop.c @@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs) printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", CURRENT->rq_disk->disk_name, CURRENT->cmd, CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer); - printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n", - CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors); + printk(" errors=%d current_nr_sectors=%ld\n", + CURRENT->errors, CURRENT->current_nr_sectors); #endif if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) { diff --git a/drivers/fc4/fc.c b/drivers/fc4/fc.c index 1a159e8843ca..22d17474755f 100644 --- a/drivers/fc4/fc.c +++ b/drivers/fc4/fc.c @@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt) */ fc->rst_pkt->device->host->eh_action = &sem; - fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY; fc->rst_pkt->done = fcp_scsi_reset_done; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 0edc32204915..8ccee9c769f8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request idefloppy_pc_t *pc; unsigned long block = (unsigned long)block_s; - debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n", - rq->rq_status, + debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n", rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->flags, rq->errors); debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, " diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a3ffb04436bd..38479a29d3e1 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio int must_wait = (action == ide_wait || action == ide_head_wait); rq->errors = 0; - rq->rq_status = RQ_ACTIVE; /* * we need to hold an extra reference to request for safe inspection diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 66f9678d2f10..2ebc3760f261 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, #if IDETAPE_DEBUG_LOG #if 0 if (tape->debug_level >= 5) - printk(KERN_INFO "ide-tape: rq_status: %d, " - "dev: %s, cmd: %ld, errors: %d\n", rq->rq_status, + printk(KERN_INFO "ide-tape: %d, " + "dev: %s, cmd: %ld, errors: %d\n", rq->rq_disk->disk_name, rq->cmd[0], rq->errors); #endif if (tape->debug_level >= 2) diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 65b19695ebe2..1427a41e8441 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -708,7 +708,7 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc) static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block) { #if IDESCSI_DEBUG_LOG - printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors); + printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors); printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); #endif /* IDESCSI_DEBUG_LOG */ diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 7a054f9d1ee3..12f6639dda2d 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1065,7 +1065,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery) spin_lock_irqsave(&sdev->list_lock, flags); list_for_each_entry(scmd, &sdev->cmd_list, list) { - if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) { + if (scmd->request) { /* * If we are unable to remove the timer, it means * that the command has already timed out or diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4c1dd046e27..8a3e309e0842 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -243,8 +243,6 @@ struct request { void *completion_data; - int rq_status; /* should split this into a few status bits */ - int errors; struct gendisk *rq_disk; unsigned long start_time; @@ -262,14 +260,16 @@ struct request { unsigned short ioprio; - int tag; - - int ref_count; request_queue_t *q; void *special; char *buffer; + int tag; + int errors; + + int ref_count; + /* * when request is used as a packet command carrier */ @@ -456,9 +456,6 @@ struct request_queue struct mutex sysfs_lock; }; -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 - #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ -- cgit v1.2.3-71-gd317 From cb78b285c8f9d59b0d4e4f6a54c2977ce1d9b880 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Jul 2006 09:32:57 +0200 Subject: [PATCH] Drop useless bio passing in may_queue/set_request API It's not needed for anything, so kill the bio passing. Signed-off-by: Jens Axboe --- block/as-iosched.c | 2 +- block/cfq-iosched.c | 5 ++--- block/elevator.c | 9 ++++----- block/ll_rw_blk.c | 9 ++++----- include/linux/elevator.h | 8 ++++---- 5 files changed, 15 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index 02eb9333898f..66015bc79e6f 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1284,7 +1284,7 @@ static void as_work_handler(void *data) spin_unlock_irqrestore(q->queue_lock, flags); } -static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int as_may_queue(request_queue_t *q, int rw) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3c5fd9c2c205..2ac35aacbbf9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1752,7 +1752,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, return ELV_MQUEUE_MAY; } -static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int cfq_may_queue(request_queue_t *q, int rw) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -1817,8 +1817,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq) * Allocate cfq data structures associated with this request. */ static int -cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) +cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; diff --git a/block/elevator.c b/block/elevator.c index 924b81b08f86..788d2d81994c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -796,13 +796,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) return NULL; } -int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) +int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) { elevator_t *e = q->elevator; if (e->ops->elevator_set_req_fn) - return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); + return e->ops->elevator_set_req_fn(q, rq, gfp_mask); rq->elevator_private = NULL; return 0; @@ -816,12 +815,12 @@ void elv_put_request(request_queue_t *q, struct request *rq) e->ops->elevator_put_req_fn(q, rq); } -int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) +int elv_may_queue(request_queue_t *q, int rw) { elevator_t *e = q->elevator; if (e->ops->elevator_may_queue_fn) - return e->ops->elevator_may_queue_fn(q, rw, bio); + return e->ops->elevator_may_queue_fn(q, rw); return ELV_MQUEUE_MAY; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index b94a396aa624..b1ea941f6dc3 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2003,8 +2003,7 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq) } static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, - int priv, gfp_t gfp_mask) +blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -2018,7 +2017,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, rq->cmd_flags = rw | REQ_ALLOCED; if (priv) { - if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { + if (unlikely(elv_set_request(q, rq, gfp_mask))) { mempool_free(rq, q->rq.rq_pool); return NULL; } @@ -2109,7 +2108,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, struct io_context *ioc = NULL; int may_queue, priv; - may_queue = elv_may_queue(q, rw, bio); + may_queue = elv_may_queue(q, rw); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; @@ -2157,7 +2156,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); + rq = blk_alloc_request(q, rw, priv, gfp_mask); if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 0e7b1a733919..cc81645a3e18 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -14,9 +14,9 @@ typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); typedef int (elevator_queue_empty_fn) (request_queue_t *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); -typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); +typedef int (elevator_may_queue_fn) (request_queue_t *, int); -typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); +typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, gfp_t); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); @@ -105,9 +105,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *); extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(request_queue_t *q); extern void elv_unregister_queue(request_queue_t *q); -extern int elv_may_queue(request_queue_t *, int, struct bio *); +extern int elv_may_queue(request_queue_t *, int); extern void elv_completed_request(request_queue_t *, struct request *); -extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); +extern int elv_set_request(request_queue_t *, struct request *, gfp_t); extern void elv_put_request(request_queue_t *, struct request *); /* -- cgit v1.2.3-71-gd317 From e6a1c874a064e7d07f24986aba7cd537b7f4a25d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Aug 2006 09:00:21 +0200 Subject: [PATCH] struct request: shrink and optimize some more Move some members around and unionize completion_data and rb_node since they cannot ever be used at the same time. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8a3e309e0842..a1e288069e2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -211,6 +211,8 @@ struct request { struct list_head queuelist; struct list_head donelist; + request_queue_t *q; + unsigned int cmd_flags; enum rq_cmd_type_bits cmd_type; @@ -219,12 +221,12 @@ struct request { */ sector_t sector; /* next sector to submit */ + sector_t hard_sector; /* next sector to complete */ unsigned long nr_sectors; /* no. of sectors left to submit */ + unsigned long hard_nr_sectors; /* no. of sectors left to complete */ /* no. of sectors left to submit in the current segment */ unsigned int current_nr_sectors; - sector_t hard_sector; /* next sector to complete */ - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ /* no. of sectors left to complete in the current segment */ unsigned int hard_cur_sectors; @@ -232,7 +234,15 @@ struct request { struct bio *biotail; struct hlist_node hash; /* merge hash */ - struct rb_node rb_node; /* sort/lookup */ + /* + * The rb_node is only used inside the io scheduler, requests + * are pruned when moved to the dispatch queue. So let the + * completion_data share space with the rb_node. + */ + union { + struct rb_node rb_node; /* sort/lookup */ + void *completion_data; + }; /* * two pointers are available for the IO schedulers, if they need @@ -241,8 +251,6 @@ struct request { void *elevator_private; void *elevator_private2; - void *completion_data; - struct gendisk *rq_disk; unsigned long start_time; @@ -260,8 +268,6 @@ struct request { unsigned short ioprio; - request_queue_t *q; - void *special; char *buffer; -- cgit v1.2.3-71-gd317 From fc46379daf90dce57bf765c81d3b39f55150aac2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Aug 2006 09:05:44 +0200 Subject: [PATCH] cfq-iosched: kill cfq_exit_lock cfq_exit_lock is protecting two things now: - The per-ioc rbtree of cfq_io_contexts - The per-cfqd linked list of cfq_io_contexts The per-cfqd linked list can be protected by the queue lock, as it is (by definition) per cfqd as the queue lock is. The per-ioc rbtree is mainly used and updated by the process itself only. The only outside use is the io priority changing. If we move the priority changing to not browsing the rbtree, we can remove any locking from the rbtree updates and lookup completely. Let the sys_ioprio syscall just mark processes as having the iopriority changed and lazily update the private cfq io contexts the next time io is queued, and we can remove this locking as well. Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 54 ++++++++++++++++---------------------------------- block/ll_rw_blk.c | 2 +- fs/ioprio.c | 4 ++-- include/linux/blkdev.h | 2 +- 4 files changed, 21 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ec24284e9d39..33e0b0c5e31d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -31,8 +31,6 @@ static int cfq_slice_idle = HZ / 125; #define CFQ_KEY_ASYNC (0) -static DEFINE_SPINLOCK(cfq_exit_lock); - /* * for the hash of cfqq inside the cfqd */ @@ -1084,12 +1082,6 @@ static void cfq_free_io_context(struct io_context *ioc) complete(ioc_gone); } -static void cfq_trim(struct io_context *ioc) -{ - ioc->set_ioprio = NULL; - cfq_free_io_context(ioc); -} - static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (unlikely(cfqq == cfqd->active_queue)) @@ -1101,6 +1093,10 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) static void __cfq_exit_single_io_context(struct cfq_data *cfqd, struct cfq_io_context *cic) { + list_del_init(&cic->queue_list); + smp_wmb(); + cic->key = NULL; + if (cic->cfqq[ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); cic->cfqq[ASYNC] = NULL; @@ -1110,9 +1106,6 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); cic->cfqq[SYNC] = NULL; } - - cic->key = NULL; - list_del_init(&cic->queue_list); } @@ -1123,27 +1116,23 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) { struct cfq_data *cfqd = cic->key; - WARN_ON(!irqs_disabled()); - if (cfqd) { request_queue_t *q = cfqd->queue; - spin_lock(q->queue_lock); + spin_lock_irq(q->queue_lock); __cfq_exit_single_io_context(cfqd, cic); - spin_unlock(q->queue_lock); + spin_unlock_irq(q->queue_lock); } } static void cfq_exit_io_context(struct io_context *ioc) { struct cfq_io_context *__cic; - unsigned long flags; struct rb_node *n; /* * put the reference this task is holding to the various queues */ - spin_lock_irqsave(&cfq_exit_lock, flags); n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1152,8 +1141,6 @@ static void cfq_exit_io_context(struct io_context *ioc) cfq_exit_single_io_context(__cic); n = rb_next(n); } - - spin_unlock_irqrestore(&cfq_exit_lock, flags); } static struct cfq_io_context * @@ -1248,15 +1235,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic) spin_unlock(cfqd->queue->queue_lock); } -/* - * callback from sys_ioprio_set, irqs are disabled - */ -static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) +static void cfq_ioc_set_ioprio(struct io_context *ioc) { struct cfq_io_context *cic; struct rb_node *n; - spin_lock(&cfq_exit_lock); + ioc->ioprio_changed = 0; n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1265,10 +1249,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) changed_ioprio(cic); n = rb_next(n); } - - spin_unlock(&cfq_exit_lock); - - return 0; } static struct cfq_queue * @@ -1336,10 +1316,8 @@ out: static void cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) { - spin_lock(&cfq_exit_lock); + WARN_ON(!list_empty(&cic->queue_list)); rb_erase(&cic->rb_node, &ioc->cic_root); - list_del_init(&cic->queue_list); - spin_unlock(&cfq_exit_lock); kmem_cache_free(cfq_ioc_pool, cic); atomic_dec(&ioc_count); } @@ -1385,7 +1363,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, cic->ioc = ioc; cic->key = cfqd; - ioc->set_ioprio = cfq_ioc_set_ioprio; restart: parent = NULL; p = &ioc->cic_root.rb_node; @@ -1407,11 +1384,12 @@ restart: BUG(); } - spin_lock(&cfq_exit_lock); rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); + + spin_lock_irq(cfqd->queue->queue_lock); list_add(&cic->queue_list, &cfqd->cic_list); - spin_unlock(&cfq_exit_lock); + spin_unlock_irq(cfqd->queue->queue_lock); } /* @@ -1441,6 +1419,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) cfq_cic_link(cfqd, ioc, cic); out: + smp_read_barrier_depends(); + if (unlikely(ioc->ioprio_changed)) + cfq_ioc_set_ioprio(ioc); + return cic; err: put_io_context(ioc); @@ -1945,7 +1927,6 @@ static void cfq_exit_queue(elevator_t *e) cfq_shutdown_timer_wq(cfqd); - spin_lock(&cfq_exit_lock); spin_lock_irq(q->queue_lock); if (cfqd->active_queue) @@ -1960,7 +1941,6 @@ static void cfq_exit_queue(elevator_t *e) } spin_unlock_irq(q->queue_lock); - spin_unlock(&cfq_exit_lock); cfq_shutdown_timer_wq(cfqd); @@ -2149,7 +2129,7 @@ static struct elevator_type iosched_cfq = { .elevator_may_queue_fn = cfq_may_queue, .elevator_init_fn = cfq_init_queue, .elevator_exit_fn = cfq_exit_queue, - .trim = cfq_trim, + .trim = cfq_free_io_context, }, .elevator_attrs = cfq_attrs, .elevator_name = "cfq", diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index e25b4cd2dcd1..508548b834f1 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3654,7 +3654,7 @@ struct io_context *current_io_context(gfp_t gfp_flags) if (ret) { atomic_set(&ret->refcount, 1); ret->task = current; - ret->set_ioprio = NULL; + ret->ioprio_changed = 0; ret->last_waited = jiffies; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; diff --git a/fs/ioprio.c b/fs/ioprio.c index 78b1deae3fa2..0fd1089d7bf6 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) /* see wmb() in current_io_context() */ smp_read_barrier_depends(); - if (ioc && ioc->set_ioprio) - ioc->set_ioprio(ioc, ioprio); + if (ioc) + ioc->ioprio_changed = 1; task_unlock(task); return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a1e288069e2e..79cb9fa8034a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -90,7 +90,7 @@ struct io_context { atomic_t refcount; struct task_struct *task; - int (*set_ioprio)(struct io_context *, unsigned int); + unsigned int ioprio_changed; /* * For request batching -- cgit v1.2.3-71-gd317 From 4a893e837bb470867d74c05d6c6b97bba5a96185 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 22 Jul 2006 15:37:43 +0200 Subject: [PATCH] elevator: define ioc counting mechanism None of the in-kernel primitives for handling "atomic" counting seem to be a good fit. We need something that is essentially free for incrementing/decrementing, while the read side may be more expensive as we only ever need to do that when a device is removed from the kernel. Use a per-cpu variable for maintaining a per-cpu ioc count and define a reading mechanism that just sums up the values. Signed-off-by: Jens Axboe --- include/linux/elevator.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index cc81645a3e18..9c5a04f6114c 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -1,6 +1,8 @@ #ifndef _LINUX_ELEVATOR_H #define _LINUX_ELEVATOR_H +#include + typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct bio *); @@ -178,4 +180,27 @@ enum { INIT_LIST_HEAD(&(rq)->donelist); \ } while (0) +/* + * io context count accounting + */ +#define elv_ioc_count_mod(name, __val) \ + do { \ + preempt_disable(); \ + __get_cpu_var(name) += (__val); \ + preempt_enable(); \ + } while (0) + +#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1) +#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1) + +#define elv_ioc_count_read(name) \ +({ \ + unsigned long __val = 0; \ + int __cpu; \ + smp_wmb(); \ + for_each_possible_cpu(__cpu) \ + __val += per_cpu(name, __cpu); \ + __val; \ +}) + #endif -- cgit v1.2.3-71-gd317 From a3b05e8f58c95dfccbf2c824d0c68e5990571f24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Jul 2006 09:36:46 +0200 Subject: [PATCH] Kill various deprecated/unused block layer defines/functions Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 1 - drivers/block/cpqarray.c | 1 - include/linux/blkdev.h | 29 ----------------------------- include/linux/fs.h | 1 - 4 files changed, 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 2cd3391ff878..c211065ad829 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status) int nr_sectors = bio_sectors(bio); bio->bi_next = NULL; - blk_finished_io(len); bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO); bio = xbh; } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 78082edc14b4..4abc193314ee 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok) xbh = bio->bi_next; bio->bi_next = NULL; - blk_finished_io(nr_sectors); bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO); bio = xbh; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79cb9fa8034a..593386162f47 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -578,12 +578,6 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) -/* - * noop, requests are automagically marked as active/inactive by I/O - * scheduler -- see elv_next_request - */ -#define blk_queue_headactive(q, head_active) - /* * q->prep_rq_fn return values */ @@ -621,11 +615,6 @@ static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) -struct sec_size { - unsigned block_size; - unsigned block_size_bits; -}; - extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); @@ -690,16 +679,6 @@ extern void end_that_request_last(struct request *, int); extern void end_request(struct request *req, int uptodate); extern void blk_complete_request(struct request *); -static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) -{ - if (blk_fs_request(rq)) - return (nr_bytes >= (rq->hard_nr_sectors << 9)); - else if (blk_pc_request(rq)) - return nr_bytes >= rq->data_len; - - return 0; -} - /* * end_that_request_first/chunk() takes an uptodate argument. we account * any value <= as an io error. 0 means -EIO for compatability reasons, @@ -807,14 +786,6 @@ static inline int queue_dma_alignment(request_queue_t *q) return retval; } -static inline int bdev_dma_aligment(struct block_device *bdev) -{ - return queue_dma_alignment(bdev_get_queue(bdev)); -} - -#define blk_finished_io(nsects) do { } while (0) -#define blk_started_io(nsects) do { } while (0) - /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 6eafbe309483..9306f63bf77e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,7 +79,6 @@ extern int dir_notify_enable; #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define SPECIAL 4 /* For non-blockdevice requests in request queue */ #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3-71-gd317 From b5deef901282628d88c784f4c9d2f0583ec3b355 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Jul 2006 23:39:40 +0200 Subject: [PATCH] Make sure all block/io scheduler setups are node aware Some were kmalloc_node(), some were still kmalloc(). Change them all to kmalloc_node(). Signed-off-by: Jens Axboe --- block/as-iosched.c | 8 ++++---- block/cfq-iosched.c | 13 +++++++------ block/elevator.c | 11 ++++++----- block/ll_rw_blk.c | 13 +++++++------ block/noop-iosched.c | 2 +- include/linux/blkdev.h | 3 +-- 6 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index 8e1fef1eafc9..f6dc95489316 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -210,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void) * If the current task has no AS IO context then create one and initialise it. * Then take a ref on the task's io context and return it. */ -static struct io_context *as_get_io_context(void) +static struct io_context *as_get_io_context(int node) { - struct io_context *ioc = get_io_context(GFP_ATOMIC); + struct io_context *ioc = get_io_context(GFP_ATOMIC, node); if (ioc && !ioc->aic) { ioc->aic = alloc_as_io_context(); if (!ioc->aic) { @@ -1148,7 +1148,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) data_dir = rq_is_sync(rq); - rq->elevator_private = as_get_io_context(); + rq->elevator_private = as_get_io_context(q->node); if (RQ_IOC(rq)) { as_update_iohist(ad, RQ_IOC(rq)->aic, rq); @@ -1292,7 +1292,7 @@ static int as_may_queue(request_queue_t *q, int rw) struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - ioc = as_get_io_context(); + ioc = as_get_io_context(q->node); if (ad->io_context == ioc) ret = ELV_MQUEUE_MUST; put_io_context(ioc); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 85f1d87e86d4..0452108a932e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1148,8 +1148,9 @@ static void cfq_exit_io_context(struct io_context *ioc) static struct cfq_io_context * cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { - struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); + struct cfq_io_context *cic; + cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node); if (cic) { memset(cic, 0, sizeof(*cic)); cic->last_end_request = jiffies; @@ -1277,11 +1278,11 @@ retry: * free memory. */ spin_unlock_irq(cfqd->queue->queue_lock); - new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask|__GFP_NOFAIL); + new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node); spin_lock_irq(cfqd->queue->queue_lock); goto retry; } else { - cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node); if (!cfqq) goto out; } @@ -1407,7 +1408,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) might_sleep_if(gfp_mask & __GFP_WAIT); - ioc = get_io_context(gfp_mask); + ioc = get_io_context(gfp_mask, cfqd->queue->node); if (!ioc) return NULL; @@ -1955,7 +1956,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) struct cfq_data *cfqd; int i; - cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); + cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node); if (!cfqd) return NULL; @@ -1970,7 +1971,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&cfqd->empty_list); INIT_LIST_HEAD(&cfqd->cic_list); - cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); + cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node); if (!cfqd->cfq_hash) goto out_free; diff --git a/block/elevator.c b/block/elevator.c index 788d2d81994c..e643291793a4 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -161,12 +161,12 @@ __setup("elevator=", elevator_setup); static struct kobj_type elv_ktype; -static elevator_t *elevator_alloc(struct elevator_type *e) +static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e) { elevator_t *eq; int i; - eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); + eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node); if (unlikely(!eq)) goto err; @@ -178,7 +178,8 @@ static elevator_t *elevator_alloc(struct elevator_type *e) eq->kobj.ktype = &elv_ktype; mutex_init(&eq->sysfs_lock); - eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL); + eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, + GFP_KERNEL, q->node); if (!eq->hash) goto err; @@ -224,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name) e = elevator_get("noop"); } - eq = elevator_alloc(e); + eq = elevator_alloc(q, e); if (!eq) return -ENOMEM; @@ -987,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) /* * Allocate new elevator */ - e = elevator_alloc(new_e); + e = elevator_alloc(q, new_e); if (!e) return 0; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 4b7b4461e8d6..c6dfa889206c 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); static void init_request_from_bio(struct request *req, struct bio *bio); static int __make_request(request_queue_t *q, struct bio *bio); +static struct io_context *current_io_context(gfp_t gfp_flags, int node); /* * For the allocated request tables @@ -2114,7 +2115,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { if (rl->count[rw]+1 >= q->nr_requests) { - ioc = current_io_context(GFP_ATOMIC); + ioc = current_io_context(GFP_ATOMIC, q->node); /* * The queue will fill after this allocation, so set * it as full, and mark this process as "batching". @@ -2234,7 +2235,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw, * up to a big batch of them for a small period time. * See ioc_batching, ioc_set_batching */ - ioc = current_io_context(GFP_NOIO); + ioc = current_io_context(GFP_NOIO, q->node); ioc_set_batching(q, ioc); spin_lock_irq(q->queue_lock); @@ -3641,7 +3642,7 @@ void exit_io_context(void) * but since the current task itself holds a reference, the context can be * used in general code, so long as it stays within `current` context. */ -struct io_context *current_io_context(gfp_t gfp_flags) +static struct io_context *current_io_context(gfp_t gfp_flags, int node) { struct task_struct *tsk = current; struct io_context *ret; @@ -3650,7 +3651,7 @@ struct io_context *current_io_context(gfp_t gfp_flags) if (likely(ret)) return ret; - ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); + ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); if (ret) { atomic_set(&ret->refcount, 1); ret->task = current; @@ -3674,10 +3675,10 @@ EXPORT_SYMBOL(current_io_context); * * This is always called in the context of the task which submitted the I/O. */ -struct io_context *get_io_context(gfp_t gfp_flags) +struct io_context *get_io_context(gfp_t gfp_flags, int node) { struct io_context *ret; - ret = current_io_context(gfp_flags); + ret = current_io_context(gfp_flags, node); if (likely(ret)) atomic_inc(&ret->refcount); return ret; diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 56a7c620574f..79af43179421 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e) { struct noop_data *nd; - nd = kmalloc(sizeof(*nd), GFP_KERNEL); + nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); if (!nd) return NULL; INIT_LIST_HEAD(&nd->queue); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 593386162f47..6609371c303e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -104,8 +104,7 @@ struct io_context { void put_io_context(struct io_context *ioc); void exit_io_context(void); -struct io_context *current_io_context(gfp_t gfp_flags); -struct io_context *get_io_context(gfp_t gfp_flags); +struct io_context *get_io_context(gfp_t gfp_flags, int node); void copy_io_context(struct io_context **pdst, struct io_context **psrc); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); -- cgit v1.2.3-71-gd317 From dc72ef4ae35c2016fb594bcc85ce871376682174 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 Jul 2006 14:54:05 +0200 Subject: [PATCH] Add blk_start_queueing() helper CFQ implements this on its own now, but it's really block layer knowledge. Tells a device queue to start dispatching requests to the driver, taking care to unplug if needed. Also fixes the issue where as/cfq will invoke a stopped queue, which we really don't want. Signed-off-by: Jens Axboe --- block/as-iosched.c | 3 +-- block/cfq-iosched.c | 22 ++++------------------ block/ll_rw_blk.c | 25 ++++++++++++++++++++----- include/linux/blkdev.h | 1 + 4 files changed, 26 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/block/as-iosched.c b/block/as-iosched.c index f6dc95489316..bc13dc0b29be 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1280,8 +1280,7 @@ static void as_work_handler(void *data) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (!as_queue_empty(q)) - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6fb1613d44d7..9f684cc66bd1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1552,19 +1552,6 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) __cfq_set_active_queue(cfqd, cfqq); } -/* - * should really be a ll_rw_blk.c helper - */ -static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) -{ - request_queue_t *q = cfqd->queue; - - if (!blk_queue_plugged(q)) - q->request_fn(q); - else - __generic_unplug_device(q); -} - /* * Called when a new fs request (rq) is added (to cfqq). Check if there's * something we should do about it @@ -1593,7 +1580,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cic == cfqd->active_cic && del_timer(&cfqd->idle_slice_timer)) { cfq_slice_expired(cfqd, 0); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } return; } @@ -1614,7 +1601,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) { cfq_mark_cfqq_must_dispatch(cfqq); del_timer(&cfqd->idle_slice_timer); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* @@ -1624,7 +1611,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, */ cfq_preempt_queue(cfqd, cfqq); cfq_mark_cfqq_must_dispatch(cfqq); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } } @@ -1832,8 +1819,7 @@ static void cfq_kick_queue(void *data) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - blk_remove_plug(q); - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c6dfa889206c..346be9ae31f6 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2266,6 +2266,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) } EXPORT_SYMBOL(blk_get_request); +/** + * blk_start_queueing - initiate dispatch of requests to device + * @q: request queue to kick into gear + * + * This is basically a helper to remove the need to know whether a queue + * is plugged or not if someone just wants to initiate dispatch of requests + * for this queue. + * + * The queue lock must be held with interrupts disabled. + */ +void blk_start_queueing(request_queue_t *q) +{ + if (!blk_queue_plugged(q)) + q->request_fn(q); + else + __generic_unplug_device(q); +} +EXPORT_SYMBOL(blk_start_queueing); + /** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted @@ -2333,11 +2352,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq, drive_stat_acct(rq, rq->nr_sectors, 1); __elv_add_request(q, rq, where, 0); - - if (blk_queue_plugged(q)) - __generic_unplug_device(q); - else - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6609371c303e..5d8e288db163 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -635,6 +635,7 @@ extern void blk_stop_queue(request_queue_t *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(request_queue_t *q); extern void blk_run_queue(request_queue_t *); +extern void blk_start_queueing(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); -- cgit v1.2.3-71-gd317 From 5404bc7a87b9949cf61e0174b21f80e73239ab25 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Aug 2006 09:01:02 +0200 Subject: [PATCH] Allow file systems to differentiate between data and meta reads We can use this information for making more intelligent priority decisions, and it will also be useful for blktrace. Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 2 ++ include/linux/bio.h | 2 ++ include/linux/blkdev.h | 3 +++ include/linux/fs.h | 1 + 4 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index e3980ec747c1..57992ae511c2 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2884,6 +2884,8 @@ static void init_request_from_bio(struct request *req, struct bio *bio) if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; + if (bio_rw_meta(bio)) + req->cmd_flags |= REQ_RW_META; req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; diff --git a/include/linux/bio.h b/include/linux/bio.h index 76bdaeab6f62..711c321a7011 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,6 +148,7 @@ struct bio { #define BIO_RW_BARRIER 2 #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 +#define BIO_RW_META 5 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -178,6 +179,7 @@ struct bio { #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) +#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) /* * will die diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5d8e288db163..2c01a90998a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -180,6 +180,7 @@ enum rq_flag_bits { __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ __REQ_ALLOCED, /* request came from our alloc pool */ + __REQ_RW_META, /* metadata io request */ __REQ_NR_BITS, /* stops here */ }; @@ -200,6 +201,7 @@ enum rq_flag_bits { #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) #define REQ_ALLOCED (1 << __REQ_ALLOCED) +#define REQ_RW_META (1 << __REQ_RW_META) #define BLK_MAX_CDB 16 @@ -543,6 +545,7 @@ enum { * We regard a request as sync, if it's a READ or a SYNC write. */ #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) +#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) static inline int blk_queue_full(struct request_queue *q, int rw) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 9306f63bf77e..d68c37af4dfb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -80,6 +80,7 @@ extern int dir_notify_enable; #define READA 2 /* read-ahead - don't block if no resources */ #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) +#define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3-71-gd317 From 7457e6e2d7406c7009e9ad03db1335fe93b5fb71 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 23 Jul 2006 02:12:01 +0200 Subject: [PATCH] blktrace: support for logging metadata reads Signed-off-by: Jens Axboe --- block/blktrace.c | 5 ++++- include/linux/blktrace_api.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/blktrace.c b/block/blktrace.c index 8ff33441d8a2..2592f215a905 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK /* * Bio action bits of interest */ -static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) }; +static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; /* * More could be added as needed, taking care to increment the decrementer @@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) #define trace_ahead_bit(rw) \ (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) +#define trace_meta_bit(rw) \ + (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) /* * The worker for the various blk_add_trace*() types. Fills out a @@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= bio_act[trace_barrier_bit(rw)]; what |= bio_act[trace_sync_bit(rw)]; what |= bio_act[trace_ahead_bit(rw)]; + what |= bio_act[trace_meta_bit(rw)]; pid = tsk->pid; if (unlikely(act_log_check(bt, what, sector, pid))) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index ea48eb1b3fd3..b99a714fcac6 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -20,6 +20,7 @@ enum blktrace_cat { BLK_TC_PC = 1 << 9, /* pc requests */ BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ + BLK_TC_META = 1 << 12, /* metadata */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; -- cgit v1.2.3-71-gd317 From cf9a2ae8d49948f861b56e5333530e491a9da190 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:05:54 +0100 Subject: [PATCH] BLOCK: Move functions out of buffer code [try #6] Move some functions out of the buffering code that aren't strictly buffering specific. This is a precursor to being able to disable the block layer. (*) Moved some stuff out of fs/buffer.c: (*) The file sync and general sync stuff moved to fs/sync.c. (*) The superblock sync stuff moved to fs/super.c. (*) do_invalidatepage() moved to mm/truncate.c. (*) try_to_release_page() moved to mm/filemap.c. (*) Moved some related declarations between header files: (*) declarations for do_invalidatepage() and try_to_release_page() moved to linux/mm.h. (*) __set_page_dirty_buffers() moved to linux/buffer_head.h. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- fs/buffer.c | 174 -------------------------------------------- fs/super.c | 31 ++++++++ fs/sync.c | 113 ++++++++++++++++++++++++++++ include/linux/buffer_head.h | 3 +- include/linux/fs.h | 1 + include/linux/mm.h | 4 +- mm/filemap.c | 30 ++++++++ mm/page-writeback.c | 1 + mm/truncate.c | 24 ++++++ 9 files changed, 204 insertions(+), 177 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 3b6d701073e7..16cfbcd254f1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); -static void __fsync_super(struct super_block *sb) -{ - sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); -} - /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block @@ -259,118 +234,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) } EXPORT_SYMBOL(thaw_bdev); -/* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. - */ -static void do_sync(unsigned long wait) -{ - wakeup_pdflush(0); - sync_inodes(0); /* All mappings, inodes and their blockdevs */ - DQUOT_SYNC(NULL); - sync_supers(); /* Write the superblocks */ - sync_filesystems(0); /* Start syncing the filesystems */ - sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ - if (!wait) - printk("Emergency Sync complete\n"); - if (unlikely(laptop_mode)) - laptop_sync_completion(); -} - -asmlinkage long sys_sync(void) -{ - do_sync(1); - return 0; -} - -void emergency_sync(void) -{ - pdflush_operation(do_sync, 0); -} - -/* - * Generic function to fsync a file. - * - * filp may be NULL if called via the msync of a vma. - */ - -int file_fsync(struct file *filp, struct dentry *dentry, int datasync) -{ - struct inode * inode = dentry->d_inode; - struct super_block * sb; - int ret, err; - - /* sync the inode to buffers */ - ret = write_inode_now(inode, 0); - - /* sync the superblock to buffers */ - sb = inode->i_sb; - lock_super(sb); - if (sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - - /* .. finally sync the buffers to disk */ - err = sync_blockdev(sb->s_bdev); - if (!ret) - ret = err; - return ret; -} - -long do_fsync(struct file *file, int datasync) -{ - int ret; - int err; - struct address_space *mapping = file->f_mapping; - - if (!file->f_op || !file->f_op->fsync) { - /* Why? We can still call filemap_fdatawrite */ - ret = -EINVAL; - goto out; - } - - ret = filemap_fdatawrite(mapping); - - /* - * We need to protect against concurrent writers, which could cause - * livelocks in fsync_buffers_list(). - */ - mutex_lock(&mapping->host->i_mutex); - err = file->f_op->fsync(file, file->f_dentry, datasync); - if (!ret) - ret = err; - mutex_unlock(&mapping->host->i_mutex); - err = filemap_fdatawait(mapping); - if (!ret) - ret = err; -out: - return ret; -} - -static long __do_fsync(unsigned int fd, int datasync) -{ - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if (file) { - ret = do_fsync(file, datasync); - fput(file); - } - return ret; -} - -asmlinkage long sys_fsync(unsigned int fd) -{ - return __do_fsync(fd, 0); -} - -asmlinkage long sys_fdatasync(unsigned int fd) -{ - return __do_fsync(fd, 1); -} - /* * Various filesystems appear to want __find_get_block to be non-blocking. * But it's the page lock which protects the buffers. To get around this, @@ -1550,35 +1413,6 @@ static void discard_buffer(struct buffer_head * bh) unlock_buffer(bh); } -/** - * try_to_release_page() - release old fs-specific metadata on a page - * - * @page: the page which the kernel is trying to free - * @gfp_mask: memory allocation flags (and I/O mode) - * - * The address_space is to try to release any data against the page - * (presumably at page->private). If the release was successful, return `1'. - * Otherwise return zero. - * - * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). - * - * NOTE: @gfp_mask may go away, and this function may become non-blocking. - */ -int try_to_release_page(struct page *page, gfp_t gfp_mask) -{ - struct address_space * const mapping = page->mapping; - - BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) - return 0; - - if (mapping && mapping->a_ops->releasepage) - return mapping->a_ops->releasepage(page, gfp_mask); - return try_to_free_buffers(page); -} -EXPORT_SYMBOL(try_to_release_page); - /** * block_invalidatepage - invalidate part of all of a buffer-backed page * @@ -1630,14 +1464,6 @@ out: } EXPORT_SYMBOL(block_invalidatepage); -void do_invalidatepage(struct page *page, unsigned long offset) -{ - void (*invalidatepage)(struct page *, unsigned long); - invalidatepage = page->mapping->a_ops->invalidatepage ? : - block_invalidatepage; - (*invalidatepage)(page, offset); -} - /* * We attach and possibly dirty the buffers atomically wrt * __set_page_dirty_buffers() via private_lock. try_to_free_buffers diff --git a/fs/super.c b/fs/super.c index 6987824d0dce..15671cd048b1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -220,6 +220,37 @@ static int grab_super(struct super_block *s) __releases(sb_lock) return 0; } +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. Requires a second blkdev + * flush by the caller to complete the operation. + */ +void __fsync_super(struct super_block *sb) +{ + sync_inodes_sb(sb, 0); + DQUOT_SYNC(sb); + lock_super(sb); + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + sync_blockdev(sb->s_bdev); + sync_inodes_sb(sb, 1); +} + +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_super(struct super_block *sb) +{ + __fsync_super(sb); + return sync_blockdev(sb->s_bdev); +} + /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill diff --git a/fs/sync.c b/fs/sync.c index 955aef04da28..1de747b5ddb9 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -10,10 +10,123 @@ #include #include #include +#include +#include #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) +/* + * sync everything. Start out by waking pdflush, because that writes back + * all queues in parallel. + */ +static void do_sync(unsigned long wait) +{ + wakeup_pdflush(0); + sync_inodes(0); /* All mappings, inodes and their blockdevs */ + DQUOT_SYNC(NULL); + sync_supers(); /* Write the superblocks */ + sync_filesystems(0); /* Start syncing the filesystems */ + sync_filesystems(wait); /* Waitingly sync the filesystems */ + sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ + if (!wait) + printk("Emergency Sync complete\n"); + if (unlikely(laptop_mode)) + laptop_sync_completion(); +} + +asmlinkage long sys_sync(void) +{ + do_sync(1); + return 0; +} + +void emergency_sync(void) +{ + pdflush_operation(do_sync, 0); +} + +/* + * Generic function to fsync a file. + * + * filp may be NULL if called via the msync of a vma. + */ +int file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode * inode = dentry->d_inode; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + lock_super(sb); + if (sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + + /* .. finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} + +long do_fsync(struct file *file, int datasync) +{ + int ret; + int err; + struct address_space *mapping = file->f_mapping; + + if (!file->f_op || !file->f_op->fsync) { + /* Why? We can still call filemap_fdatawrite */ + ret = -EINVAL; + goto out; + } + + ret = filemap_fdatawrite(mapping); + + /* + * We need to protect against concurrent writers, which could cause + * livelocks in fsync_buffers_list(). + */ + mutex_lock(&mapping->host->i_mutex); + err = file->f_op->fsync(file, file->f_dentry, datasync); + if (!ret) + ret = err; + mutex_unlock(&mapping->host->i_mutex); + err = filemap_fdatawait(mapping); + if (!ret) + ret = err; +out: + return ret; +} + +static long __do_fsync(unsigned int fd, int datasync) +{ + struct file *file; + int ret = -EBADF; + + file = fget(fd); + if (file) { + ret = do_fsync(file, datasync); + fput(file); + } + return ret; +} + +asmlinkage long sys_fsync(unsigned int fd) +{ + return __do_fsync(fd, 0); +} + +asmlinkage long sys_fdatasync(unsigned int fd) +{ + return __do_fsync(fd, 1); +} + /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 737e407d0cd1..64b508e35d2a 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -190,9 +190,7 @@ extern int buffer_heads_over_limit; * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ -int try_to_release_page(struct page * page, gfp_t gfp_mask); void block_invalidatepage(struct page *page, unsigned long offset); -void do_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); @@ -302,4 +300,5 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +extern int __set_page_dirty_buffers(struct page *page); #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index d68c37af4dfb..1728142ec4b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1546,6 +1546,7 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); +extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b703b6d4358..4edf1934e5ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -743,7 +743,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); -int __set_page_dirty_buffers(struct page *page); +extern int try_to_release_page(struct page * page, gfp_t gfp_mask); +extern void do_invalidatepage(struct page *page, unsigned long offset); + int __set_page_dirty_nobuffers(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); diff --git a/mm/filemap.c b/mm/filemap.c index 3277f3b23524..d6846de08887 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2491,3 +2491,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, } return retval; } + +/** + * try_to_release_page() - release old fs-specific metadata on a page + * + * @page: the page which the kernel is trying to free + * @gfp_mask: memory allocation flags (and I/O mode) + * + * The address_space is to try to release any data against the page + * (presumably at page->private). If the release was successful, return `1'. + * Otherwise return zero. + * + * The @gfp_mask argument specifies whether I/O may be performed to release + * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). + * + * NOTE: @gfp_mask may go away, and this function may become non-blocking. + */ +int try_to_release_page(struct page *page, gfp_t gfp_mask) +{ + struct address_space * const mapping = page->mapping; + + BUG_ON(!PageLocked(page)); + if (PageWriteback(page)) + return 0; + + if (mapping && mapping->a_ops->releasepage) + return mapping->a_ops->releasepage(page, gfp_mask); + return try_to_free_buffers(page); +} + +EXPORT_SYMBOL(try_to_release_page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 488b7088557c..9fdcc7903956 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate diff --git a/mm/truncate.c b/mm/truncate.c index a654928323dc..cd3e34b816db 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -17,6 +17,30 @@ do_invalidatepage */ +/** + * do_invalidatepage - invalidate part of all of a page + * @page: the page which is affected + * @offset: the index of the truncation point + * + * do_invalidatepage() is called when all or part of the page has become + * invalidated by a truncate operation. + * + * do_invalidatepage() does not have to release all buffers, but it must + * ensure that no dirty buffer is left outside @offset and that no I/O + * is underway against any of the blocks which are outside the truncation + * point. Because the caller is about to free (and possibly reuse) those + * blocks on-disk. + */ +void do_invalidatepage(struct page *page, unsigned long offset) +{ + void (*invalidatepage)(struct page *, unsigned long); + invalidatepage = page->mapping->a_ops->invalidatepage; + if (!invalidatepage) + invalidatepage = block_invalidatepage; + if (invalidatepage) + (*invalidatepage)(page, offset); +} + static inline void truncate_partial_page(struct page *page, unsigned partial) { memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); -- cgit v1.2.3-71-gd317 From 0d67a46df0125e20d14f12dbd3646f1f1bf23e8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:05:56 +0100 Subject: [PATCH] BLOCK: Remove duplicate declaration of exit_io_context() [try #6] Remove the duplicate declaration of exit_io_context() from linux/sched.h. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- include/linux/sched.h | 1 - kernel/exit.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a06fc89cf6e5..fc4a9873ec10 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -710,7 +710,6 @@ extern unsigned int max_cache_size; struct io_context; /* See blkdev.h */ -void exit_io_context(void); struct cpuset; #define NGROUPS_SMALL 32 diff --git a/kernel/exit.c b/kernel/exit.c index 2e4c13cba95a..c189de2927ab 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -38,6 +38,7 @@ #include #include /* for audit_free() */ #include +#include #include #include -- cgit v1.2.3-71-gd317 From 07f3f05c1e3052b8656129b2a5aca9f888241a34 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 30 Sep 2006 20:52:18 +0200 Subject: [PATCH] BLOCK: Move extern declarations out of fs/*.c into header files [try #6] Create a new header file, fs/internal.h, for common definitions local to the sources in the fs/ directory. Move extern definitions that should be in header files from fs/*.c to fs/internal.h or other main header files where they span directories. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- arch/mips/kernel/signal_n32.c | 4 ++-- fs/binfmt_elf.c | 1 - fs/block_dev.c | 1 + fs/char_dev.c | 1 + fs/compat.c | 10 +++------- fs/compat_ioctl.c | 2 -- fs/dcache.c | 4 +--- fs/fs-writeback.c | 3 +-- fs/internal.h | 36 ++++++++++++++++++++++++++++++++++++ fs/namespace.c | 3 +-- include/linux/ramfs.h | 1 + include/linux/tty.h | 3 +++ kernel/compat.c | 2 ++ 13 files changed, 52 insertions(+), 19 deletions(-) create mode 100644 fs/internal.h (limited to 'include/linux') diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 477c5334ec1b..50c17eaa7f25 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -42,6 +42,8 @@ #include "signal-common.h" +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + /* * Including would give use the 64-bit syscall numbers ... */ @@ -81,8 +83,6 @@ struct rt_sigframe_n32 { #endif }; -extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat); - save_static_function(sysn32_rt_sigsuspend); __attribute_used__ noinline static int _sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6eb48e1446ec..bad52433de69 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -46,7 +46,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); -extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); #ifndef elf_addr_t #define elf_addr_t unsigned long diff --git a/fs/block_dev.c b/fs/block_dev.c index 4346468139e8..20f7333ba372 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -22,6 +22,7 @@ #include #include #include +#include "internal.h" struct bdev_inode { struct block_device bdev; diff --git a/fs/char_dev.c b/fs/char_dev.c index 1f3285affa39..a885f46ca001 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -24,6 +24,7 @@ #ifdef CONFIG_KMOD #include #endif +#include "internal.h" /* * capabilities for /dev/mem, /dev/kmem and similar directly mappable character diff --git a/fs/compat.c b/fs/compat.c index ce982f6e8c80..122b4e3992b5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -52,11 +52,12 @@ #include #include #include - -extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); +#include "internal.h" int compat_log = 1; +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + int compat_printk(const char *fmt, ...) { va_list ap; @@ -313,9 +314,6 @@ out: #define IOCTL_HASHSIZE 256 static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; -extern struct ioctl_trans ioctl_start[]; -extern int ioctl_table_size; - static inline unsigned long ioctl32_hash(unsigned long cmd) { return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; @@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data) return 0; } -extern int copy_mount_options (const void __user *, unsigned long *); - #define SMBFS_NAME "smbfs" #define NCPFS_NAME "ncpfs" #define NFS4_NAME "nfs4" diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 4063a9396977..ab74c9bd55fe 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1279,8 +1279,6 @@ static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } -extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg); - #ifdef CONFIG_VT static int vt_check(struct file *file) diff --git a/fs/dcache.c b/fs/dcache.c index 17b392a2049e..fc2faa44f8d1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; @@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); -extern void bdev_cache_init(void); -extern void chrdev_init(void); - void __init vfs_caches_init_early(void) { dcache_init_early(); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 892643dc9af1..0639024d83a9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,8 +22,7 @@ #include #include #include - -extern struct super_block *blockdev_superblock; +#include "internal.h" /** * __mark_inode_dirty - internal function diff --git a/fs/internal.h b/fs/internal.h new file mode 100644 index 000000000000..c21ecd37b1e7 --- /dev/null +++ b/fs/internal.h @@ -0,0 +1,36 @@ +/* fs/ internal definitions + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +/* + * block_dev.c + */ +extern struct super_block *blockdev_superblock; +extern void __init bdev_cache_init(void); + +/* + * char_dev.c + */ +extern void __init chrdev_init(void); + +/* + * compat_ioctl.c + */ +#ifdef CONFIG_COMPAT +extern struct ioctl_trans ioctl_start[]; +extern int ioctl_table_size; +#endif + +/* + * namespace.c + */ +extern int copy_mount_options(const void __user *, unsigned long *); diff --git a/fs/namespace.c b/fs/namespace.c index 6ede3a539ed8..66d921e14fee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -24,12 +24,11 @@ #include #include #include +#include #include #include #include "pnode.h" -extern int __init init_rootfs(void); - /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 00b340ba6612..b160fb18e8d6 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -17,5 +17,6 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); extern const struct file_operations ramfs_file_operations; extern struct vm_operations_struct generic_file_vm_ops; +extern int __init init_rootfs(void); #endif diff --git a/include/linux/tty.h b/include/linux/tty.h index ea4c2605f8da..44091c0db0b4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -307,6 +307,9 @@ extern void tty_ldisc_put(int); extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); +extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg); + extern struct mutex tty_mutex; /* n_tty.c */ diff --git a/kernel/compat.c b/kernel/compat.c index 75573e5d27b0..b4fbd838cd77 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -26,6 +26,8 @@ #include +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || -- cgit v1.2.3-71-gd317 From 811d736f9e8013966e1a5a930c0db09508bdbb15 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:06:09 +0100 Subject: [PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try #6] Dissociate the generic_writepages() function from the mpage stuff, moving its declaration to linux/mm.h and actually emitting a full implementation into mm/page-writeback.c. The implementation is a partial duplicate of mpage_writepages() with all BIO references removed. It is used by NFS to do writeback. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 + fs/mpage.c | 2 + include/linux/mpage.h | 6 --- include/linux/writeback.h | 2 + mm/page-writeback.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index 20f7333ba372..335c38bb86eb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/mpage.c b/fs/mpage.c index 1e4598247d0b..692a3e578fc8 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -693,6 +693,8 @@ out: * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. + * + * If you fix this you should check generic_writepages() also! */ int mpage_writepages(struct address_space *mapping, diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 3ca880463c47..517c098fde20 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -20,9 +20,3 @@ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); - -static inline int -generic_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - return mpage_writepages(mapping, wbc, NULL); -} diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9d4074ecd0cd..4f4d98addb44 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping) } int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); +extern int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9fdcc7903956..ecf27839c203 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -551,6 +552,139 @@ void __init page_writeback_init(void) register_cpu_notifier(&ratelimit_nb); } +/** + * generic_writepages - walk the list of dirty pages of the given + * address space and writepage() all of them. + * + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * + * This is a library function, which implements the writepages() + * address_space_operation. + * + * If a page is already under I/O, generic_writepages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + * + * Derived from mpage_writepages() - if you fix this you should check that + * also! + */ +int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + int (*writepage)(struct page *page, struct writeback_control *wbc); + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + writepage = mapping->a_ops->writepage; + + /* deal with chardevs and other special file */ + if (!writepage) + return 0; + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc); + if (ret) { + if (ret == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + +EXPORT_SYMBOL(generic_writepages); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; -- cgit v1.2.3-71-gd317 From 863d5b822c02d0e7215fb84ca79e9f8c3e35f04e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:06:14 +0100 Subject: [PATCH] BLOCK: Move the loop device ioctl compat stuff to the loop driver [try #6] Move the loop device ioctl compat stuff from fs/compat_ioctl.c to the loop driver so that the loop header file doesn't need to be included. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- drivers/block/loop.c | 160 +++++++++++++++++++++++++++++++++++++++++++ fs/compat_ioctl.c | 68 ------------------ include/linux/compat_ioctl.h | 6 -- 3 files changed, 160 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 68b0471ad5a6..d6bb8da955a2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include /* for invalidate_bdev() */ @@ -1165,6 +1166,162 @@ static int lo_ioctl(struct inode * inode, struct file * file, return err; } +#ifdef CONFIG_COMPAT +struct compat_loop_info { + compat_int_t lo_number; /* ioctl r/o */ + compat_dev_t lo_device; /* ioctl r/o */ + compat_ulong_t lo_inode; /* ioctl r/o */ + compat_dev_t lo_rdevice; /* ioctl r/o */ + compat_int_t lo_offset; + compat_int_t lo_encrypt_type; + compat_int_t lo_encrypt_key_size; /* ioctl w/o */ + compat_int_t lo_flags; /* ioctl r/o */ + char lo_name[LO_NAME_SIZE]; + unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ + compat_ulong_t lo_init[2]; + char reserved[4]; +}; + +/* + * Transfer 32-bit compatibility structure in userspace to 64-bit loop info + * - noinlined to reduce stack space usage in main part of driver + */ +static noinline int +loop_info64_from_compat(const struct compat_loop_info *arg, + struct loop_info64 *info64) +{ + struct compat_loop_info info; + + if (copy_from_user(&info, arg, sizeof(info))) + return -EFAULT; + + memset(info64, 0, sizeof(*info64)); + info64->lo_number = info.lo_number; + info64->lo_device = info.lo_device; + info64->lo_inode = info.lo_inode; + info64->lo_rdevice = info.lo_rdevice; + info64->lo_offset = info.lo_offset; + info64->lo_sizelimit = 0; + info64->lo_encrypt_type = info.lo_encrypt_type; + info64->lo_encrypt_key_size = info.lo_encrypt_key_size; + info64->lo_flags = info.lo_flags; + info64->lo_init[0] = info.lo_init[0]; + info64->lo_init[1] = info.lo_init[1]; + if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) + memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); + else + memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); + memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); + return 0; +} + +/* + * Transfer 64-bit loop info to 32-bit compatibility structure in userspace + * - noinlined to reduce stack space usage in main part of driver + */ +static noinline int +loop_info64_to_compat(const struct loop_info64 *info64, + struct compat_loop_info __user *arg) +{ + struct compat_loop_info info; + + memset(&info, 0, sizeof(info)); + info.lo_number = info64->lo_number; + info.lo_device = info64->lo_device; + info.lo_inode = info64->lo_inode; + info.lo_rdevice = info64->lo_rdevice; + info.lo_offset = info64->lo_offset; + info.lo_encrypt_type = info64->lo_encrypt_type; + info.lo_encrypt_key_size = info64->lo_encrypt_key_size; + info.lo_flags = info64->lo_flags; + info.lo_init[0] = info64->lo_init[0]; + info.lo_init[1] = info64->lo_init[1]; + if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) + memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); + else + memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); + memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); + + /* error in case values were truncated */ + if (info.lo_device != info64->lo_device || + info.lo_rdevice != info64->lo_rdevice || + info.lo_inode != info64->lo_inode || + info.lo_offset != info64->lo_offset || + info.lo_init[0] != info64->lo_init[0] || + info.lo_init[1] != info64->lo_init[1]) + return -EOVERFLOW; + + if (copy_to_user(arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +static int +loop_set_status_compat(struct loop_device *lo, + const struct compat_loop_info __user *arg) +{ + struct loop_info64 info64; + int ret; + + ret = loop_info64_from_compat(arg, &info64); + if (ret < 0) + return ret; + return loop_set_status(lo, &info64); +} + +static int +loop_get_status_compat(struct loop_device *lo, + struct compat_loop_info __user *arg) +{ + struct loop_info64 info64; + int err = 0; + + if (!arg) + err = -EINVAL; + if (!err) + err = loop_get_status(lo, &info64); + if (!err) + err = loop_info64_to_compat(&info64, arg); + return err; +} + +static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + struct loop_device *lo = inode->i_bdev->bd_disk->private_data; + int err; + + lock_kernel(); + switch(cmd) { + case LOOP_SET_STATUS: + mutex_lock(&lo->lo_ctl_mutex); + err = loop_set_status_compat( + lo, (const struct compat_loop_info __user *) arg); + mutex_unlock(&lo->lo_ctl_mutex); + break; + case LOOP_GET_STATUS: + mutex_lock(&lo->lo_ctl_mutex); + err = loop_get_status_compat( + lo, (struct compat_loop_info __user *) arg); + mutex_unlock(&lo->lo_ctl_mutex); + break; + case LOOP_CLR_FD: + case LOOP_GET_STATUS64: + case LOOP_SET_STATUS64: + arg = (unsigned long) compat_ptr(arg); + case LOOP_SET_FD: + case LOOP_CHANGE_FD: + err = lo_ioctl(inode, file, cmd, arg); + break; + default: + err = -ENOIOCTLCMD; + break; + } + unlock_kernel(); + return err; +} +#endif + static int lo_open(struct inode *inode, struct file *file) { struct loop_device *lo = inode->i_bdev->bd_disk->private_data; @@ -1192,6 +1349,9 @@ static struct block_device_operations lo_fops = { .open = lo_open, .release = lo_release, .ioctl = lo_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = lo_compat_ioctl, +#endif }; /* diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ab74c9bd55fe..3b0cf7fbd95a 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -1214,71 +1213,6 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return err; } -struct loop_info32 { - compat_int_t lo_number; /* ioctl r/o */ - compat_dev_t lo_device; /* ioctl r/o */ - compat_ulong_t lo_inode; /* ioctl r/o */ - compat_dev_t lo_rdevice; /* ioctl r/o */ - compat_int_t lo_offset; - compat_int_t lo_encrypt_type; - compat_int_t lo_encrypt_key_size; /* ioctl w/o */ - compat_int_t lo_flags; /* ioctl r/o */ - char lo_name[LO_NAME_SIZE]; - unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ - compat_ulong_t lo_init[2]; - char reserved[4]; -}; - -static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - mm_segment_t old_fs = get_fs(); - struct loop_info l; - struct loop_info32 __user *ul; - int err = -EINVAL; - - ul = compat_ptr(arg); - switch(cmd) { - case LOOP_SET_STATUS: - err = get_user(l.lo_number, &ul->lo_number); - err |= __get_user(l.lo_device, &ul->lo_device); - err |= __get_user(l.lo_inode, &ul->lo_inode); - err |= __get_user(l.lo_rdevice, &ul->lo_rdevice); - err |= __copy_from_user(&l.lo_offset, &ul->lo_offset, - 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); - if (err) { - err = -EFAULT; - } else { - set_fs (KERNEL_DS); - err = sys_ioctl (fd, cmd, (unsigned long)&l); - set_fs (old_fs); - } - break; - case LOOP_GET_STATUS: - set_fs (KERNEL_DS); - err = sys_ioctl (fd, cmd, (unsigned long)&l); - set_fs (old_fs); - if (!err) { - err = put_user(l.lo_number, &ul->lo_number); - err |= __put_user(l.lo_device, &ul->lo_device); - err |= __put_user(l.lo_inode, &ul->lo_inode); - err |= __put_user(l.lo_rdevice, &ul->lo_rdevice); - err |= __copy_to_user(&ul->lo_offset, &l.lo_offset, - (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); - if (err) - err = -EFAULT; - } - break; - default: { - static int count; - if (++count <= 20) - printk("%s: Unknown loop ioctl cmd, fd(%d) " - "cmd(%08x) arg(%08lx)\n", - __FUNCTION__, fd, cmd, arg); - } - } - return err; -} - #ifdef CONFIG_VT static int vt_check(struct file *file) @@ -2808,8 +2742,6 @@ HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans) HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans) HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans) HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans) -HANDLE_IOCTL(LOOP_SET_STATUS, loop_status) -HANDLE_IOCTL(LOOP_GET_STATUS, loop_status) #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout) #ifdef CONFIG_VT diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index bea0255196c4..98d40e08ba6e 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -395,12 +395,6 @@ COMPATIBLE_IOCTL(DVD_WRITE_STRUCT) COMPATIBLE_IOCTL(DVD_AUTH) /* pktcdvd */ COMPATIBLE_IOCTL(PACKET_CTRL_CMD) -/* Big L */ -ULONG_IOCTL(LOOP_SET_FD) -ULONG_IOCTL(LOOP_CHANGE_FD) -COMPATIBLE_IOCTL(LOOP_CLR_FD) -COMPATIBLE_IOCTL(LOOP_GET_STATUS64) -COMPATIBLE_IOCTL(LOOP_SET_STATUS64) /* Big A */ /* sparc only */ /* Big Q for sound/OSS */ -- cgit v1.2.3-71-gd317 From 36695673b012096228ebdc1b39a6a5850daa474e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:06:16 +0100 Subject: [PATCH] BLOCK: Move common FS-specific ioctls to linux/fs.h [try #6] Move common FS-specific ioctls from linux/ext2_fs.h to linux/fs.h as FS_IOC_* and FS_IOC32_* and have the users of them use those as a base. Also move the GETFLAGS/SETFLAGS flags to linux/fs.h as FS_*_FL macros, and then have the other users use them as a base. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- fs/cifs/ioctl.c | 7 +++-- fs/compat_ioctl.c | 15 ----------- fs/hfsplus/hfsplus_fs.h | 8 ++---- fs/hfsplus/ioctl.c | 17 ++++++------ fs/jfs/ioctl.c | 15 +++++------ include/linux/ext2_fs.h | 64 +++++++++++++++++++++++++-------------------- include/linux/ext3_fs.h | 20 +++++++++++--- include/linux/fs.h | 39 +++++++++++++++++++++++++++ include/linux/reiserfs_fs.h | 28 +++++++++----------- 9 files changed, 124 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index b0ea6687ab55..e34c7db00f6f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -22,7 +22,6 @@ */ #include -#include #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep, } break; #ifdef CONFIG_CIFS_POSIX - case EXT2_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: if(CIFS_UNIX_EXTATTR_CAP & caps) { if (pSMBFile == NULL) break; @@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep, &ExtAttrBits, &ExtAttrMask); if(rc == 0) rc = put_user(ExtAttrBits & - EXT2_FL_USER_VISIBLE, + FS_FL_USER_VISIBLE, (int __user *)arg); } break; - case EXT2_IOC_SETFLAGS: + case FS_IOC_SETFLAGS: if(CIFS_UNIX_EXTATTR_CAP & caps) { if(get_user(ExtAttrBits,(int __user *)arg)) { rc = -EFAULT; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3b0cf7fbd95a..bd9c4f49d4e5 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -123,21 +123,6 @@ #include #include -/* Aiee. Someone does not find a difference between int and long */ -#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) -#define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) -#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -#endif - -#define EXT2_IOC32_GETVERSION _IOR('v', 1, int) -#define EXT2_IOC32_SETVERSION _IOW('v', 2, int) - static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *f) { diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 8a1ca5ef7ada..3915635b4470 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -246,12 +246,8 @@ struct hfsplus_readdir_data { /* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support * chattr/lsattr */ -#define HFSPLUS_IOC_EXT2_GETFLAGS _IOR('f', 1, long) -#define HFSPLUS_IOC_EXT2_SETFLAGS _IOW('f', 2, long) - -#define EXT2_FLAG_IMMUTABLE 0x00000010 /* Immutable file */ -#define EXT2_FLAG_APPEND 0x00000020 /* writes to file may only append */ -#define EXT2_FLAG_NODUMP 0x00000040 /* do not dump file */ +#define HFSPLUS_IOC_EXT2_GETFLAGS FS_IOC_GETFLAGS +#define HFSPLUS_IOC_EXT2_SETFLAGS FS_IOC_SETFLAGS /* diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 13cf848ac833..79fd10402ea3 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, case HFSPLUS_IOC_EXT2_GETFLAGS: flags = 0; if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) - flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */ + flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) - flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */ + flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) - flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */ + flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { if (IS_RDONLY(inode)) @@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) || + if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } /* don't silently ignore unsupported ext2 flags */ - if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND| - EXT2_FLAG_NODUMP)) + if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) return -EOPNOTSUPP; - if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */ + if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ inode->i_flags |= S_IMMUTABLE; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; } else { inode->i_flags &= ~S_IMMUTABLE; HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; } - if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */ + if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ inode->i_flags |= S_APPEND; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; } else { inode->i_flags &= ~S_APPEND; HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; } - if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */ + if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; else HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 67b3774820eb..37db52488262 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -22,13 +21,13 @@ static struct { long jfs_flag; long ext2_flag; } jfs_map[] = { - {JFS_NOATIME_FL, EXT2_NOATIME_FL}, - {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL}, - {JFS_SYNC_FL, EXT2_SYNC_FL}, - {JFS_SECRM_FL, EXT2_SECRM_FL}, - {JFS_UNRM_FL, EXT2_UNRM_FL}, - {JFS_APPEND_FL, EXT2_APPEND_FL}, - {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL}, + {JFS_NOATIME_FL, FS_NOATIME_FL}, + {JFS_DIRSYNC_FL, FS_DIRSYNC_FL}, + {JFS_SYNC_FL, FS_SYNC_FL}, + {JFS_SECRM_FL, FS_SECRM_FL}, + {JFS_UNRM_FL, FS_UNRM_FL}, + {JFS_APPEND_FL, FS_APPEND_FL}, + {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL}, {0, 0}, }; diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 33a1aa107329..153d755376a4 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -165,41 +165,49 @@ struct ext2_group_desc #define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) /* - * Inode flags + * Inode flags (GETFLAGS/SETFLAGS) */ -#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ -#define EXT2_UNRM_FL 0x00000002 /* Undelete */ -#define EXT2_COMPR_FL 0x00000004 /* Compress file */ -#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ -#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ -#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ +#define EXT2_SECRM_FL FS_SECRM_FL /* Secure deletion */ +#define EXT2_UNRM_FL FS_UNRM_FL /* Undelete */ +#define EXT2_COMPR_FL FS_COMPR_FL /* Compress file */ +#define EXT2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ +#define EXT2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ +#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ +#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ /* Reserved for compression usage... */ -#define EXT2_DIRTY_FL 0x00000100 -#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define EXT2_NOCOMP_FL 0x00000400 /* Don't compress */ -#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ +#define EXT2_DIRTY_FL FS_DIRTY_FL +#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ +#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ +#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ /* End compression flags --- maybe not all used */ -#define EXT2_BTREE_FL 0x00001000 /* btree format dir */ -#define EXT2_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define EXT2_IMAGIC_FL 0x00002000 /* AFS directory */ -#define EXT2_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ -#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ - -#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */ +#define EXT2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ +#define EXT2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ +#define EXT2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ +#define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ +#define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ +#define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + +#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ /* * ioctl commands */ -#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) -#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) -#define EXT2_IOC_GETVERSION _IOR('v', 1, long) -#define EXT2_IOC_SETVERSION _IOW('v', 2, long) +#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT2_IOC_GETVERSION FS_IOC_GETVERSION +#define EXT2_IOC_SETVERSION FS_IOC_SETVERSION + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION +#define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION /* * Structure of an inode on the disk diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index cc08f56750da..a7a01ff44669 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -216,20 +216,32 @@ struct ext3_new_group_data { /* * ioctl commands */ -#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) -#define EXT3_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT3_IOC_GETVERSION _IOR('f', 3, long) #define EXT3_IOC_SETVERSION _IOW('f', 4, long) #define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) #define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) -#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) -#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) +#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) +/* + * ioctl commands in 32 bit emulation + */ +#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD_DEBUG +#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif + /* * Mount options */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 1728142ec4b6..b73a47582dbe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -217,6 +217,45 @@ extern int dir_notify_enable; #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FS_IOC_GETFLAGS _IOR('f', 1, long) +#define FS_IOC_SETFLAGS _IOW('f', 2, long) +#define FS_IOC_GETVERSION _IOR('v', 1, long) +#define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC32_GETFLAGS _IOR('f', 1, int) +#define FS_IOC32_SETFLAGS _IOW('f', 2, int) +#define FS_IOC32_GETVERSION _IOR('v', 1, int) +#define FS_IOC32_SETVERSION _IOW('v', 2, int) + +/* + * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) + */ +#define FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define FS_UNRM_FL 0x00000002 /* Undelete */ +#define FS_COMPR_FL 0x00000004 /* Compress file */ +#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define FS_DIRTY_FL 0x00000100 +#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ +#define FS_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define FS_BTREE_FL 0x00001000 /* btree format dir */ +#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ +#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + + #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 28493ffaafe7..0100d6d1d84c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -807,21 +807,19 @@ struct stat_data_v1 { #define set_sd_v1_first_direct_byte(sdp,v) \ ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) -#include - /* inode flags stored in sd_attrs (nee sd_reserved) */ /* we want common flags to have the same values as in ext2, so chattr(1) will work without problems */ -#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL -#define REISERFS_APPEND_FL EXT2_APPEND_FL -#define REISERFS_SYNC_FL EXT2_SYNC_FL -#define REISERFS_NOATIME_FL EXT2_NOATIME_FL -#define REISERFS_NODUMP_FL EXT2_NODUMP_FL -#define REISERFS_SECRM_FL EXT2_SECRM_FL -#define REISERFS_UNRM_FL EXT2_UNRM_FL -#define REISERFS_COMPR_FL EXT2_COMPR_FL -#define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL +#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL +#define REISERFS_APPEND_FL FS_APPEND_FL +#define REISERFS_SYNC_FL FS_SYNC_FL +#define REISERFS_NOATIME_FL FS_NOATIME_FL +#define REISERFS_NODUMP_FL FS_NODUMP_FL +#define REISERFS_SECRM_FL FS_SECRM_FL +#define REISERFS_UNRM_FL FS_UNRM_FL +#define REISERFS_COMPR_FL FS_COMPR_FL +#define REISERFS_NOTAIL_FL FS_NOTAIL_FL /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ @@ -2168,10 +2166,10 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, #define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) /* define following flags to be the same as in ext2, so that chattr(1), lsattr(1) will work with us. */ -#define REISERFS_IOC_GETFLAGS EXT2_IOC_GETFLAGS -#define REISERFS_IOC_SETFLAGS EXT2_IOC_SETFLAGS -#define REISERFS_IOC_GETVERSION EXT2_IOC_GETVERSION -#define REISERFS_IOC_SETVERSION EXT2_IOC_SETVERSION +#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION +#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION /* Locking primitives */ /* Right now we are still falling back to (un)lock_kernel, but eventually that -- cgit v1.2.3-71-gd317 From 52b499c438ff60991eb3855ca090782569b3e8cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:06:18 +0100 Subject: [PATCH] BLOCK: Move the ReiserFS device ioctl compat stuff to the ReiserFS driver [try #6] Move the ReiserFS device ioctl compat stuff from fs/compat_ioctl.c to the ReiserFS driver so that the ReiserFS header file doesn't need to be included. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- fs/compat_ioctl.c | 12 ------------ fs/reiserfs/dir.c | 3 +++ fs/reiserfs/file.c | 4 ++++ fs/reiserfs/ioctl.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/reiserfs_fs.h | 9 +++++++++ 5 files changed, 51 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index bd9c4f49d4e5..0346f2ab57c4 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -2014,16 +2013,6 @@ static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg) return ret; } -#define REISERFS_IOC_UNPACK32 _IOW(0xCD,1,int) - -static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr) -{ - if (cmd == REISERFS_IOC_UNPACK32) - cmd = REISERFS_IOC_UNPACK; - - return sys_ioctl(fd,cmd,ptr); -} - struct raw32_config_request { compat_int_t raw_minor; @@ -2784,7 +2773,6 @@ HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64) /* vfat */ HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32) HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32) -HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32) /* Raw devices */ HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 9aabcc0ccd2d..657050ad7430 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -22,6 +22,9 @@ const struct file_operations reiserfs_dir_operations = { .readdir = reiserfs_readdir, .fsync = reiserfs_dir_fsync, .ioctl = reiserfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = reiserfs_compat_ioctl, +#endif }; static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1cfbe857ba27..3e08f7161a3d 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -2,6 +2,7 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ +#include #include #include #include @@ -1568,6 +1569,9 @@ const struct file_operations reiserfs_file_operations = { .read = generic_file_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = reiserfs_compat_ioctl, +#endif .mmap = generic_file_mmap, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index a986b5e1e288..9c57578cb831 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -9,6 +9,7 @@ #include #include #include +#include static int reiserfs_unpack(struct inode *inode, struct file *filp); @@ -94,6 +95,40 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, } } +#ifdef CONFIG_COMPAT +long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case REISERFS_IOC32_UNPACK: + cmd = REISERFS_IOC_UNPACK; + break; + case REISERFS_IOC32_GETFLAGS: + cmd = REISERFS_IOC_GETFLAGS; + break; + case REISERFS_IOC32_SETFLAGS: + cmd = REISERFS_IOC_SETFLAGS; + break; + case REISERFS_IOC32_GETVERSION: + cmd = REISERFS_IOC_GETVERSION; + break; + case REISERFS_IOC32_SETVERSION: + cmd = REISERFS_IOC_SETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif + /* ** reiserfs_unpack ** Function try to convert tail from direct item into indirect. diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 0100d6d1d84c..9c63abffd7b2 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2161,6 +2161,8 @@ __u32 r5_hash(const signed char *msg, int len); /* prototypes from ioctl.c */ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +long reiserfs_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); /* ioctl's command */ #define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) @@ -2171,6 +2173,13 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, #define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION #define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION +/* the 32 bit compat definitions with int argument */ +#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) +#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION +#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION + /* Locking primitives */ /* Right now we are still falling back to (un)lock_kernel, but eventually that would evolve into real per-fs locks */ -- cgit v1.2.3-71-gd317 From 52a700c5675f399c07e6e57328291e57f13ef3bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:06:23 +0100 Subject: [PATCH] BLOCK: Move the Ext3 device ioctl compat stuff to the Ext3 driver [try #6] Move the Ext3 device ioctl compat stuff from fs/compat_ioctl.c to the Ext3 driver so that the Ext3 header file doesn't need to be included. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- fs/compat_ioctl.c | 27 ------------------------ fs/ext3/dir.c | 3 +++ fs/ext3/file.c | 3 +++ fs/ext3/ioctl.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/ext3_fs.h | 6 ++++++ 5 files changed, 66 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3594668559af..e5eb0f10f05a 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -45,8 +45,6 @@ #include #include #include -#include -#include #include #include #include @@ -158,22 +156,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } -static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break; - case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break; - case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break; - case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break; - case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break; -#ifdef CONFIG_JBD_DEBUG - case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break; -#endif - } - return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); -} - struct compat_video_event { int32_t type; compat_time_t timestamp; @@ -2712,15 +2694,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl) HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl) HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl) #endif -HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl) -COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD) -#ifdef CONFIG_JBD_DEBUG -HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl) -#endif /* One SMB ioctl needs translations. */ #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 429acbb4e064..d0b54f30b914 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = { .read = generic_read_dir, .readdir = ext3_readdir, /* we take BKL. needed?*/ .ioctl = ext3_ioctl, /* BKL held */ +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif .fsync = ext3_sync_file, /* BKL held */ #ifdef CONFIG_EXT3_INDEX .release = ext3_release_dir, diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 994efd189f4e..74ff20f9d09b 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = { .readv = generic_file_readv, .writev = generic_file_writev, .ioctl = ext3_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif .mmap = generic_file_mmap, .open = generic_file_open, .release = ext3_release_file, diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 3a6b012d120c..12daa6869572 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -13,9 +13,10 @@ #include #include #include +#include +#include #include - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { @@ -252,3 +253,55 @@ flags_err: return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT3_IOC32_GETFLAGS: + cmd = EXT3_IOC_GETFLAGS; + break; + case EXT3_IOC32_SETFLAGS: + cmd = EXT3_IOC_SETFLAGS; + break; + case EXT3_IOC32_GETVERSION: + cmd = EXT3_IOC_GETVERSION; + break; + case EXT3_IOC32_SETVERSION: + cmd = EXT3_IOC_SETVERSION; + break; + case EXT3_IOC32_GROUP_EXTEND: + cmd = EXT3_IOC_GROUP_EXTEND; + break; + case EXT3_IOC32_GETVERSION_OLD: + cmd = EXT3_IOC_GETVERSION_OLD; + break; + case EXT3_IOC32_SETVERSION_OLD: + cmd = EXT3_IOC_SETVERSION_OLD; + break; +#ifdef CONFIG_JBD_DEBUG + case EXT3_IOC32_WAIT_FOR_READONLY: + cmd = EXT3_IOC_WAIT_FOR_READONLY; + break; +#endif + case EXT3_IOC32_GETRSVSZ: + cmd = EXT3_IOC_GETRSVSZ; + break; + case EXT3_IOC32_SETRSVSZ: + cmd = EXT3_IOC_SETRSVSZ; + break; + case EXT3_IOC_GROUP_ADD: + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index a7a01ff44669..11cca1bdc0c7 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -233,6 +233,8 @@ struct ext3_new_group_data { /* * ioctl commands in 32 bit emulation */ +#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS #define EXT3_IOC32_GETVERSION _IOR('f', 3, int) #define EXT3_IOC32_SETVERSION _IOW('f', 4, int) #define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) @@ -241,6 +243,9 @@ struct ext3_new_group_data { #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) #endif +#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + /* * Mount options @@ -824,6 +829,7 @@ extern void ext3_set_aops(struct inode *inode); /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, unsigned long); +extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); /* namei.c */ extern int ext3_orphan_add(handle_t *, struct inode *); -- cgit v1.2.3-71-gd317 From 9361401eb7619c033e2394e4f9f6d410d6719ac7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 30 Sep 2006 20:45:40 +0200 Subject: [PATCH] BLOCK: Make it possible to disable the block layer [try #6] Make it possible to disable the block layer. Not all embedded devices require it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require the block layer to be present. This patch does the following: (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev support. (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls an item that uses the block layer. This includes: (*) Block I/O tracing. (*) Disk partition code. (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS. (*) The SCSI layer. As far as I can tell, even SCSI chardevs use the block layer to do scheduling. Some drivers that use SCSI facilities - such as USB storage - end up disabled indirectly from this. (*) Various block-based device drivers, such as IDE and the old CDROM drivers. (*) MTD blockdev handling and FTL. (*) JFFS - which uses set_bdev_super(), something it could avoid doing by taking a leaf out of JFFS2's book. (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and linux/elevator.h contingent on CONFIG_BLOCK being set. sector_div() is, however, still used in places, and so is still available. (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and parts of linux/fs.h. (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK. (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK. (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK is not enabled. (*) fs/no-block.c is created to hold out-of-line stubs and things that are required when CONFIG_BLOCK is not set: (*) Default blockdev file operations (to give error ENODEV on opening). (*) Makes some /proc changes: (*) /proc/devices does not list any blockdevs. (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK. (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK. (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if given command other than Q_SYNC or if a special device is specified. (*) In init/do_mounts.c, no reference is made to the blockdev routines if CONFIG_BLOCK is not defined. This does not prohibit NFS roots or JFFS2. (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return error ENOSYS by way of cond_syscall if so). (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if CONFIG_BLOCK is not set, since they can't then happen. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- block/Kconfig | 20 ++++++++++++++++++ block/Kconfig.iosched | 3 +++ block/Makefile | 2 +- drivers/block/Kconfig | 4 ++++ drivers/cdrom/Kconfig | 2 +- drivers/char/Kconfig | 1 + drivers/char/random.c | 4 ++++ drivers/ide/Kconfig | 4 ++++ drivers/md/Kconfig | 3 +++ drivers/message/i2o/Kconfig | 2 +- drivers/mmc/Kconfig | 2 +- drivers/mmc/Makefile | 3 ++- drivers/mtd/Kconfig | 12 +++++------ drivers/mtd/devices/Kconfig | 2 +- drivers/s390/block/Kconfig | 2 +- drivers/scsi/Kconfig | 2 ++ fs/Kconfig | 31 ++++++++++++++++++++------- fs/Makefile | 14 +++++++++---- fs/compat_ioctl.c | 18 ++++++++++++++++ fs/internal.h | 6 ++++++ fs/no-block.c | 22 +++++++++++++++++++ fs/partitions/Makefile | 2 +- fs/proc/proc_misc.c | 11 +++++++++- fs/quota.c | 44 ++++++++++++++++++++++++++------------ fs/super.c | 4 ++++ fs/xfs/Kconfig | 1 + include/linux/blkdev.h | 50 +++++++++++++++++++++++++++++++------------- include/linux/buffer_head.h | 16 ++++++++++++++ include/linux/compat_ioctl.h | 2 ++ include/linux/elevator.h | 3 +++ include/linux/fs.h | 25 +++++++++++++++++++--- include/linux/genhd.h | 4 ++++ include/linux/mpage.h | 3 +++ include/linux/raid/md.h | 3 +++ include/linux/raid/md_k.h | 3 +++ include/scsi/scsi_tcq.h | 3 ++- init/Kconfig | 2 +- init/do_mounts.c | 13 +++++++++++- kernel/sys_ni.c | 5 +++++ mm/Makefile | 2 +- mm/filemap.c | 4 ++++ mm/migrate.c | 2 ++ mm/page-writeback.c | 8 ++++--- mm/truncate.c | 2 ++ 44 files changed, 308 insertions(+), 63 deletions(-) create mode 100644 fs/no-block.c (limited to 'include/linux') diff --git a/block/Kconfig b/block/Kconfig index b6f5f0a79655..9af6c614dfde 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -1,6 +1,24 @@ # # Block layer core configuration # +config BLOCK + bool "Enable the block layer" + default y + help + This permits the block layer to be removed from the kernel if it's not + needed (on some embedded devices for example). If this option is + disabled, then blockdev files will become unusable and some + filesystems (such as ext3) will become unavailable. + + This option will also disable SCSI character devices and USB storage + since they make use of various block layer definitions and + facilities. + + Say Y here unless you know you really don't want to mount disks and + suchlike. + +if BLOCK + #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64 #for instance. config LBD @@ -33,4 +51,6 @@ config LSF If unsure, say Y. +endif + source block/Kconfig.iosched diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 48d090e266fc..903f0d3b6852 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -1,3 +1,4 @@ +if BLOCK menu "IO Schedulers" @@ -67,3 +68,5 @@ config DEFAULT_IOSCHED default "noop" if DEFAULT_NOOP endmenu + +endif diff --git a/block/Makefile b/block/Makefile index c05de0e0037f..4b84d0d5947b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -2,7 +2,7 @@ # Makefile for the kernel block layer # -obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o +obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index b5382cedf0c0..422e31d5f8e5 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -2,6 +2,8 @@ # Block device driver configuration # +if BLOCK + menu "Block devices" config BLK_DEV_FD @@ -468,3 +470,5 @@ config ATA_OVER_ETH devices like the Coraid EtherDrive (R) Storage Blade. endmenu + +endif diff --git a/drivers/cdrom/Kconfig b/drivers/cdrom/Kconfig index ff5652d40619..4b12e9031fb3 100644 --- a/drivers/cdrom/Kconfig +++ b/drivers/cdrom/Kconfig @@ -3,7 +3,7 @@ # menu "Old CD-ROM drivers (not SCSI, not IDE)" - depends on ISA + depends on ISA && BLOCK config CD_NO_IDESCSI bool "Support non-SCSI/IDE/ATAPI CDROM drives" diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 4cc619edf424..bde1c665d9f4 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -1006,6 +1006,7 @@ config GPIO_VR41XX config RAW_DRIVER tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)" + depends on BLOCK help The raw driver permits block devices to be bound to /dev/raw/rawN. Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. diff --git a/drivers/char/random.c b/drivers/char/random.c index 4c3a5ca9d8f7..b430a12eb819 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq) add_timer_randomness(irq_timer_state[irq], 0x100 + irq); } +#ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) { if (!disk || !disk->random) @@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk) } EXPORT_SYMBOL(add_disk_randomness); +#endif #define EXTRACT_SIZE 10 @@ -918,6 +920,7 @@ void rand_initialize_irq(int irq) } } +#ifdef CONFIG_BLOCK void rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; @@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk) disk->random = state; } } +#endif static ssize_t random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index b6fb167e20f6..69d627bd537a 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -4,6 +4,8 @@ # Andre Hedrick # +if BLOCK + menu "ATA/ATAPI/MFM/RLL support" config IDE @@ -1082,3 +1084,5 @@ config BLK_DEV_HD endif endmenu + +endif diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index bf869ed03eed..6dd31a291d84 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -2,6 +2,8 @@ # Block device driver configuration # +if BLOCK + menu "Multi-device support (RAID and LVM)" config MD @@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC endmenu +endif diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig index fef677103880..6443392bffff 100644 --- a/drivers/message/i2o/Kconfig +++ b/drivers/message/i2o/Kconfig @@ -88,7 +88,7 @@ config I2O_BUS config I2O_BLOCK tristate "I2O Block OSM" - depends on I2O + depends on I2O && BLOCK ---help--- Include support for the I2O Block OSM. The Block OSM presents disk and other structured block devices to the operating system. If you diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig index 45bcf098e762..f540bd88dc5a 100644 --- a/drivers/mmc/Kconfig +++ b/drivers/mmc/Kconfig @@ -21,7 +21,7 @@ config MMC_DEBUG config MMC_BLOCK tristate "MMC block device driver" - depends on MMC + depends on MMC && BLOCK default y help Say Y here to enable the MMC block device driver support. diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile index d2957e35cc6f..b1f6e03e7aa9 100644 --- a/drivers/mmc/Makefile +++ b/drivers/mmc/Makefile @@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X) += au1xmmc.o obj-$(CONFIG_MMC_OMAP) += omap.o obj-$(CONFIG_MMC_AT91RM9200) += at91_mci.o -mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o +mmc_core-y := mmc.o mmc_sysfs.o +mmc_core-$(CONFIG_BLOCK) += mmc_queue.o ifeq ($(CONFIG_MMC_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index a03e862851db..a304b34c2632 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -166,7 +166,7 @@ config MTD_CHAR config MTD_BLOCK tristate "Caching block device access to MTD devices" - depends on MTD + depends on MTD && BLOCK ---help--- Although most flash chips have an erase size too large to be useful as block devices, it is possible to use MTD devices which are based @@ -188,7 +188,7 @@ config MTD_BLOCK config MTD_BLOCK_RO tristate "Readonly block device access to MTD devices" - depends on MTD_BLOCK!=y && MTD + depends on MTD_BLOCK!=y && MTD && BLOCK help This allows you to mount read-only file systems (such as cramfs) from an MTD device, without the overhead (and danger) of the caching @@ -199,7 +199,7 @@ config MTD_BLOCK_RO config FTL tristate "FTL (Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the original Flash Translation Layer which is part of the PCMCIA specification. It uses a kind of pseudo- @@ -215,7 +215,7 @@ config FTL config NFTL tristate "NFTL (NAND Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the NAND Flash Translation Layer which is used on M-Systems' DiskOnChip devices. It uses a kind of pseudo- @@ -238,7 +238,7 @@ config NFTL_RW config INFTL tristate "INFTL (Inverse NAND Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the Inverse NAND Flash Translation Layer which is used on M-Systems' newer DiskOnChip devices. It @@ -255,7 +255,7 @@ config INFTL config RFD_FTL tristate "Resident Flash Disk (Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the flash translation layer known as the Resident Flash Disk (RFD), as used by the Embedded BIOS diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 16c02b5ccf7e..440f6851da69 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -136,7 +136,7 @@ config MTDRAM_ABS_POS config MTD_BLOCK2MTD tristate "MTD using block device" - depends on MTD + depends on MTD && BLOCK help This driver allows a block device to appear as an MTD. It would generally be used in the following cases: diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 929d6fff6152..b250c5354503 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -1,4 +1,4 @@ -if S390 +if S390 && BLOCK comment "S/390 block device drivers" depends on S390 diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index c4dfcc91ddda..dab082002e6f 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -3,11 +3,13 @@ menu "SCSI device support" config RAID_ATTRS tristate "RAID Transport Class" default n + depends on BLOCK ---help--- Provides RAID config SCSI tristate "SCSI device support" + depends on BLOCK ---help--- If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or any other SCSI device under Linux, say Y and make sure that you know diff --git a/fs/Kconfig b/fs/Kconfig index 4fd9efac29ab..1453d2d164f7 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -4,6 +4,8 @@ menu "File systems" +if BLOCK + config EXT2_FS tristate "Second extended fs support" help @@ -399,6 +401,8 @@ config ROMFS_FS If you don't know whether you need it, then you don't need it: answer N. +endif + config INOTIFY bool "Inotify file change notification support" default y @@ -530,6 +534,7 @@ config FUSE_FS If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. +if BLOCK menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -597,7 +602,9 @@ config UDF_NLS depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) endmenu +endif +if BLOCK menu "DOS/FAT/NT Filesystems" config FAT_FS @@ -782,6 +789,7 @@ config NTFS_RW It is perfectly safe to say N here. endmenu +endif menu "Pseudo filesystems" @@ -939,7 +947,7 @@ menu "Miscellaneous filesystems" config ADFS_FS tristate "ADFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help The Acorn Disc Filing System is the standard file system of the RiscOS operating system which runs on Acorn's ARM-based Risc PC @@ -967,7 +975,7 @@ config ADFS_FS_RW config AFFS_FS tristate "Amiga FFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help The Fast File System (FFS) is the common file system used on hard disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y @@ -989,7 +997,7 @@ config AFFS_FS config HFS_FS tristate "Apple Macintosh file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL select NLS help If you say Y here, you will be able to mount Macintosh-formatted @@ -1002,6 +1010,7 @@ config HFS_FS config HFSPLUS_FS tristate "Apple Extended HFS file system support" + depends on BLOCK select NLS select NLS_UTF8 help @@ -1015,7 +1024,7 @@ config HFSPLUS_FS config BEFS_FS tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL select NLS help The BeOS File System (BeFS) is the native file system of Be, Inc's @@ -1042,7 +1051,7 @@ config BEFS_DEBUG config BFS_FS tristate "BFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help Boot File System (BFS) is a file system used under SCO UnixWare to allow the bootloader access to the kernel image and other important @@ -1064,7 +1073,7 @@ config BFS_FS config EFS_FS tristate "EFS file system support (read only) (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help EFS is an older file system used for non-ISO9660 CD-ROMs and hard disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer @@ -1079,7 +1088,7 @@ config EFS_FS config JFFS_FS tristate "Journalling Flash File System (JFFS) support" - depends on MTD + depends on MTD && BLOCK help JFFS is the Journaling Flash File System developed by Axis Communications in Sweden, aimed at providing a crash/powerdown-safe @@ -1264,6 +1273,7 @@ endchoice config CRAMFS tristate "Compressed ROM file system support (cramfs)" + depends on BLOCK select ZLIB_INFLATE help Saying Y here includes support for CramFs (Compressed ROM File @@ -1283,6 +1293,7 @@ config CRAMFS config VXFS_FS tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" + depends on BLOCK help FreeVxFS is a file system driver that support the VERITAS VxFS(TM) file system format. VERITAS VxFS(TM) is the standard file system @@ -1300,6 +1311,7 @@ config VXFS_FS config HPFS_FS tristate "OS/2 HPFS file system support" + depends on BLOCK help OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS is the file system used for organizing files on OS/2 hard disk @@ -1316,6 +1328,7 @@ config HPFS_FS config QNX4FS_FS tristate "QNX4 file system support (read only)" + depends on BLOCK help This is the file system used by the real-time operating systems QNX 4 and QNX 6 (the latter is also called QNX RTP). @@ -1343,6 +1356,7 @@ config QNX4FS_RW config SYSV_FS tristate "System V/Xenix/V7/Coherent file system support" + depends on BLOCK help SCO, Xenix and Coherent are commercial Unix systems for Intel machines, and Version 7 was used on the DEC PDP-11. Saying Y @@ -1381,6 +1395,7 @@ config SYSV_FS config UFS_FS tristate "UFS file system support (read only)" + depends on BLOCK help BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, OpenBSD and NeXTstep) use a file system called UFS. Some System V @@ -1959,11 +1974,13 @@ config GENERIC_ACL endmenu +if BLOCK menu "Partition Types" source "fs/partitions/Kconfig" endmenu +endif source "fs/nls/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 46b8cfe497b2..a503e6ce0f32 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -5,12 +5,18 @@ # Rewritten to use lists instead of if-statements. # -obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ - block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ +obj-y := open.o read_write.o file_table.o super.o \ + char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ - seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o drop_caches.o splice.o sync.o + seq_file.o xattr.o libfs.o fs-writeback.o \ + pnode.o drop_caches.o splice.o sync.o + +ifeq ($(CONFIG_BLOCK),y) +obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o +else +obj-y += no-block.o +endif obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY_USER) += inotify_user.o diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index e1a56437040a..64b34533edea 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -645,6 +645,7 @@ out: } #endif +#ifdef CONFIG_BLOCK struct hd_geometry32 { unsigned char heads; unsigned char sectors; @@ -869,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } return err; } +#endif /* CONFIG_BLOCK */ struct sock_fprog32 { unsigned short len; @@ -992,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } +#ifdef CONFIG_BLOCK struct mtget32 { compat_long_t mt_type; compat_long_t mt_resid; @@ -1164,6 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return err; } +#endif /* CONFIG_BLOCK */ #ifdef CONFIG_VT @@ -1491,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) return -EINVAL; } +#ifdef CONFIG_BLOCK static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg) { /* The mkswap binary hard codes it to Intel value :-((( */ @@ -1525,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return sys_ioctl(fd, cmd, (unsigned long)a); } +#endif static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) { return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); } +#ifdef CONFIG_BLOCK /* Fix sizeof(sizeof()) breakage */ #define BLKBSZGET_32 _IOR(0x12,112,int) #define BLKBSZSET_32 _IOW(0x12,113,int) @@ -1551,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, { return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg)); } +#endif /* Bluetooth ioctls */ #define HCIUARTSETPROTO _IOW('U', 200, int) @@ -1571,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, #define HIDPGETCONNLIST _IOR('H', 210, int) #define HIDPGETCONNINFO _IOR('H', 211, int) +#ifdef CONFIG_BLOCK struct floppy_struct32 { compat_uint_t size; compat_uint_t sect; @@ -1895,6 +1904,7 @@ out: kfree(karg); return err; } +#endif struct mtd_oob_buf32 { u_int32_t start; @@ -1936,6 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } +#ifdef CONFIG_BLOCK struct raw32_config_request { compat_int_t raw_minor; @@ -2000,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg) } return ret; } +#endif /* CONFIG_BLOCK */ struct serial_struct32 { compat_int_t type; @@ -2606,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc) HANDLE_IOCTL(SIOCRTMSG, ret_einval) HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) #endif +#ifdef CONFIG_BLOCK HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) HANDLE_IOCTL(BLKRAGET, w_long) HANDLE_IOCTL(BLKGETSIZE, w_long) @@ -2631,14 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans) HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans) HANDLE_IOCTL(SG_IO,sg_ioctl_trans) HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans) +#endif HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans) HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans) HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans) HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans) +#ifdef CONFIG_BLOCK HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans) HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans) HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans) HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans) +#endif #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout) #ifdef CONFIG_VT @@ -2677,12 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) /* block stuff */ +#ifdef CONFIG_BLOCK HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget) HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset) HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64) /* Raw devices */ HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) +#endif /* Serial */ HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl) HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl) diff --git a/fs/internal.h b/fs/internal.h index f662b703bb97..f07147d63255 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -14,10 +14,16 @@ /* * block_dev.c */ +#ifdef CONFIG_BLOCK extern struct super_block *blockdev_superblock; extern void __init bdev_cache_init(void); #define sb_is_blkdev_sb(sb) ((sb) == blockdev_superblock) +#else +static inline void bdev_cache_init(void) {} + +#define sb_is_blkdev_sb(sb) 0 +#endif /* * char_dev.c diff --git a/fs/no-block.c b/fs/no-block.c new file mode 100644 index 000000000000..d269a93d3467 --- /dev/null +++ b/fs/no-block.c @@ -0,0 +1,22 @@ +/* no-block.c: implementation of routines required for non-BLOCK configuration + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +static int no_blkdev_open(struct inode * inode, struct file * filp) +{ + return -ENODEV; +} + +const struct file_operations def_blk_fops = { + .open = no_blkdev_open, +}; diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile index d713ce6b3e12..67e665fdb7fc 100644 --- a/fs/partitions/Makefile +++ b/fs/partitions/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y := check.o +obj-$(CONFIG_BLOCK) := check.o obj-$(CONFIG_ACORN_PARTITION) += acorn.o obj-$(CONFIG_AMIGA_PARTITION) += amiga.o diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5bbd60896050..66bc425f2f3d 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v) if (i == 0) seq_printf(f, "Character devices:\n"); chrdev_show(f, i); - } else { + } +#ifdef CONFIG_BLOCK + else { i -= CHRDEV_MAJOR_HASH_SIZE; if (i == 0) seq_printf(f, "\nBlock devices:\n"); blkdev_show(f, i); } +#endif return 0; } @@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off, } #endif +#ifdef CONFIG_BLOCK extern struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { @@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif #ifdef CONFIG_MODULES extern struct seq_operations modules_op; @@ -695,7 +700,9 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); +#ifdef CONFIG_BLOCK create_seq_entry("partitions", 0, &proc_partitions_operations); +#endif create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); #ifdef CONFIG_SLAB @@ -707,7 +714,9 @@ void __init proc_misc_init(void) create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); +#ifdef CONFIG_BLOCK create_seq_entry("diskstats", 0, &proc_diskstats_operations); +#endif #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif diff --git a/fs/quota.c b/fs/quota.c index d6a2be826e29..b9dae76a0b6e 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -337,6 +337,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void return 0; } +/* + * look up a superblock on which quota ops will be performed + * - use the name of a block device to find the superblock thereon + */ +static inline struct super_block *quotactl_block(const char __user *special) +{ +#ifdef CONFIG_BLOCK + struct block_device *bdev; + struct super_block *sb; + char *tmp = getname(special); + + if (IS_ERR(tmp)) + return ERR_PTR(PTR_ERR(tmp)); + bdev = lookup_bdev(tmp); + putname(tmp); + if (IS_ERR(bdev)) + return ERR_PTR(PTR_ERR(bdev)); + sb = get_super(bdev); + bdput(bdev); + if (!sb) + return ERR_PTR(-ENODEV); + + return sb; +#else + return ERR_PTR(-ENODEV); +#endif +} + /* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota @@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t { uint cmds, type; struct super_block *sb = NULL; - struct block_device *bdev; - char *tmp; int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; if (cmds != Q_SYNC || special) { - tmp = getname(special); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - bdev = lookup_bdev(tmp); - putname(tmp); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - sb = get_super(bdev); - bdput(bdev); - if (!sb) - return -ENODEV; + sb = quotactl_block(special); + if (IS_ERR(sb)) + return PTR_ERR(sb); } ret = check_quotactl_valid(sb, type, cmds, id); diff --git a/fs/super.c b/fs/super.c index 15671cd048b1..aec99ddbe53f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -571,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; +#ifdef CONFIG_BLOCK if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; +#endif if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); @@ -692,6 +694,7 @@ void kill_litter_super(struct super_block *sb) EXPORT_SYMBOL(kill_litter_super); +#ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; @@ -787,6 +790,7 @@ void kill_block_super(struct super_block *sb) } EXPORT_SYMBOL(kill_block_super); +#endif int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 26b364c9d62c..35115bca036e 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,5 +1,6 @@ config XFS_FS tristate "XFS filesystem support" + depends on BLOCK help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c01a90998a7..3e36107d342a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -16,6 +16,22 @@ #include +#ifdef CONFIG_LBD +# include +# define sector_div(a, b) do_div(a, b) +#else +# define sector_div(n, b)( \ +{ \ + int _res; \ + _res = (n) % (b); \ + (n) /= (b); \ + _res; \ +} \ +) +#endif + +#ifdef CONFIG_BLOCK + struct scsi_ioctl_command; struct request_queue; @@ -818,24 +834,30 @@ struct work_struct; int kblockd_schedule_work(struct work_struct *work); void kblockd_flush(void); -#ifdef CONFIG_LBD -# include -# define sector_div(a, b) do_div(a, b) -#else -# define sector_div(n, b)( \ -{ \ - int _res; \ - _res = (n) % (b); \ - (n) /= (b); \ - _res; \ -} \ -) -#endif - #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") +#else /* CONFIG_BLOCK */ +/* + * stubs for when the block layer is configured out + */ +#define buffer_heads_over_limit 0 + +static inline long blk_congestion_wait(int rw, long timeout) +{ + return timeout; +} + +static inline long nr_blockdev_pages(void) +{ + return 0; +} + +static inline void exit_io_context(void) {} + +#endif /* CONFIG_BLOCK */ + #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 64b508e35d2a..131ffd37e716 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -14,6 +14,8 @@ #include #include +#ifdef CONFIG_BLOCK + enum bh_state_bits { BH_Uptodate, /* Contains valid data */ BH_Dirty, /* Is dirty */ @@ -301,4 +303,18 @@ static inline void lock_buffer(struct buffer_head *bh) } extern int __set_page_dirty_buffers(struct page *page); + +#else /* CONFIG_BLOCK */ + +static inline void buffer_init(void) {} +static inline int try_to_free_buffers(struct page *page) { return 1; } +static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline int inode_has_buffers(struct inode *inode) { return 0; } +static inline void invalidate_inode_buffers(struct inode *inode) {} +static inline int remove_inode_buffers(struct inode *inode) { return 1; } +static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {} + + +#endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 98d40e08ba6e..d61ef5951538 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -90,6 +90,7 @@ COMPATIBLE_IOCTL(FDTWADDLE) COMPATIBLE_IOCTL(FDFMTTRK) COMPATIBLE_IOCTL(FDRAWCMD) /* 0x12 */ +#ifdef CONFIG_BLOCK COMPATIBLE_IOCTL(BLKRASET) COMPATIBLE_IOCTL(BLKROSET) COMPATIBLE_IOCTL(BLKROGET) @@ -103,6 +104,7 @@ COMPATIBLE_IOCTL(BLKTRACESETUP) COMPATIBLE_IOCTL(BLKTRACETEARDOWN) ULONG_IOCTL(BLKRASET) ULONG_IOCTL(BLKFRASET) +#endif /* RAID */ COMPATIBLE_IOCTL(RAID_VERSION) COMPATIBLE_IOCTL(GET_ARRAY_INFO) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 9c5a04f6114c..b3370ef5164d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -3,6 +3,8 @@ #include +#ifdef CONFIG_BLOCK + typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct bio *); @@ -203,4 +205,5 @@ enum { __val; \ }) +#endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index b73a47582dbe..5baf3a153403 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1482,6 +1482,7 @@ extern void __init vfs_caches_init(unsigned long); extern void putname(const char *name); #endif +#ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern int unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); @@ -1490,11 +1491,15 @@ extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); extern struct block_device *open_partition_by_devnum(dev_t, unsigned); -extern const struct file_operations def_blk_fops; extern const struct address_space_operations def_blk_aops; +#else +static inline void bd_forget(struct inode *inode) {} +#endif +extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; extern const struct file_operations def_fifo_fops; +#ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); @@ -1510,6 +1515,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) #define bd_release_from_disk(bdev, disk) bd_release(bdev) #endif +#endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 @@ -1523,14 +1529,19 @@ extern int chrdev_open(struct inode *, struct file *); extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ -#define BLKDEV_MAJOR_HASH_SIZE 255 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ + +#ifdef CONFIG_BLOCK +#define BLKDEV_MAJOR_HASH_SIZE 255 extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern struct block_device *open_bdev_excl(const char *, int, void *); extern void close_bdev_excl(struct block_device *); extern void blkdev_show(struct seq_file *,off_t); +#else +#define BLKDEV_MAJOR_HASH_SIZE 0 +#endif extern void init_special_inode(struct inode *, umode_t, dev_t); @@ -1544,6 +1555,7 @@ extern const struct file_operations rdwr_fifo_fops; extern int fs_may_remount_ro(struct super_block *); +#ifdef CONFIG_BLOCK /* * return READ, READA, or WRITE */ @@ -1555,9 +1567,10 @@ extern int fs_may_remount_ro(struct super_block *); #define bio_data_dir(bio) ((bio)->bi_rw & 1) extern int check_disk_change(struct block_device *); -extern int invalidate_inodes(struct super_block *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); +#endif +extern int invalidate_inodes(struct super_block *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); unsigned long invalidate_inode_pages(struct address_space *mapping); @@ -1590,7 +1603,9 @@ extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, void *data, int force); +#ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); +#endif extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int, struct nameidata *); extern int generic_permission(struct inode *, int, @@ -1673,9 +1688,11 @@ static inline void insert_inode_hash(struct inode *inode) { extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_kill(struct file *f); +#ifdef CONFIG_BLOCK struct bio; extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); +#endif extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@ -1756,6 +1773,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos, actor); } +#ifdef CONFIG_BLOCK ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, @@ -1793,6 +1811,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, get_block, end_io, DIO_OWN_LOCKING); } +#endif extern const struct file_operations generic_ro_fops; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e4af57e87c17..41f276fdd185 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,6 +11,8 @@ #include +#ifdef CONFIG_BLOCK + enum { /* These three have identical behaviour; use the second one if DOS FDISK gets confused about extended/logical partitions starting past cylinder 1023. */ @@ -420,3 +422,5 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index) #endif #endif + +#endif diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 517c098fde20..cc5fb75af78a 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -9,6 +9,7 @@ * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do * nested includes. Get it right in the .c file). */ +#ifdef CONFIG_BLOCK struct writeback_control; typedef int (writepage_t)(struct page *page, struct writeback_control *wbc); @@ -20,3 +21,5 @@ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); + +#endif diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index eb3e547c8fee..c588709acbbc 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -53,6 +53,8 @@ #include #include +#ifdef CONFIG_MD + /* * Different major versions are not compatible. * Different minor versions are only downward compatible. @@ -95,5 +97,6 @@ extern void md_new_event(mddev_t *mddev); extern void md_update_sb(mddev_t * mddev); +#endif /* CONFIG_MD */ #endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index d28890295852..920b94fe31fa 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -18,6 +18,8 @@ /* and dm-bio-list.h is not under include/linux because.... ??? */ #include "../../../drivers/md/dm-bio-list.h" +#ifdef CONFIG_BLOCK + #define LEVEL_MULTIPATH (-4) #define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) @@ -362,5 +364,6 @@ static inline void safe_put_page(struct page *p) if (p) put_page(p); } +#endif /* CONFIG_BLOCK */ #endif diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index bbf66219b769..c247a28259bc 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -6,7 +6,6 @@ #include #include - #define MSG_SIMPLE_TAG 0x20 #define MSG_HEAD_TAG 0x21 #define MSG_ORDERED_TAG 0x22 @@ -14,6 +13,7 @@ #define SCSI_NO_TAG (-1) /* identify no tag in use */ +#ifdef CONFIG_BLOCK /** * scsi_get_tag_type - get the type of tag the device supports @@ -144,4 +144,5 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) return shost->bqt ? 0 : -ENOMEM; } +#endif /* CONFIG_BLOCK */ #endif /* _SCSI_SCSI_TCQ_H */ diff --git a/init/Kconfig b/init/Kconfig index 4381006dd666..d2eb7a84a264 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -92,7 +92,7 @@ config LOCALVERSION_AUTO config SWAP bool "Support for paging of anonymous memory (swap)" - depends on MMU + depends on MMU && BLOCK default y help This option allows you to choose whether you want to have support diff --git a/init/do_mounts.c b/init/do_mounts.c index b290aadb1d3f..dc1ec0803ef9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -285,7 +285,11 @@ void __init mount_block_root(char *name, int flags) { char *fs_names = __getname(); char *p; +#ifdef CONFIG_BLOCK char b[BDEVNAME_SIZE]; +#else + const char *b = name; +#endif get_fs_names(fs_names); retry: @@ -304,7 +308,9 @@ retry: * Allow the user to distinguish between failed sys_open * and bad superblock on root device. */ +#ifdef CONFIG_BLOCK __bdevname(ROOT_DEV, b); +#endif printk("VFS: Cannot open root device \"%s\" or %s\n", root_device_name, b); printk("Please append a correct \"root=\" boot option\n"); @@ -316,7 +322,10 @@ retry: for (p = fs_names; *p; p += strlen(p)+1) printk(" %s", p); printk("\n"); - panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b)); +#ifdef CONFIG_BLOCK + __bdevname(ROOT_DEV, b); +#endif + panic("VFS: Unable to mount root fs on %s", b); out: putname(fs_names); } @@ -387,8 +396,10 @@ void __init mount_root(void) change_floppy("root floppy"); } #endif +#ifdef CONFIG_BLOCK create_dev("/dev/root", ROOT_DEV); mount_block_root("/dev/root", root_mountflags); +#endif } /* diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 6991bece67e8..7a3b2e75f040 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -134,3 +134,8 @@ cond_syscall(sys_madvise); cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); cond_syscall(compat_sys_move_pages); + +/* block-layer dependent */ +cond_syscall(sys_bdflush); +cond_syscall(sys_ioprio_set); +cond_syscall(sys_ioprio_get); diff --git a/mm/Makefile b/mm/Makefile index 4f2166a833b9..12b3a4eee88d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -12,7 +12,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o \ prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) -ifeq ($(CONFIG_MMU),y) +ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) obj-y += bounce.o endif obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o diff --git a/mm/filemap.c b/mm/filemap.c index d6846de08887..c4fe97f5ace0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) *count = inode->i_sb->s_maxbytes - *pos; } else { +#ifdef CONFIG_BLOCK loff_t isize; if (bdev_read_only(I_BDEV(inode))) return -EPERM; @@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (*pos + *count > isize) *count = isize - *pos; +#else + return -EPERM; +#endif } return 0; } diff --git a/mm/migrate.c b/mm/migrate.c index 7f50e3ff54cd..ba2453f9483d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping, } EXPORT_SYMBOL(migrate_page); +#ifdef CONFIG_BLOCK /* * Migration function for pages with buffers. This function can only be used * if the underlying filesystem guarantees that no other references to "page" @@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping, return 0; } EXPORT_SYMBOL(buffer_migrate_page); +#endif /* * Writeback a page to clean the dirty state diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ecf27839c203..c0d4ce144dec 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -807,9 +807,11 @@ int fastcall set_page_dirty(struct page *page) if (likely(mapping)) { int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; - if (spd) - return (*spd)(page); - return __set_page_dirty_buffers(page); +#ifdef CONFIG_BLOCK + if (!spd) + spd = __set_page_dirty_buffers; +#endif + return (*spd)(page); } if (!PageDirty(page)) { if (!TestSetPageDirty(page)) diff --git a/mm/truncate.c b/mm/truncate.c index cd3e34b816db..8fde6580657e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -35,8 +35,10 @@ void do_invalidatepage(struct page *page, unsigned long offset) { void (*invalidatepage)(struct page *, unsigned long); invalidatepage = page->mapping->a_ops->invalidatepage; +#ifdef CONFIG_BLOCK if (!invalidatepage) invalidatepage = block_invalidatepage; +#endif if (invalidatepage) (*invalidatepage)(page, offset); } -- cgit v1.2.3-71-gd317 From bcfd8d36151e531e1c6c731f1fbf792509a1c494 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 31 Aug 2006 12:56:06 +0200 Subject: [PATCH] CONFIG_BLOCK: blk_congestion_wait() fix Don't just do nothing: it'll cause busywaits all over writeback and page reclaim. For now, take a fixed-length nap. Will improve when NFS starts waking up throttled processes. Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3e36107d342a..1d79b8d4ca6d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,6 +1,7 @@ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H +#include #include #include #include @@ -848,7 +849,7 @@ void kblockd_flush(void); static inline long blk_congestion_wait(int rw, long timeout) { - return timeout; + return io_schedule_timeout(timeout); } static inline long nr_blockdev_pages(void) @@ -856,7 +857,9 @@ static inline long nr_blockdev_pages(void) return 0; } -static inline void exit_io_context(void) {} +static inline void exit_io_context(void) +{ +} #endif /* CONFIG_BLOCK */ -- cgit v1.2.3-71-gd317 From 236561e5df009f79f1939e3ca269b9b6f18092f5 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 30 Sep 2006 23:27:03 -0700 Subject: [PATCH] PCI quirks update This fixes two things Firstly someone mistakenly used "errata" for the singular. This causes Dave Woodhouse to emit diagnostics whenever the string is read, and so should be fixed. Secondly the AMD AGP tunnel has an erratum which causes hangs if you try and do direct PCI to AGP transfers in some cases. We have a flag for PCI/PCI failures but we need a different flag for this really as in this case we don't want to stop PCI/PCI transfers using things like IOAT and the new RAID offload work. I'll post some updates to make proper use of the PCIAGP flag in the media/video drivers to Mauro. Signed-off-by: Alan Cox Cc: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pci/quirks.c | 17 +++++++++++++++-- include/linux/pci.h | 5 +++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 08cd86a6dd66..23b599d6a9d5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -93,8 +93,21 @@ static void __devinit quirk_nopcipci(struct pci_dev *dev) pci_pci_problems |= PCIPCI_FAIL; } } + +static void __devinit quirk_nopciamd(struct pci_dev *dev) +{ + u8 rev; + pci_read_config_byte(dev, 0x08, &rev); + if (rev == 0x13) { + /* Erratum 24 */ + printk(KERN_INFO "Chipset erratum: Disabling direct PCI/AGP transfers.\n"); + pci_pci_problems |= PCIAGP_FAIL; + } +} + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, quirk_nopcipci ); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, quirk_nopcipci ); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8151_0, quirk_nopciamd ); /* * Triton requires workarounds to be used by the drivers @@ -555,7 +568,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt * is currently marked NoFix * * We have multiple reports of hangs with this chipset that went away with - * noapic specified. For the moment we assume its the errata. We may be wrong + * noapic specified. For the moment we assume it's the erratum. We may be wrong * of course. However the advice is demonstrably good even if so.. */ static void __devinit quirk_amd_ioapic(struct pci_dev *dev) @@ -564,7 +577,7 @@ static void __devinit quirk_amd_ioapic(struct pci_dev *dev) pci_read_config_byte(dev, PCI_REVISION_ID, &rev); if (rev >= 0x02) { - printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n"); + printk(KERN_WARNING "I/O APIC: AMD Erratum #22 may be present. In the event of instability try\n"); printk(KERN_WARNING " : booting with the \"noapic\" option.\n"); } } diff --git a/include/linux/pci.h b/include/linux/pci.h index 5c3a4176eb64..4431ce4e1e6f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -787,12 +787,13 @@ enum pci_fixup_pass { void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); extern int pci_pci_problems; -#define PCIPCI_FAIL 1 +#define PCIPCI_FAIL 1 /* No PCI PCI DMA */ #define PCIPCI_TRITON 2 #define PCIPCI_NATOMA 4 #define PCIPCI_VIAETBF 8 #define PCIPCI_VSFX 16 -#define PCIPCI_ALIMAGIK 32 +#define PCIPCI_ALIMAGIK 32 /* Need low latency setting */ +#define PCIAGP_FAIL 64 /* No PCI to AGP DMA */ #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3-71-gd317 From f28c5edc06ecd8068b38b7662ad19f4d20d741af Mon Sep 17 00:00:00 2001 From: Keith Mannthey Date: Sat, 30 Sep 2006 23:27:04 -0700 Subject: [PATCH] hot-add-mem x86_64: fixup externs Fix up externs in memory_hotplug.c. Cleanup. Signed-off-by: Keith Mannthey Cc: KAMEZAWA Hiroyuki Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 ++ include/linux/mm.h | 2 ++ mm/memory_hotplug.c | 4 ---- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 218501cfaeb9..7b54666cea8e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -172,5 +172,7 @@ static inline int __remove_pages(struct zone *zone, unsigned long start_pfn, extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int remove_memory(u64 start, u64 size); +extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, + int nr_pages); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 4edf1934e5ca..b7966ab8cb6a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -946,6 +946,8 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern void zonetable_add(struct zone *zone, int nid, enum zone_type zid, + unsigned long pfn, unsigned long size); #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2053bb165a21..63b14d4f68fb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -26,8 +26,6 @@ #include -extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, - unsigned long size); static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) { struct pglist_data *pgdat = zone->zone_pgdat; @@ -47,8 +45,6 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) return 0; } -extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, - int nr_pages); static int __add_section(struct zone *zone, unsigned long phys_start_pfn) { int nr_pages = PAGES_PER_SECTION; -- cgit v1.2.3-71-gd317 From 53947027ad90542ddb2bb746e3175827c270610a Mon Sep 17 00:00:00 2001 From: Keith Mannthey Date: Sat, 30 Sep 2006 23:27:08 -0700 Subject: [PATCH] hot-add-mem x86_64: use CONFIG_MEMORY_HOTPLUG_SPARSE Migate CONFIG_MEMORY_HOTPLUG to CONFIG_MEMORY_HOTPLUG_SPARSE where needed. Signed-off-by: Keith Mannthey Cc: KAMEZAWA Hiroyuki Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/Makefile | 2 +- include/linux/memory.h | 4 ++-- mm/memory_hotplug.c | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b539e5e75b56..7bbb9eeda235 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -8,7 +8,7 @@ obj-y += power/ obj-$(CONFIG_ISA) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o -obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o +obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o obj-$(CONFIG_SMP) += topology.o obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o diff --git a/include/linux/memory.h b/include/linux/memory.h index 8f04143ca363..654ef5544878 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -57,7 +57,7 @@ struct memory_block { struct notifier_block; struct mem_section; -#ifndef CONFIG_MEMORY_HOTPLUG +#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE static inline int memory_dev_init(void) { return 0; @@ -78,7 +78,7 @@ extern int remove_memory_block(unsigned long, struct mem_section *, int); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) { struct pglist_data *pgdat = zone->zone_pgdat; @@ -192,6 +193,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) writeback_set_ratelimit(); return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ static pg_data_t *hotadd_new_pgdat(int nid, u64 start) { -- cgit v1.2.3-71-gd317 From 6e21828743247270d09a86756a0c11702500dbfb Mon Sep 17 00:00:00 2001 From: Richard Knutsson Date: Sat, 30 Sep 2006 23:27:11 -0700 Subject: [PATCH] Generic boolean This patch defines: * a generic boolean-type, named 'bool' * aliases to 0 and 1, named 'false' and 'true' Removing colliding definitions of 'bool', 'false' and 'true'. Signed-off-by: Richard Knutsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/DAC960.h | 2 +- drivers/media/video/cpia2/cpia2.h | 4 ---- drivers/net/dgrs.c | 1 - drivers/scsi/BusLogic.h | 5 +---- include/linux/stddef.h | 6 ++++++ include/linux/types.h | 2 ++ 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h index a82f37f749a5..f9217c34bc2b 100644 --- a/drivers/block/DAC960.h +++ b/drivers/block/DAC960.h @@ -71,7 +71,7 @@ Define a Boolean data type. */ -typedef enum { false, true } __attribute__ ((packed)) boolean; +typedef bool boolean; /* diff --git a/drivers/media/video/cpia2/cpia2.h b/drivers/media/video/cpia2/cpia2.h index c5ecb2be5f93..8d2dfc128821 100644 --- a/drivers/media/video/cpia2/cpia2.h +++ b/drivers/media/video/cpia2/cpia2.h @@ -50,10 +50,6 @@ /*** * Image defines ***/ -#ifndef true -#define true 1 -#define false 0 -#endif /* Misc constants */ #define ALLOW_CORRUPT 0 /* Causes collater to discard checksum */ diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c index e9361cb5f4cd..d0842527b369 100644 --- a/drivers/net/dgrs.c +++ b/drivers/net/dgrs.c @@ -110,7 +110,6 @@ static char version[] __initdata = * DGRS include files */ typedef unsigned char uchar; -typedef unsigned int bool; #define vol volatile #include "dgrs.h" diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h index 9792e5af5252..d6d1d5613c8a 100644 --- a/drivers/scsi/BusLogic.h +++ b/drivers/scsi/BusLogic.h @@ -237,10 +237,7 @@ enum BusLogic_BIOS_DiskGeometryTranslation { Define a Boolean data type. */ -typedef enum { - false, - true -} PACKED boolean; +typedef bool boolean; /* Define a 10^18 Statistics Byte Counter data type. diff --git a/include/linux/stddef.h b/include/linux/stddef.h index ea65dfb60cd8..6a40c76bdcf1 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -11,6 +11,12 @@ #endif #ifdef __KERNEL__ + +enum { + false = 0, + true = 1 +}; + #undef offsetof #ifdef __compiler_offsetof #define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) diff --git a/include/linux/types.h b/include/linux/types.h index 3f235660a3cd..406d4ae57631 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -33,6 +33,8 @@ typedef __kernel_clockid_t clockid_t; typedef __kernel_mqd_t mqd_t; #ifdef __KERNEL__ +typedef _Bool bool; + typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; -- cgit v1.2.3-71-gd317 From 5c87579e65ee4f419b2369407f82326d38b5d2d8 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 30 Sep 2006 23:27:17 -0700 Subject: [PATCH] maximum latency tracking infrastructure Add infrastructure to track "maximum allowable latency" for power saving policies. The reason for adding this infrastructure is that power management in the idle loop needs to make a tradeoff between latency and power savings (deeper power save modes have a longer latency to running code again). The code that today makes this tradeoff just does a rather simple algorithm; however this is not good enough: There are devices and use cases where a lower latency is required than that the higher power saving states provide. An example would be audio playback, but another example is the ipw2100 wireless driver that right now has a very direct and ugly acpi hook to disable some higher power states randomly when it gets certain types of error. The proposed solution is to have an interface where drivers can * announce the maximum latency (in microseconds) that they can deal with * modify this latency * give up their constraint and a function where the code that decides on power saving strategy can query the current global desired maximum. This patch has a user of each side: on the consumer side, ACPI is patched to use this, on the producer side the ipw2100 driver is patched. A generic maximum latency is also registered of 2 timer ticks (more and you lose accurate time tracking after all). While the existing users of the patch are x86 specific, the infrastructure is not. I'd like to ask the arch maintainers of other architectures if the infrastructure is generic enough for their use (assuming the architecture has such a tradeoff as concept at all), and the sound/multimedia driver owners to look at the driver facing API to see if this is something they can use. [akpm@osdl.org: cleanups] Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Acked-by: Jesse Barnes Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/processor_idle.c | 38 +++++- drivers/net/wireless/ipw2100.c | 10 ++ include/linux/latency.h | 25 ++++ kernel/Makefile | 2 +- kernel/latency.c | 279 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 349 insertions(+), 5 deletions(-) create mode 100644 include/linux/latency.h create mode 100644 kernel/latency.c (limited to 'include/linux') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 71066066d626..0a395fca843b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -38,6 +38,7 @@ #include #include #include /* need_resched() */ +#include #include #include @@ -453,7 +454,8 @@ static void acpi_processor_idle(void) */ if (cx->promotion.state && ((cx->promotion.state - pr->power.states) <= max_cstate)) { - if (sleep_ticks > cx->promotion.threshold.ticks) { + if (sleep_ticks > cx->promotion.threshold.ticks && + cx->promotion.state->latency <= system_latency_constraint()) { cx->promotion.count++; cx->demotion.count = 0; if (cx->promotion.count >= @@ -494,8 +496,10 @@ static void acpi_processor_idle(void) end: /* * Demote if current state exceeds max_cstate + * or if the latency of the current state is unacceptable */ - if ((pr->power.state - pr->power.states) > max_cstate) { + if ((pr->power.state - pr->power.states) > max_cstate || + pr->power.state->latency > system_latency_constraint()) { if (cx->demotion.state) next_state = cx->demotion.state; } @@ -1009,9 +1013,11 @@ static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "active state: C%zd\n" "max_cstate: C%d\n" - "bus master activity: %08x\n", + "bus master activity: %08x\n" + "maximum allowed latency: %d usec\n", pr->power.state ? pr->power.state - pr->power.states : 0, - max_cstate, (unsigned)pr->power.bm_activity); + max_cstate, (unsigned)pr->power.bm_activity, + system_latency_constraint()); seq_puts(seq, "states:\n"); @@ -1077,6 +1083,28 @@ static const struct file_operations acpi_processor_power_fops = { .release = single_release, }; +static void smp_callback(void *v) +{ + /* we already woke the CPU up, nothing more to do */ +} + +/* + * This function gets called when a part of the kernel has a new latency + * requirement. This means we need to get all processors out of their C-state, + * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that + * wakes them all right up. + */ +static int acpi_processor_latency_notify(struct notifier_block *b, + unsigned long l, void *v) +{ + smp_call_function(smp_callback, NULL, 0, 1); + return NOTIFY_OK; +} + +static struct notifier_block acpi_processor_latency_notifier = { + .notifier_call = acpi_processor_latency_notify, +}; + int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) { @@ -1093,6 +1121,7 @@ int acpi_processor_power_init(struct acpi_processor *pr, "ACPI: processor limited to max C-state %d\n", max_cstate); first_run++; + register_latency_notifier(&acpi_processor_latency_notifier); } if (!pr) @@ -1164,6 +1193,7 @@ int acpi_processor_power_exit(struct acpi_processor *pr, * copies of pm_idle before proceeding. */ cpu_idle_wait(); + unregister_latency_notifier(&acpi_processor_latency_notifier); } return 0; diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 6c5add701a6f..97937809de09 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -163,6 +163,7 @@ that only one external action is invoked at a time. #include #include #include +#include #include "ipw2100.h" @@ -1697,6 +1698,11 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) return 0; } + /* the ipw2100 hardware really doesn't want power management delays + * longer than 175usec + */ + modify_acceptable_latency("ipw2100", 175); + /* If the interrupt is enabled, turn it off... */ spin_lock_irqsave(&priv->low_lock, flags); ipw2100_disable_interrupts(priv); @@ -1849,6 +1855,8 @@ static void ipw2100_down(struct ipw2100_priv *priv) ipw2100_disable_interrupts(priv); spin_unlock_irqrestore(&priv->low_lock, flags); + modify_acceptable_latency("ipw2100", INFINITE_LATENCY); + #ifdef ACPI_CSTATE_LIMIT_DEFINED if (priv->config & CFG_C3_DISABLED) { IPW_DEBUG_INFO(": Resetting C3 transitions.\n"); @@ -6534,6 +6542,7 @@ static int __init ipw2100_init(void) ret = pci_register_driver(&ipw2100_pci_driver); + set_acceptable_latency("ipw2100", INFINITE_LATENCY); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; driver_create_file(&ipw2100_pci_driver.driver, @@ -6554,6 +6563,7 @@ static void __exit ipw2100_exit(void) &driver_attr_debug_level); #endif pci_unregister_driver(&ipw2100_pci_driver); + remove_acceptable_latency("ipw2100"); } module_init(ipw2100_init); diff --git a/include/linux/latency.h b/include/linux/latency.h new file mode 100644 index 000000000000..c08b52bb55b0 --- /dev/null +++ b/include/linux/latency.h @@ -0,0 +1,25 @@ +/* + * latency.h: Explicit system-wide latency-expectation infrastructure + * + * (C) Copyright 2006 Intel Corporation + * Author: Arjan van de Ven + * + */ + +#ifndef _INCLUDE_GUARD_LATENCY_H_ +#define _INCLUDE_GUARD_LATENCY_H_ + +#include + +void set_acceptable_latency(char *identifier, int usecs); +void modify_acceptable_latency(char *identifier, int usecs); +void remove_acceptable_latency(char *identifier); +void synchronize_acceptable_latency(void); +int system_latency_constraint(void); + +int register_latency_notifier(struct notifier_block * nb); +int unregister_latency_notifier(struct notifier_block * nb); + +#define INFINITE_LATENCY 1000000 + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index d62ec66c1af2..e210e8cf7237 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o + hrtimer.o rwsem.o latency.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff --git a/kernel/latency.c b/kernel/latency.c new file mode 100644 index 000000000000..258f2555abbc --- /dev/null +++ b/kernel/latency.c @@ -0,0 +1,279 @@ +/* + * latency.c: Explicit system-wide latency-expectation infrastructure + * + * The purpose of this infrastructure is to allow device drivers to set + * latency constraint they have and to collect and summarize these + * expectations globally. The cummulated result can then be used by + * power management and similar users to make decisions that have + * tradoffs with a latency component. + * + * An example user of this are the x86 C-states; each higher C state saves + * more power, but has a higher exit latency. For the idle loop power + * code to make a good decision which C-state to use, information about + * acceptable latencies is required. + * + * An example announcer of latency is an audio driver that knowns it + * will get an interrupt when the hardware has 200 usec of samples + * left in the DMA buffer; in that case the driver can set a latency + * constraint of, say, 150 usec. + * + * Multiple drivers can each announce their maximum accepted latency, + * to keep these appart, a string based identifier is used. + * + * + * (C) Copyright 2006 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct latency_info { + struct list_head list; + int usecs; + char *identifier; +}; + +/* + * locking rule: all modifications to current_max_latency and + * latency_list need to be done while holding the latency_lock. + * latency_lock needs to be taken _irqsave. + */ +static atomic_t current_max_latency; +static DEFINE_SPINLOCK(latency_lock); + +static LIST_HEAD(latency_list); +static BLOCKING_NOTIFIER_HEAD(latency_notifier); + +/* + * This function returns the maximum latency allowed, which + * happens to be the minimum of all maximum latencies on the + * list. + */ +static int __find_max_latency(void) +{ + int min = INFINITE_LATENCY; + struct latency_info *info; + + list_for_each_entry(info, &latency_list, list) { + if (info->usecs < min) + min = info->usecs; + } + return min; +} + +/** + * set_acceptable_latency - sets the maximum latency acceptable + * @identifier: string that identifies this driver + * @usecs: maximum acceptable latency for this driver + * + * This function informs the kernel that this device(driver) + * can accept at most usecs latency. This setting is used for + * power management and similar tradeoffs. + * + * This function sleeps and can only be called from process + * context. + * Calling this function with an existing identifier is valid + * and will cause the existing latency setting to be changed. + */ +void set_acceptable_latency(char *identifier, int usecs) +{ + struct latency_info *info, *iter; + unsigned long flags; + int found_old = 0; + + info = kzalloc(sizeof(struct latency_info), GFP_KERNEL); + if (!info) + return; + info->usecs = usecs; + info->identifier = kstrdup(identifier, GFP_KERNEL); + if (!info->identifier) + goto free_info; + + spin_lock_irqsave(&latency_lock, flags); + list_for_each_entry(iter, &latency_list, list) { + if (strcmp(iter->identifier, identifier)==0) { + found_old = 1; + iter->usecs = usecs; + break; + } + } + if (!found_old) + list_add(&info->list, &latency_list); + + if (usecs < atomic_read(¤t_max_latency)) + atomic_set(¤t_max_latency, usecs); + + spin_unlock_irqrestore(&latency_lock, flags); + + blocking_notifier_call_chain(&latency_notifier, + atomic_read(¤t_max_latency), NULL); + + /* + * if we inserted the new one, we're done; otherwise there was + * an existing one so we need to free the redundant data + */ + if (!found_old) + return; + + kfree(info->identifier); +free_info: + kfree(info); +} +EXPORT_SYMBOL_GPL(set_acceptable_latency); + +/** + * modify_acceptable_latency - changes the maximum latency acceptable + * @identifier: string that identifies this driver + * @usecs: maximum acceptable latency for this driver + * + * This function informs the kernel that this device(driver) + * can accept at most usecs latency. This setting is used for + * power management and similar tradeoffs. + * + * This function does not sleep and can be called in any context. + * Trying to use a non-existing identifier silently gets ignored. + * + * Due to the atomic nature of this function, the modified latency + * value will only be used for future decisions; past decisions + * can still lead to longer latencies in the near future. + */ +void modify_acceptable_latency(char *identifier, int usecs) +{ + struct latency_info *iter; + unsigned long flags; + + spin_lock_irqsave(&latency_lock, flags); + list_for_each_entry(iter, &latency_list, list) { + if (strcmp(iter->identifier, identifier) == 0) { + iter->usecs = usecs; + break; + } + } + if (usecs < atomic_read(¤t_max_latency)) + atomic_set(¤t_max_latency, usecs); + spin_unlock_irqrestore(&latency_lock, flags); +} +EXPORT_SYMBOL_GPL(modify_acceptable_latency); + +/** + * remove_acceptable_latency - removes the maximum latency acceptable + * @identifier: string that identifies this driver + * + * This function removes a previously set maximum latency setting + * for the driver and frees up any resources associated with the + * bookkeeping needed for this. + * + * This function does not sleep and can be called in any context. + * Trying to use a non-existing identifier silently gets ignored. + */ +void remove_acceptable_latency(char *identifier) +{ + unsigned long flags; + int newmax = 0; + struct latency_info *iter, *temp; + + spin_lock_irqsave(&latency_lock, flags); + + list_for_each_entry_safe(iter, temp, &latency_list, list) { + if (strcmp(iter->identifier, identifier) == 0) { + list_del(&iter->list); + newmax = iter->usecs; + kfree(iter->identifier); + kfree(iter); + break; + } + } + + /* If we just deleted the system wide value, we need to + * recalculate with a full search + */ + if (newmax == atomic_read(¤t_max_latency)) { + newmax = __find_max_latency(); + atomic_set(¤t_max_latency, newmax); + } + spin_unlock_irqrestore(&latency_lock, flags); +} +EXPORT_SYMBOL_GPL(remove_acceptable_latency); + +/** + * system_latency_constraint - queries the system wide latency maximum + * + * This function returns the system wide maximum latency in + * microseconds. + * + * This function does not sleep and can be called in any context. + */ +int system_latency_constraint(void) +{ + return atomic_read(¤t_max_latency); +} +EXPORT_SYMBOL_GPL(system_latency_constraint); + +/** + * synchronize_acceptable_latency - recalculates all latency decisions + * + * This function will cause a callback to various kernel pieces that + * will make those pieces rethink their latency decisions. This implies + * that if there are overlong latencies in hardware state already, those + * latencies get taken right now. When this call completes no overlong + * latency decisions should be active anymore. + * + * Typical usecase of this is after a modify_acceptable_latency() call, + * which in itself is non-blocking and non-synchronizing. + * + * This function blocks and should not be called with locks held. + */ + +void synchronize_acceptable_latency(void) +{ + blocking_notifier_call_chain(&latency_notifier, + atomic_read(¤t_max_latency), NULL); +} +EXPORT_SYMBOL_GPL(synchronize_acceptable_latency); + +/* + * Latency notifier: this notifier gets called when a non-atomic new + * latency value gets set. The expectation nof the caller of the + * non-atomic set is that when the call returns, future latencies + * are within bounds, so the functions on the notifier list are + * expected to take the overlong latencies immediately, inside the + * callback, and not make a overlong latency decision anymore. + * + * The callback gets called when the new latency value is made + * active so system_latency_constraint() returns the new latency. + */ +int register_latency_notifier(struct notifier_block * nb) +{ + return blocking_notifier_chain_register(&latency_notifier, nb); +} +EXPORT_SYMBOL_GPL(register_latency_notifier); + +int unregister_latency_notifier(struct notifier_block * nb) +{ + return blocking_notifier_chain_unregister(&latency_notifier, nb); +} +EXPORT_SYMBOL_GPL(unregister_latency_notifier); + +static __init int latency_init(void) +{ + atomic_set(¤t_max_latency, INFINITE_LATENCY); + /* + * we don't want by default to have longer latencies than 2 ticks, + * since that would cause lost ticks + */ + set_acceptable_latency("kernel", 2*1000000/HZ); + return 0; +} + +module_init(latency_init); -- cgit v1.2.3-71-gd317 From 1a2f67b459bb7846d4a15924face63eb2683acc2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Sep 2006 23:27:20 -0700 Subject: [PATCH] kmemdup: introduce One of idiomatic ways to duplicate a region of memory is dst = kmalloc(len, GFP_KERNEL); if (!dst) return -ENOMEM; memcpy(dst, src, len); which is neat code except a programmer needs to write size twice. Which sometimes leads to mistakes. If len passed to kmalloc is smaller that len passed to memcpy, it's straight overwrite-beyond-end. If len passed to memcpy is smaller than len passed to kmalloc, it's either a) legit behaviour ;-), or b) cloned buffer will contain garbage in second half. Slight trolling of commit lists shows several duplications bugs done exactly because of diverged lenghts: Linux: [CRYPTO]: Fix memcpy/memset args. [PATCH] memcpy/memset fixes OpenBSD: kerberosV/src/lib/asn1: der_copy.c:1.4 If programmer is given only one place to play with lengths, I believe, such mistakes could be avoided. With kmemdup, the snippet above will be rewritten as: dst = kmemdup(src, len, GFP_KERNEL); if (!dst) return -ENOMEM; This also leads to smaller code (kzalloc effect). Quick grep shows 200+ places where kmemdup() can be used. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 1 + mm/util.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index e4c755860316..4f69ef9e6eb5 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -99,6 +99,7 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif extern char *kstrdup(const char *s, gfp_t gfp); +extern void *kmemdup(const void *src, size_t len, gfp_t gfp); #ifdef __cplusplus } diff --git a/mm/util.c b/mm/util.c index 7368479220b3..e14fa84ef39a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -40,6 +40,24 @@ char *kstrdup(const char *s, gfp_t gfp) } EXPORT_SYMBOL(kstrdup); +/** + * kmemdup - duplicate region of memory + * + * @src: memory region to duplicate + * @len: memory region length + * @gfp: GFP mask to use + */ +void *kmemdup(const void *src, size_t len, gfp_t gfp) +{ + void *p; + + p = ____kmalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} +EXPORT_SYMBOL(kmemdup); + /* * strndup_user - duplicate an existing string from user space * -- cgit v1.2.3-71-gd317 From 82b0547cfae1fb2ee26cad588f6d49a347d24740 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Sep 2006 23:27:22 -0700 Subject: [PATCH] Create fs/utimes.c * fs/open.c is getting bit crowdy * preparation to lutimes(2) Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Makefile | 2 +- fs/open.c | 134 ------------------------------------------------ fs/utimes.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/namei.h | 1 + include/linux/utime.h | 2 + 5 files changed, 141 insertions(+), 135 deletions(-) create mode 100644 fs/utimes.c (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index a503e6ce0f32..819b2a93bebe 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ - pnode.o drop_caches.o splice.o sync.o + pnode.o drop_caches.o splice.o sync.o utimes.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/open.c b/fs/open.c index 304c1c7814cb..35c3e454458e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -29,8 +28,6 @@ #include #include -#include - int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { int retval = -ENODEV; @@ -353,137 +350,6 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) } #endif -#ifdef __ARCH_WANT_SYS_UTIME - -/* - * sys_utime() can be implemented in user-level using sys_utimes(). - * Is this for backwards compatibility? If so, why not move it - * into the appropriate arch directory (for those architectures that - * need it). - */ - -/* If times==NULL, set access and modification to current time, - * must be owner or have write permission. - * Else, update from *times, must be owner or super user. - */ -asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) -{ - int error; - struct nameidata nd; - struct inode * inode; - struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; - inode = nd.dentry->d_inode; - - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; - - /* Don't worry, the checks are done in inode_change_ok() */ - newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; - if (times) { - error = -EPERM; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - goto dput_and_out; - - error = get_user(newattrs.ia_atime.tv_sec, ×->actime); - newattrs.ia_atime.tv_nsec = 0; - if (!error) - error = get_user(newattrs.ia_mtime.tv_sec, ×->modtime); - newattrs.ia_mtime.tv_nsec = 0; - if (error) - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; - } else { - error = -EACCES; - if (IS_IMMUTABLE(inode)) - goto dput_and_out; - - if (current->fsuid != inode->i_uid && - (error = vfs_permission(&nd, MAY_WRITE)) != 0) - goto dput_and_out; - } - mutex_lock(&inode->i_mutex); - error = notify_change(nd.dentry, &newattrs); - mutex_unlock(&inode->i_mutex); -dput_and_out: - path_release(&nd); -out: - return error; -} - -#endif - -/* If times==NULL, set access and modification to current time, - * must be owner or have write permission. - * Else, update from *times, must be owner or super user. - */ -long do_utimes(int dfd, char __user *filename, struct timeval *times) -{ - int error; - struct nameidata nd; - struct inode * inode; - struct iattr newattrs; - - error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); - - if (error) - goto out; - inode = nd.dentry->d_inode; - - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; - - /* Don't worry, the checks are done in inode_change_ok() */ - newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; - if (times) { - error = -EPERM; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - goto dput_and_out; - - newattrs.ia_atime.tv_sec = times[0].tv_sec; - newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000; - newattrs.ia_mtime.tv_sec = times[1].tv_sec; - newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; - } else { - error = -EACCES; - if (IS_IMMUTABLE(inode)) - goto dput_and_out; - - if (current->fsuid != inode->i_uid && - (error = vfs_permission(&nd, MAY_WRITE)) != 0) - goto dput_and_out; - } - mutex_lock(&inode->i_mutex); - error = notify_change(nd.dentry, &newattrs); - mutex_unlock(&inode->i_mutex); -dput_and_out: - path_release(&nd); -out: - return error; -} - -asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) -{ - struct timeval times[2]; - - if (utimes && copy_from_user(×, utimes, sizeof(times))) - return -EFAULT; - return do_utimes(dfd, filename, utimes ? times : NULL); -} - -asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) -{ - return sys_futimesat(AT_FDCWD, filename, utimes); -} - - /* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and diff --git a/fs/utimes.c b/fs/utimes.c new file mode 100644 index 000000000000..1bcd852fc4a9 --- /dev/null +++ b/fs/utimes.c @@ -0,0 +1,137 @@ +#include +#include +#include +#include +#include +#include +#include + +#ifdef __ARCH_WANT_SYS_UTIME + +/* + * sys_utime() can be implemented in user-level using sys_utimes(). + * Is this for backwards compatibility? If so, why not move it + * into the appropriate arch directory (for those architectures that + * need it). + */ + +/* If times==NULL, set access and modification to current time, + * must be owner or have write permission. + * Else, update from *times, must be owner or super user. + */ +asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) +{ + int error; + struct nameidata nd; + struct inode * inode; + struct iattr newattrs; + + error = user_path_walk(filename, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; + + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; + + /* Don't worry, the checks are done in inode_change_ok() */ + newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; + if (times) { + error = -EPERM; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + goto dput_and_out; + + error = get_user(newattrs.ia_atime.tv_sec, ×->actime); + newattrs.ia_atime.tv_nsec = 0; + if (!error) + error = get_user(newattrs.ia_mtime.tv_sec, ×->modtime); + newattrs.ia_mtime.tv_nsec = 0; + if (error) + goto dput_and_out; + + newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; + } else { + error = -EACCES; + if (IS_IMMUTABLE(inode)) + goto dput_and_out; + + if (current->fsuid != inode->i_uid && + (error = vfs_permission(&nd, MAY_WRITE)) != 0) + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); + error = notify_change(nd.dentry, &newattrs); + mutex_unlock(&inode->i_mutex); +dput_and_out: + path_release(&nd); +out: + return error; +} + +#endif + +/* If times==NULL, set access and modification to current time, + * must be owner or have write permission. + * Else, update from *times, must be owner or super user. + */ +long do_utimes(int dfd, char __user *filename, struct timeval *times) +{ + int error; + struct nameidata nd; + struct inode * inode; + struct iattr newattrs; + + error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); + + if (error) + goto out; + inode = nd.dentry->d_inode; + + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; + + /* Don't worry, the checks are done in inode_change_ok() */ + newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; + if (times) { + error = -EPERM; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + goto dput_and_out; + + newattrs.ia_atime.tv_sec = times[0].tv_sec; + newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000; + newattrs.ia_mtime.tv_sec = times[1].tv_sec; + newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000; + newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; + } else { + error = -EACCES; + if (IS_IMMUTABLE(inode)) + goto dput_and_out; + + if (current->fsuid != inode->i_uid && + (error = vfs_permission(&nd, MAY_WRITE)) != 0) + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); + error = notify_change(nd.dentry, &newattrs); + mutex_unlock(&inode->i_mutex); +dput_and_out: + path_release(&nd); +out: + return error; +} + +asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) +{ + struct timeval times[2]; + + if (utimes && copy_from_user(×, utimes, sizeof(times))) + return -EFAULT; + return do_utimes(dfd, filename, utimes ? times : NULL); +} + +asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) +{ + return sys_futimesat(AT_FDCWD, filename, utimes); +} diff --git a/include/linux/namei.h b/include/linux/namei.h index c6470ba00668..f5f19606effb 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -1,6 +1,7 @@ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H +#include #include struct vfsmount; diff --git a/include/linux/utime.h b/include/linux/utime.h index c6bf27b7897e..640be6a1959e 100644 --- a/include/linux/utime.h +++ b/include/linux/utime.h @@ -1,6 +1,8 @@ #ifndef _LINUX_UTIME_H #define _LINUX_UTIME_H +#include + struct utimbuf { time_t actime; time_t modtime; -- cgit v1.2.3-71-gd317 From cb10dc9ac7eea2c891df6b79b9ef1fbe59cb5429 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Sat, 30 Sep 2006 23:27:45 -0700 Subject: [PATCH] synclink_gt: add bisync and monosync modes Add bisync and monosync serial protocol support to the synclink_gt driver. Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 88 +++++++++++++++++++++++++++++++++------------- include/linux/synclink.h | 4 ++- 2 files changed, 66 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 2f07b085536b..bd3821679ac8 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -461,7 +461,7 @@ static int adapter_test(struct slgt_info *info); static void reset_adapter(struct slgt_info *info); static void reset_port(struct slgt_info *info); static void async_mode(struct slgt_info *info); -static void hdlc_mode(struct slgt_info *info); +static void sync_mode(struct slgt_info *info); static void rx_stop(struct slgt_info *info); static void rx_start(struct slgt_info *info); @@ -881,7 +881,9 @@ static int write(struct tty_struct *tty, if (!count) goto cleanup; - if (info->params.mode == MGSL_MODE_RAW) { + if (info->params.mode == MGSL_MODE_RAW || + info->params.mode == MGSL_MODE_MONOSYNC || + info->params.mode == MGSL_MODE_BISYNC) { unsigned int bufs_needed = (count/DMABUFSIZE); unsigned int bufs_free = free_tbuf_count(info); if (count % DMABUFSIZE) @@ -1897,6 +1899,8 @@ static void bh_handler(void* context) while(rx_get_frame(info)); break; case MGSL_MODE_RAW: + case MGSL_MODE_MONOSYNC: + case MGSL_MODE_BISYNC: while(rx_get_buf(info)); break; } @@ -2362,10 +2366,9 @@ static void program_hw(struct slgt_info *info) rx_stop(info); tx_stop(info); - if (info->params.mode == MGSL_MODE_HDLC || - info->params.mode == MGSL_MODE_RAW || + if (info->params.mode != MGSL_MODE_ASYNC || info->netcount) - hdlc_mode(info); + sync_mode(info); else async_mode(info); @@ -2564,6 +2567,10 @@ static int rx_enable(struct slgt_info *info, int enable) if (enable) { if (!info->rx_enabled) rx_start(info); + else if (enable == 2) { + /* force hunt mode (write 1 to RCR[3]) */ + wr_reg16(info, RCR, rd_reg16(info, RCR) | BIT3); + } } else { if (info->rx_enabled) rx_stop(info); @@ -3748,7 +3755,7 @@ static void tx_start(struct slgt_info *info) { if (!info->tx_enabled) { wr_reg16(info, TCR, - (unsigned short)(rd_reg16(info, TCR) | BIT1)); + (unsigned short)((rd_reg16(info, TCR) | BIT1) & ~BIT2)); info->tx_enabled = TRUE; } @@ -3775,13 +3782,18 @@ static void tx_start(struct slgt_info *info) tdma_reset(info); /* set 1st descriptor address */ wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc); - if (info->params.mode == MGSL_MODE_RAW) + switch(info->params.mode) { + case MGSL_MODE_RAW: + case MGSL_MODE_MONOSYNC: + case MGSL_MODE_BISYNC: wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */ - else + break; + default: wr_reg32(info, TDCSR, BIT0); /* DMA enable */ + } } - if (info->params.mode != MGSL_MODE_RAW) { + if (info->params.mode == MGSL_MODE_HDLC) { info->tx_timer.expires = jiffies + msecs_to_jiffies(5000); add_timer(&info->tx_timer); } @@ -3814,7 +3826,6 @@ static void tx_stop(struct slgt_info *info) /* reset and disable transmitter */ val = rd_reg16(info, TCR) & ~BIT1; /* clear enable bit */ wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */ - wr_reg16(info, TCR, val); /* clear reset */ slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER); @@ -3982,7 +3993,7 @@ static void async_mode(struct slgt_info *info) enable_loopback(info); } -static void hdlc_mode(struct slgt_info *info) +static void sync_mode(struct slgt_info *info) { unsigned short val; @@ -3992,7 +4003,7 @@ static void hdlc_mode(struct slgt_info *info) /* TCR (tx control) * - * 15..13 mode, 000=HDLC 001=raw sync + * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4006,8 +4017,11 @@ static void hdlc_mode(struct slgt_info *info) */ val = 0; - if (info->params.mode == MGSL_MODE_RAW) - val |= BIT13; + switch(info->params.mode) { + case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; + case MGSL_MODE_BISYNC: val |= BIT15; break; + case MGSL_MODE_RAW: val |= BIT13; break; + } if (info->if_mode & MGSL_INTERFACE_RTS_EN) val |= BIT7; @@ -4058,7 +4072,7 @@ static void hdlc_mode(struct slgt_info *info) /* RCR (rx control) * - * 15..13 mode, 000=HDLC 001=raw sync + * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4069,8 +4083,11 @@ static void hdlc_mode(struct slgt_info *info) */ val = 0; - if (info->params.mode == MGSL_MODE_RAW) - val |= BIT13; + switch(info->params.mode) { + case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; + case MGSL_MODE_BISYNC: val |= BIT15; break; + case MGSL_MODE_RAW: val |= BIT13; break; + } switch(info->params.encoding) { @@ -4309,10 +4326,15 @@ static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last while(!done) { /* reset current buffer for reuse */ info->rbufs[i].status = 0; - if (info->params.mode == MGSL_MODE_RAW) + switch(info->params.mode) { + case MGSL_MODE_RAW: + case MGSL_MODE_MONOSYNC: + case MGSL_MODE_BISYNC: set_desc_count(info->rbufs[i], info->raw_rx_size); - else + break; + default: set_desc_count(info->rbufs[i], DMABUFSIZE); + } if (i == last) done = 1; @@ -4477,13 +4499,24 @@ cleanup: static int rx_get_buf(struct slgt_info *info) { unsigned int i = info->rbuf_current; + unsigned int count; if (!desc_complete(info->rbufs[i])) return 0; - DBGDATA(info, info->rbufs[i].buf, desc_count(info->rbufs[i]), "rx"); - DBGINFO(("rx_get_buf size=%d\n", desc_count(info->rbufs[i]))); - ldisc_receive_buf(info->tty, info->rbufs[i].buf, - info->flag_buf, desc_count(info->rbufs[i])); + count = desc_count(info->rbufs[i]); + switch(info->params.mode) { + case MGSL_MODE_MONOSYNC: + case MGSL_MODE_BISYNC: + /* ignore residue in byte synchronous modes */ + if (desc_residue(info->rbufs[i])) + count--; + break; + } + DBGDATA(info, info->rbufs[i].buf, count, "rx"); + DBGINFO(("rx_get_buf size=%d\n", count)); + if (count) + ldisc_receive_buf(info->tty, info->rbufs[i].buf, + info->flag_buf, count); free_rbufs(info, i, i); return 1; } @@ -4549,8 +4582,13 @@ static void tx_load(struct slgt_info *info, const char *buf, unsigned int size) size -= count; buf += count; - if (!size && info->params.mode != MGSL_MODE_RAW) - set_desc_eof(*d, 1); /* HDLC: set EOF of last desc */ + /* + * set EOF bit for last buffer of HDLC frame or + * for every buffer in raw mode + */ + if ((!size && info->params.mode == MGSL_MODE_HDLC) || + info->params.mode == MGSL_MODE_RAW) + set_desc_eof(*d, 1); else set_desc_eof(*d, 0); diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 0577f5284cbc..c8b042667af1 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -1,7 +1,7 @@ /* * SyncLink Multiprotocol Serial Adapter Driver * - * $Id: synclink.h,v 3.13 2006/05/23 18:25:06 paulkf Exp $ + * $Id: synclink.h,v 3.14 2006/07/17 20:15:43 paulkf Exp $ * * Copyright (C) 1998-2000 by Microgate Corporation * @@ -124,6 +124,8 @@ #define MGSL_MODE_ASYNC 1 #define MGSL_MODE_HDLC 2 +#define MGSL_MODE_MONOSYNC 3 +#define MGSL_MODE_BISYNC 4 #define MGSL_MODE_RAW 6 #define MGSL_BUS_TYPE_ISA 1 -- cgit v1.2.3-71-gd317 From 89bbc03c01f68e627a2b120963f136e2815f0d84 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 30 Sep 2006 23:27:54 -0700 Subject: [PATCH] Prevent multiple inclusion of linux/sysrq.h Prevent multiple inclusions of include/linux/sysrq.h using traditional #ifndef..#endif. Signed-off-by: Thomas Petazzoni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysrq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4812ff60561c..e657e523b9bf 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -11,6 +11,8 @@ * based upon discusions in irc://irc.openprojects.net/#kernelnewbies */ +#ifndef _LINUX_SYSRQ_H +#define _LINUX_SYSRQ_H struct pt_regs; struct tty_struct; @@ -57,3 +59,5 @@ static inline int __reterr(void) #define unregister_sysrq_key(ig,nore) __reterr() #endif + +#endif /* _LINUX_SYSRQ_H */ -- cgit v1.2.3-71-gd317 From 54f67f631dfc25ca7a8b19200e34013abc974337 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Sat, 30 Sep 2006 23:27:55 -0700 Subject: [PATCH] Move ncpfs 32bit compat ioctl to ncpfs The ncp specific compat ioctls are clearly local to one file system, so the code can better live there. This version of the patch moves everything into the generic ioctl handler and uses it for both 32 and 64 bit calls. Signed-off-by: Arnd Bergmann Signed-off-by: Petr Vandrovec Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 198 ----------------------------------- fs/ncpfs/dir.c | 3 + fs/ncpfs/file.c | 3 + fs/ncpfs/ioctl.c | 239 +++++++++++++++++++++++++++++++++++++------ include/linux/compat_ioctl.h | 12 --- include/linux/ncp_fs.h | 1 + 6 files changed, 217 insertions(+), 239 deletions(-) (limited to 'include/linux') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 64b34533edea..27ca1aa30562 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -2348,193 +2347,6 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg) } } -#if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE) -struct ncp_ioctl_request_32 { - u32 function; - u32 size; - compat_caddr_t data; -}; - -struct ncp_fs_info_v2_32 { - s32 version; - u32 mounted_uid; - u32 connection; - u32 buffer_size; - - u32 volume_number; - u32 directory_id; - - u32 dummy1; - u32 dummy2; - u32 dummy3; -}; - -struct ncp_objectname_ioctl_32 -{ - s32 auth_type; - u32 object_name_len; - compat_caddr_t object_name; /* an userspace data, in most cases user name */ -}; - -struct ncp_privatedata_ioctl_32 -{ - u32 len; - compat_caddr_t data; /* ~1000 for NDS */ -}; - -#define NCP_IOC_NCPREQUEST_32 _IOR('n', 1, struct ncp_ioctl_request_32) -#define NCP_IOC_GETMOUNTUID2_32 _IOW('n', 2, u32) -#define NCP_IOC_GET_FS_INFO_V2_32 _IOWR('n', 4, struct ncp_fs_info_v2_32) -#define NCP_IOC_GETOBJECTNAME_32 _IOWR('n', 9, struct ncp_objectname_ioctl_32) -#define NCP_IOC_SETOBJECTNAME_32 _IOR('n', 9, struct ncp_objectname_ioctl_32) -#define NCP_IOC_GETPRIVATEDATA_32 _IOWR('n', 10, struct ncp_privatedata_ioctl_32) -#define NCP_IOC_SETPRIVATEDATA_32 _IOR('n', 10, struct ncp_privatedata_ioctl_32) - -static int do_ncp_ncprequest(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct ncp_ioctl_request_32 n32; - struct ncp_ioctl_request __user *p = compat_alloc_user_space(sizeof(*p)); - - if (copy_from_user(&n32, compat_ptr(arg), sizeof(n32)) || - put_user(n32.function, &p->function) || - put_user(n32.size, &p->size) || - put_user(compat_ptr(n32.data), &p->data)) - return -EFAULT; - - return sys_ioctl(fd, NCP_IOC_NCPREQUEST, (unsigned long)p); -} - -static int do_ncp_getmountuid2(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - mm_segment_t old_fs = get_fs(); - __kernel_uid_t kuid; - int err; - - cmd = NCP_IOC_GETMOUNTUID2; - - set_fs(KERNEL_DS); - err = sys_ioctl(fd, cmd, (unsigned long)&kuid); - set_fs(old_fs); - - if (!err) - err = put_user(kuid, - (unsigned int __user *) compat_ptr(arg)); - - return err; -} - -static int do_ncp_getfsinfo2(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - mm_segment_t old_fs = get_fs(); - struct ncp_fs_info_v2_32 n32; - struct ncp_fs_info_v2 n; - int err; - - if (copy_from_user(&n32, compat_ptr(arg), sizeof(n32))) - return -EFAULT; - if (n32.version != NCP_GET_FS_INFO_VERSION_V2) - return -EINVAL; - n.version = NCP_GET_FS_INFO_VERSION_V2; - - set_fs(KERNEL_DS); - err = sys_ioctl(fd, NCP_IOC_GET_FS_INFO_V2, (unsigned long)&n); - set_fs(old_fs); - - if (!err) { - n32.version = n.version; - n32.mounted_uid = n.mounted_uid; - n32.connection = n.connection; - n32.buffer_size = n.buffer_size; - n32.volume_number = n.volume_number; - n32.directory_id = n.directory_id; - n32.dummy1 = n.dummy1; - n32.dummy2 = n.dummy2; - n32.dummy3 = n.dummy3; - err = copy_to_user(compat_ptr(arg), &n32, sizeof(n32)) ? -EFAULT : 0; - } - return err; -} - -static int do_ncp_getobjectname(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct ncp_objectname_ioctl_32 n32, __user *p32 = compat_ptr(arg); - struct ncp_objectname_ioctl __user *p = compat_alloc_user_space(sizeof(*p)); - s32 auth_type; - u32 name_len; - int err; - - if (copy_from_user(&n32, p32, sizeof(n32)) || - put_user(n32.object_name_len, &p->object_name_len) || - put_user(compat_ptr(n32.object_name), &p->object_name)) - return -EFAULT; - - err = sys_ioctl(fd, NCP_IOC_GETOBJECTNAME, (unsigned long)p); - if (err) - return err; - - if (get_user(auth_type, &p->auth_type) || - put_user(auth_type, &p32->auth_type) || - get_user(name_len, &p->object_name_len) || - put_user(name_len, &p32->object_name_len)) - return -EFAULT; - - return 0; -} - -static int do_ncp_setobjectname(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct ncp_objectname_ioctl_32 n32, __user *p32 = compat_ptr(arg); - struct ncp_objectname_ioctl __user *p = compat_alloc_user_space(sizeof(*p)); - - if (copy_from_user(&n32, p32, sizeof(n32)) || - put_user(n32.auth_type, &p->auth_type) || - put_user(n32.object_name_len, &p->object_name_len) || - put_user(compat_ptr(n32.object_name), &p->object_name)) - return -EFAULT; - - return sys_ioctl(fd, NCP_IOC_SETOBJECTNAME, (unsigned long)p); -} - -static int do_ncp_getprivatedata(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct ncp_privatedata_ioctl_32 n32, __user *p32 = compat_ptr(arg); - struct ncp_privatedata_ioctl __user *p = - compat_alloc_user_space(sizeof(*p)); - u32 len; - int err; - - if (copy_from_user(&n32, p32, sizeof(n32)) || - put_user(n32.len, &p->len) || - put_user(compat_ptr(n32.data), &p->data)) - return -EFAULT; - - err = sys_ioctl(fd, NCP_IOC_GETPRIVATEDATA, (unsigned long)p); - if (err) - return err; - - if (get_user(len, &p->len) || - put_user(len, &p32->len)) - return -EFAULT; - - return 0; -} - -static int do_ncp_setprivatedata(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct ncp_privatedata_ioctl_32 n32; - struct ncp_privatedata_ioctl_32 __user *p32 = compat_ptr(arg); - struct ncp_privatedata_ioctl __user *p = - compat_alloc_user_space(sizeof(*p)); - - if (copy_from_user(&n32, p32, sizeof(n32)) || - put_user(n32.len, &p->len) || - put_user(compat_ptr(n32.data), &p->data)) - return -EFAULT; - - return sys_ioctl(fd, NCP_IOC_SETPRIVATEDATA, (unsigned long)p); -} -#endif - static int lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg) { @@ -2748,16 +2560,6 @@ HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl) HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl) HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl) -#if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE) -HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest) -HANDLE_IOCTL(NCP_IOC_GETMOUNTUID2_32, do_ncp_getmountuid2) -HANDLE_IOCTL(NCP_IOC_GET_FS_INFO_V2_32, do_ncp_getfsinfo2) -HANDLE_IOCTL(NCP_IOC_GETOBJECTNAME_32, do_ncp_getobjectname) -HANDLE_IOCTL(NCP_IOC_SETOBJECTNAME_32, do_ncp_setobjectname) -HANDLE_IOCTL(NCP_IOC_GETPRIVATEDATA_32, do_ncp_getprivatedata) -HANDLE_IOCTL(NCP_IOC_SETPRIVATEDATA_32, do_ncp_setprivatedata) -#endif - /* dvb */ HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event) HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index b4ee89250e95..458b3b785194 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -53,6 +53,9 @@ const struct file_operations ncp_dir_operations = .read = generic_read_dir, .readdir = ncp_readdir, .ioctl = ncp_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ncp_compat_ioctl, +#endif }; struct inode_operations ncp_dir_inode_operations = diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index e6b7c67cf057..df37524b85db 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -289,6 +289,9 @@ const struct file_operations ncp_file_operations = .read = ncp_file_read, .write = ncp_file_write, .ioctl = ncp_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ncp_compat_ioctl, +#endif .mmap = ncp_mmap, .release = ncp_release, .fsync = ncp_fsync, diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 42039fe0653c..a89ac84a8241 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -7,19 +7,21 @@ * */ - -#include #include +#include #include #include #include #include #include #include +#include #include #include +#include + #include "ncplib_kernel.h" /* maximum limit for ncp_objectname_ioctl */ @@ -89,6 +91,82 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct file *file, return 0; } +#ifdef CONFIG_COMPAT +struct compat_ncp_objectname_ioctl +{ + s32 auth_type; + u32 object_name_len; + compat_caddr_t object_name; /* an userspace data, in most cases user name */ +}; + +struct compat_ncp_fs_info_v2 { + s32 version; + u32 mounted_uid; + u32 connection; + u32 buffer_size; + + u32 volume_number; + u32 directory_id; + + u32 dummy1; + u32 dummy2; + u32 dummy3; +}; + +struct compat_ncp_ioctl_request { + u32 function; + u32 size; + compat_caddr_t data; +}; + +struct compat_ncp_privatedata_ioctl +{ + u32 len; + compat_caddr_t data; /* ~1000 for NDS */ +}; + +#define NCP_IOC_GET_FS_INFO_V2_32 _IOWR('n', 4, struct compat_ncp_fs_info_v2) +#define NCP_IOC_NCPREQUEST_32 _IOR('n', 1, struct compat_ncp_ioctl_request) +#define NCP_IOC_GETOBJECTNAME_32 _IOWR('n', 9, struct compat_ncp_objectname_ioctl) +#define NCP_IOC_SETOBJECTNAME_32 _IOR('n', 9, struct compat_ncp_objectname_ioctl) +#define NCP_IOC_GETPRIVATEDATA_32 _IOWR('n', 10, struct compat_ncp_privatedata_ioctl) +#define NCP_IOC_SETPRIVATEDATA_32 _IOR('n', 10, struct compat_ncp_privatedata_ioctl) + +static int +ncp_get_compat_fs_info_v2(struct ncp_server * server, struct file *file, + struct compat_ncp_fs_info_v2 __user * arg) +{ + struct inode *inode = file->f_dentry->d_inode; + struct compat_ncp_fs_info_v2 info2; + + if ((file_permission(file, MAY_WRITE) != 0) + && (current->uid != server->m.mounted_uid)) { + return -EACCES; + } + if (copy_from_user(&info2, arg, sizeof(info2))) + return -EFAULT; + + if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { + DPRINTK("info.version invalid: %d\n", info2.version); + return -EINVAL; + } + info2.mounted_uid = server->m.mounted_uid; + info2.connection = server->connection; + info2.buffer_size = server->buffer_size; + info2.volume_number = NCP_FINFO(inode)->volNumber; + info2.directory_id = NCP_FINFO(inode)->DosDirNum; + info2.dummy1 = info2.dummy2 = info2.dummy3 = 0; + + if (copy_to_user(arg, &info2, sizeof(info2))) + return -EFAULT; + return 0; +} +#endif + +#define NCP_IOC_GETMOUNTUID16 _IOW('n', 2, u16) +#define NCP_IOC_GETMOUNTUID32 _IOW('n', 2, u32) +#define NCP_IOC_GETMOUNTUID64 _IOW('n', 2, u64) + #ifdef CONFIG_NCPFS_NLS /* Here we are select the iocharset and the codepage for NLS. * Thanks Petr Vandrovec for idea and many hints. @@ -192,12 +270,24 @@ int ncp_ioctl(struct inode *inode, struct file *filp, void __user *argp = (void __user *)arg; switch (cmd) { +#ifdef CONFIG_COMPAT + case NCP_IOC_NCPREQUEST_32: +#endif case NCP_IOC_NCPREQUEST: - if ((file_permission(filp, MAY_WRITE) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } +#ifdef CONFIG_COMPAT + if (cmd == NCP_IOC_NCPREQUEST_32) { + struct compat_ncp_ioctl_request request32; + if (copy_from_user(&request32, argp, sizeof(request32))) + return -EFAULT; + request.function = request32.function; + request.size = request32.size; + request.data = compat_ptr(request32.data); + } else +#endif if (copy_from_user(&request, argp, sizeof(request))) return -EFAULT; @@ -254,19 +344,35 @@ int ncp_ioctl(struct inode *inode, struct file *filp, case NCP_IOC_GET_FS_INFO_V2: return ncp_get_fs_info_v2(server, filp, argp); - case NCP_IOC_GETMOUNTUID2: - { - unsigned long tmp = server->m.mounted_uid; - - if ((file_permission(filp, MAY_READ) != 0) - && (current->uid != server->m.mounted_uid)) - { - return -EACCES; - } - if (put_user(tmp, (unsigned long __user *)argp)) +#ifdef CONFIG_COMPAT + case NCP_IOC_GET_FS_INFO_V2_32: + return ncp_get_compat_fs_info_v2(server, filp, argp); +#endif + /* we have too many combinations of CONFIG_COMPAT, + * CONFIG_64BIT and CONFIG_UID16, so just handle + * any of the possible ioctls */ + case NCP_IOC_GETMOUNTUID16: + case NCP_IOC_GETMOUNTUID32: + case NCP_IOC_GETMOUNTUID64: + if ((file_permission(filp, MAY_READ) != 0) + && (current->uid != server->m.mounted_uid)) { + return -EACCES; + } + if (cmd == NCP_IOC_GETMOUNTUID16) { + u16 uid; + SET_UID(uid, server->m.mounted_uid); + if (put_user(uid, (u16 __user *)argp)) + return -EFAULT; + } else if (cmd == NCP_IOC_GETMOUNTUID32) { + if (put_user(server->m.mounted_uid, + (u32 __user *)argp)) + return -EFAULT; + } else { + if (put_user(server->m.mounted_uid, + (u64 __user *)argp)) return -EFAULT; - return 0; } + return 0; case NCP_IOC_GETROOT: { @@ -476,6 +582,32 @@ outrel: } #endif /* CONFIG_NCPFS_IOCTL_LOCKING */ +#ifdef CONFIG_COMPAT + case NCP_IOC_GETOBJECTNAME_32: + if (current->uid != server->m.mounted_uid) { + return -EACCES; + } + { + struct compat_ncp_objectname_ioctl user; + size_t outl; + + if (copy_from_user(&user, argp, sizeof(user))) + return -EFAULT; + user.auth_type = server->auth.auth_type; + outl = user.object_name_len; + user.object_name_len = server->auth.object_name_len; + if (outl > user.object_name_len) + outl = user.object_name_len; + if (outl) { + if (copy_to_user(compat_ptr(user.object_name), + server->auth.object_name, + outl)) return -EFAULT; + } + if (copy_to_user(argp, &user, sizeof(user))) + return -EFAULT; + return 0; + } +#endif case NCP_IOC_GETOBJECTNAME: if (current->uid != server->m.mounted_uid) { return -EACCES; @@ -500,6 +632,9 @@ outrel: return -EFAULT; return 0; } +#ifdef CONFIG_COMPAT + case NCP_IOC_SETOBJECTNAME_32: +#endif case NCP_IOC_SETOBJECTNAME: if (current->uid != server->m.mounted_uid) { return -EACCES; @@ -512,8 +647,19 @@ outrel: void* oldprivate; size_t oldprivatelen; +#ifdef CONFIG_COMPAT + if (cmd == NCP_IOC_SETOBJECTNAME_32) { + struct compat_ncp_objectname_ioctl user32; + if (copy_from_user(&user32, argp, sizeof(user32))) + return -EFAULT; + user.auth_type = user32.auth_type; + user.object_name_len = user32.object_name_len; + user.object_name = compat_ptr(user32.object_name); + } else +#endif if (copy_from_user(&user, argp, sizeof(user))) return -EFAULT; + if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) return -ENOMEM; if (user.object_name_len) { @@ -544,6 +690,9 @@ outrel: kfree(oldname); return 0; } +#ifdef CONFIG_COMPAT + case NCP_IOC_GETPRIVATEDATA_32: +#endif case NCP_IOC_GETPRIVATEDATA: if (current->uid != server->m.mounted_uid) { return -EACCES; @@ -552,8 +701,18 @@ outrel: struct ncp_privatedata_ioctl user; size_t outl; +#ifdef CONFIG_COMPAT + if (cmd == NCP_IOC_GETPRIVATEDATA_32) { + struct compat_ncp_privatedata_ioctl user32; + if (copy_from_user(&user32, argp, sizeof(user32))) + return -EFAULT; + user.len = user32.len; + user.data = compat_ptr(user32.data); + } else +#endif if (copy_from_user(&user, argp, sizeof(user))) return -EFAULT; + outl = user.len; user.len = server->priv.len; if (outl > user.len) outl = user.len; @@ -562,10 +721,23 @@ outrel: server->priv.data, outl)) return -EFAULT; } +#ifdef CONFIG_COMPAT + if (cmd == NCP_IOC_GETPRIVATEDATA_32) { + struct compat_ncp_privatedata_ioctl user32; + user32.len = user.len; + user32.data = (unsigned long) user.data; + if (copy_to_user(&user32, argp, sizeof(user32))) + return -EFAULT; + } else +#endif if (copy_to_user(argp, &user, sizeof(user))) return -EFAULT; + return 0; } +#ifdef CONFIG_COMPAT + case NCP_IOC_SETPRIVATEDATA_32: +#endif case NCP_IOC_SETPRIVATEDATA: if (current->uid != server->m.mounted_uid) { return -EACCES; @@ -576,8 +748,18 @@ outrel: void* old; size_t oldlen; +#ifdef CONFIG_COMPAT + if (cmd == NCP_IOC_SETPRIVATEDATA_32) { + struct compat_ncp_privatedata_ioctl user32; + if (copy_from_user(&user32, argp, sizeof(user32))) + return -EFAULT; + user.len = user32.len; + user.data = compat_ptr(user32.data); + } else +#endif if (copy_from_user(&user, argp, sizeof(user))) return -EFAULT; + if (user.len > NCP_PRIVATE_DATA_MAX_LEN) return -ENOMEM; if (user.len) { @@ -636,20 +818,19 @@ outrel: } } -/* #ifdef CONFIG_UID16 */ - /* NCP_IOC_GETMOUNTUID may be same as NCP_IOC_GETMOUNTUID2, - so we have this out of switch */ - if (cmd == NCP_IOC_GETMOUNTUID) { - __kernel_uid_t uid = 0; - if ((file_permission(filp, MAY_READ) != 0) - && (current->uid != server->m.mounted_uid)) { - return -EACCES; - } - SET_UID(uid, server->m.mounted_uid); - if (put_user(uid, (__kernel_uid_t __user *)argp)) - return -EFAULT; - return 0; - } -/* #endif */ return -EINVAL; } + +#ifdef CONFIG_COMPAT +long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + lock_kernel(); + arg = (unsigned long) compat_ptr(arg); + ret = ncp_ioctl(inode, file, cmd, arg); + unlock_kernel(); + return ret; +} +#endif diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index d61ef5951538..d5b7abc4f409 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -569,18 +569,6 @@ COMPATIBLE_IOCTL(RAW_SETBIND) COMPATIBLE_IOCTL(RAW_GETBIND) /* SMB ioctls which do not need any translations */ COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) -/* NCP ioctls which do not need any translations */ -COMPATIBLE_IOCTL(NCP_IOC_CONN_LOGGED_IN) -COMPATIBLE_IOCTL(NCP_IOC_SIGN_INIT) -COMPATIBLE_IOCTL(NCP_IOC_SIGN_WANTED) -COMPATIBLE_IOCTL(NCP_IOC_SET_SIGN_WANTED) -COMPATIBLE_IOCTL(NCP_IOC_LOCKUNLOCK) -COMPATIBLE_IOCTL(NCP_IOC_GETROOT) -COMPATIBLE_IOCTL(NCP_IOC_SETROOT) -COMPATIBLE_IOCTL(NCP_IOC_GETCHARSETS) -COMPATIBLE_IOCTL(NCP_IOC_SETCHARSETS) -COMPATIBLE_IOCTL(NCP_IOC_GETDENTRYTTL) -COMPATIBLE_IOCTL(NCP_IOC_SETDENTRYTTL) /* Little a */ COMPATIBLE_IOCTL(ATMSIGD_CTRL) COMPATIBLE_IOCTL(ATMARPD_CTRL) diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 02e352be717e..0ea7f89e613c 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -212,6 +212,7 @@ void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date); /* linux/fs/ncpfs/ioctl.c */ int ncp_ioctl(struct inode *, struct file *, unsigned int, unsigned long); +long ncp_compat_ioctl(struct file *, unsigned int, unsigned long); /* linux/fs/ncpfs/sock.c */ int ncp_request2(struct ncp_server *server, int function, -- cgit v1.2.3-71-gd317 From c69c31270c35a6b8421a8e4ba81de1247ac6df95 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sat, 30 Sep 2006 23:27:56 -0700 Subject: [PATCH] IPMI: per-channel command registration This patch adds the ability to register for a command per-channel in the IPMI driver. If your BMC supports multiple channels, incoming messages can be useful to have the ability to register to receive commands on a specific channel instead the current behaviour of all channels. Signed-off-by: David Barksdale Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/IPMI.txt | 9 +++-- drivers/char/ipmi/ipmi_devintf.c | 34 ++++++++++++++++- drivers/char/ipmi/ipmi_msghandler.c | 75 +++++++++++++++++++++++++++---------- include/linux/ipmi.h | 48 ++++++++++++++++++++++-- 4 files changed, 138 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 0256805b548f..7756e09ea759 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -326,9 +326,12 @@ for events, they will all receive all events that come in. For receiving commands, you have to individually register commands you want to receive. Call ipmi_register_for_cmd() and supply the netfn -and command name for each command you want to receive. Only one user -may be registered for each netfn/cmd, but different users may register -for different commands. +and command name for each command you want to receive. You also +specify a bitmask of the channels you want to receive the command from +(or use IPMI_CHAN_ALL for all channels if you don't care). Only one +user may be registered for each netfn/cmd/channel, but different users +may register for different commands, or the same command if the +channel bitmasks do not overlap. From userland, equivalent IOCTLs are provided to do these functions. diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index 68d7c61a864e..81fcf0ce21d1 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -377,7 +377,8 @@ static int ipmi_ioctl(struct inode *inode, break; } - rv = ipmi_register_for_cmd(priv->user, val.netfn, val.cmd); + rv = ipmi_register_for_cmd(priv->user, val.netfn, val.cmd, + IPMI_CHAN_ALL); break; } @@ -390,7 +391,36 @@ static int ipmi_ioctl(struct inode *inode, break; } - rv = ipmi_unregister_for_cmd(priv->user, val.netfn, val.cmd); + rv = ipmi_unregister_for_cmd(priv->user, val.netfn, val.cmd, + IPMI_CHAN_ALL); + break; + } + + case IPMICTL_REGISTER_FOR_CMD_CHANS: + { + struct ipmi_cmdspec_chans val; + + if (copy_from_user(&val, arg, sizeof(val))) { + rv = -EFAULT; + break; + } + + rv = ipmi_register_for_cmd(priv->user, val.netfn, val.cmd, + val.chans); + break; + } + + case IPMICTL_UNREGISTER_FOR_CMD_CHANS: + { + struct ipmi_cmdspec_chans val; + + if (copy_from_user(&val, arg, sizeof(val))) { + rv = -EFAULT; + break; + } + + rv = ipmi_unregister_for_cmd(priv->user, val.netfn, val.cmd, + val.chans); break; } diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 843d34c8627c..2455e8d478ac 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -96,6 +96,7 @@ struct cmd_rcvr ipmi_user_t user; unsigned char netfn; unsigned char cmd; + unsigned int chans; /* * This is used to form a linked lised during mass deletion. @@ -953,24 +954,41 @@ int ipmi_set_gets_events(ipmi_user_t user, int val) static struct cmd_rcvr *find_cmd_rcvr(ipmi_smi_t intf, unsigned char netfn, - unsigned char cmd) + unsigned char cmd, + unsigned char chan) { struct cmd_rcvr *rcvr; list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) { - if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd)) + if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd) + && (rcvr->chans & (1 << chan))) return rcvr; } return NULL; } +static int is_cmd_rcvr_exclusive(ipmi_smi_t intf, + unsigned char netfn, + unsigned char cmd, + unsigned int chans) +{ + struct cmd_rcvr *rcvr; + + list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) { + if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd) + && (rcvr->chans & chans)) + return 0; + } + return 1; +} + int ipmi_register_for_cmd(ipmi_user_t user, unsigned char netfn, - unsigned char cmd) + unsigned char cmd, + unsigned int chans) { ipmi_smi_t intf = user->intf; struct cmd_rcvr *rcvr; - struct cmd_rcvr *entry; int rv = 0; @@ -979,12 +997,12 @@ int ipmi_register_for_cmd(ipmi_user_t user, return -ENOMEM; rcvr->cmd = cmd; rcvr->netfn = netfn; + rcvr->chans = chans; rcvr->user = user; mutex_lock(&intf->cmd_rcvrs_mutex); /* Make sure the command/netfn is not already registered. */ - entry = find_cmd_rcvr(intf, netfn, cmd); - if (entry) { + if (!is_cmd_rcvr_exclusive(intf, netfn, cmd, chans)) { rv = -EBUSY; goto out_unlock; } @@ -1001,24 +1019,39 @@ int ipmi_register_for_cmd(ipmi_user_t user, int ipmi_unregister_for_cmd(ipmi_user_t user, unsigned char netfn, - unsigned char cmd) + unsigned char cmd, + unsigned int chans) { ipmi_smi_t intf = user->intf; struct cmd_rcvr *rcvr; + struct cmd_rcvr *rcvrs = NULL; + int i, rv = -ENOENT; mutex_lock(&intf->cmd_rcvrs_mutex); - /* Make sure the command/netfn is not already registered. */ - rcvr = find_cmd_rcvr(intf, netfn, cmd); - if ((rcvr) && (rcvr->user == user)) { - list_del_rcu(&rcvr->link); - mutex_unlock(&intf->cmd_rcvrs_mutex); - synchronize_rcu(); + for (i = 0; i < IPMI_NUM_CHANNELS; i++) { + if (((1 << i) & chans) == 0) + continue; + rcvr = find_cmd_rcvr(intf, netfn, cmd, i); + if (rcvr == NULL) + continue; + if (rcvr->user == user) { + rv = 0; + rcvr->chans &= ~chans; + if (rcvr->chans == 0) { + list_del_rcu(&rcvr->link); + rcvr->next = rcvrs; + rcvrs = rcvr; + } + } + } + mutex_unlock(&intf->cmd_rcvrs_mutex); + synchronize_rcu(); + while (rcvrs) { + rcvr = rcvrs; + rcvrs = rcvr->next; kfree(rcvr); - return 0; - } else { - mutex_unlock(&intf->cmd_rcvrs_mutex); - return -ENOENT; } + return rv; } void ipmi_user_set_run_to_completion(ipmi_user_t user, int val) @@ -2548,6 +2581,7 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, int rv = 0; unsigned char netfn; unsigned char cmd; + unsigned char chan; ipmi_user_t user = NULL; struct ipmi_ipmb_addr *ipmb_addr; struct ipmi_recv_msg *recv_msg; @@ -2568,9 +2602,10 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, netfn = msg->rsp[4] >> 2; cmd = msg->rsp[8]; + chan = msg->rsp[3] & 0xf; rcu_read_lock(); - rcvr = find_cmd_rcvr(intf, netfn, cmd); + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); if (rcvr) { user = rcvr->user; kref_get(&user->refcount); @@ -2728,6 +2763,7 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, int rv = 0; unsigned char netfn; unsigned char cmd; + unsigned char chan; ipmi_user_t user = NULL; struct ipmi_lan_addr *lan_addr; struct ipmi_recv_msg *recv_msg; @@ -2748,9 +2784,10 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, netfn = msg->rsp[6] >> 2; cmd = msg->rsp[10]; + chan = msg->rsp[3] & 0xf; rcu_read_lock(); - rcvr = find_cmd_rcvr(intf, netfn, cmd); + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); if (rcvr) { user = rcvr->user; kref_get(&user->refcount); diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index d09fbeabf1dc..796ca009fd46 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -148,6 +148,13 @@ struct ipmi_lan_addr #define IPMI_BMC_CHANNEL 0xf #define IPMI_NUM_CHANNELS 0x10 +/* + * Used to signify an "all channel" bitmask. This is more than the + * actual number of channels because this is used in userland and + * will cover us if the number of channels is extended. + */ +#define IPMI_CHAN_ALL (~0) + /* * A raw IPMI message without any addressing. This covers both @@ -350,18 +357,21 @@ int ipmi_request_supply_msgs(ipmi_user_t user, /* * When commands come in to the SMS, the user can register to receive - * them. Only one user can be listening on a specific netfn/cmd pair + * them. Only one user can be listening on a specific netfn/cmd/chan tuple * at a time, you will get an EBUSY error if the command is already * registered. If a command is received that does not have a user * registered, the driver will automatically return the proper - * error. + * error. Channels are specified as a bitfield, use IPMI_CHAN_ALL to + * mean all channels. */ int ipmi_register_for_cmd(ipmi_user_t user, unsigned char netfn, - unsigned char cmd); + unsigned char cmd, + unsigned int chans); int ipmi_unregister_for_cmd(ipmi_user_t user, unsigned char netfn, - unsigned char cmd); + unsigned char cmd, + unsigned int chans); /* * Allow run-to-completion mode to be set for the interface of @@ -571,6 +581,36 @@ struct ipmi_cmdspec #define IPMICTL_UNREGISTER_FOR_CMD _IOR(IPMI_IOC_MAGIC, 15, \ struct ipmi_cmdspec) +/* + * Register to get commands from other entities on specific channels. + * This way, you can only listen on specific channels, or have messages + * from some channels go to one place and other channels to someplace + * else. The chans field is a bitmask, (1 << channel) for each channel. + * It may be IPMI_CHAN_ALL for all channels. + */ +struct ipmi_cmdspec_chans +{ + unsigned int netfn; + unsigned int cmd; + unsigned int chans; +}; + +/* + * Register to receive a specific command on specific channels. error values: + * - EFAULT - an address supplied was invalid. + * - EBUSY - One of the netfn/cmd/chans supplied was already in use. + * - ENOMEM - could not allocate memory for the entry. + */ +#define IPMICTL_REGISTER_FOR_CMD_CHANS _IOR(IPMI_IOC_MAGIC, 28, \ + struct ipmi_cmdspec_chans) +/* + * Unregister some netfn/cmd/chans. error values: + * - EFAULT - an address supplied was invalid. + * - ENOENT - None of the netfn/cmd/chans were found registered for this user. + */ +#define IPMICTL_UNREGISTER_FOR_CMD_CHANS _IOR(IPMI_IOC_MAGIC, 29, \ + struct ipmi_cmdspec_chans) + /* * Set whether this interface receives events. Note that the first * user registered for events will get all pending events for the -- cgit v1.2.3-71-gd317 From 3a2711116073db258224afd2cc0f478bdf305575 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Sat, 30 Sep 2006 23:28:09 -0700 Subject: [PATCH] Remove BUG_ON(unlikely) in include/linux/aio.h BUG_ON() does this unlikely check itself, as bugs in Linux are unlikely anyway :) Signed-off-by: Rolf Eike Beer Acked-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 00c8efa95cc3..8a0193385a9b 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -213,11 +213,11 @@ int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb)); #define get_ioctx(kioctx) do { \ - BUG_ON(unlikely(atomic_read(&(kioctx)->users) <= 0)); \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ atomic_inc(&(kioctx)->users); \ } while (0) #define put_ioctx(kioctx) do { \ - BUG_ON(unlikely(atomic_read(&(kioctx)->users) <= 0)); \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ __put_ioctx(kioctx); \ } while (0) -- cgit v1.2.3-71-gd317 From ff8371ac9a5a55c956991fed8e5f58640c7a32f3 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sat, 30 Sep 2006 23:28:17 -0700 Subject: [PATCH] constify rtc_class_ops: update drivers Update RTC framework so that drivers can constify their method tables, moving them from ".data" to ".rodata". Then update the drivers. Signed-off-by: David Brownell Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/class.c | 2 +- drivers/rtc/rtc-at91.c | 2 +- drivers/rtc/rtc-dev.c | 4 ++-- drivers/rtc/rtc-ds1307.c | 2 +- drivers/rtc/rtc-ds1553.c | 2 +- drivers/rtc/rtc-ds1672.c | 2 +- drivers/rtc/rtc-ds1742.c | 2 +- drivers/rtc/rtc-ep93xx.c | 2 +- drivers/rtc/rtc-isl1208.c | 2 +- drivers/rtc/rtc-m48t86.c | 2 +- drivers/rtc/rtc-max6902.c | 2 +- drivers/rtc/rtc-pcf8563.c | 2 +- drivers/rtc/rtc-pcf8583.c | 2 +- drivers/rtc/rtc-pl031.c | 2 +- drivers/rtc/rtc-proc.c | 2 +- drivers/rtc/rtc-rs5c348.c | 2 +- drivers/rtc/rtc-rs5c372.c | 2 +- drivers/rtc/rtc-s3c.c | 2 +- drivers/rtc/rtc-sa1100.c | 2 +- drivers/rtc/rtc-test.c | 2 +- drivers/rtc/rtc-v3020.c | 2 +- drivers/rtc/rtc-vr41xx.c | 2 +- drivers/rtc/rtc-x1205.c | 2 +- include/linux/rtc.h | 4 ++-- 24 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 306d600a764a..7a0d8ee2de9c 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -39,7 +39,7 @@ static void rtc_device_release(struct class_device *class_dev) * Returns the pointer to the new struct class device. */ struct rtc_device *rtc_device_register(const char *name, struct device *dev, - struct rtc_class_ops *ops, + const struct rtc_class_ops *ops, struct module *owner) { struct rtc_device *rtc; diff --git a/drivers/rtc/rtc-at91.c b/drivers/rtc/rtc-at91.c index dfd0ce86f6a0..3cf3529888c7 100644 --- a/drivers/rtc/rtc-at91.c +++ b/drivers/rtc/rtc-at91.c @@ -267,7 +267,7 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id, return IRQ_NONE; /* not handled */ } -static struct rtc_class_ops at91_rtc_ops = { +static const struct rtc_class_ops at91_rtc_ops = { .ioctl = at91_rtc_ioctl, .read_time = at91_rtc_readtime, .set_time = at91_rtc_settime, diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index 629d47cc7e88..583789c66cdb 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -24,7 +24,7 @@ static int rtc_dev_open(struct inode *inode, struct file *file) int err; struct rtc_device *rtc = container_of(inode->i_cdev, struct rtc_device, char_dev); - struct rtc_class_ops *ops = rtc->ops; + const struct rtc_class_ops *ops = rtc->ops; /* We keep the lock as long as the device is in use * and return immediately if busy @@ -209,7 +209,7 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file, int err = 0; struct class_device *class_dev = file->private_data; struct rtc_device *rtc = to_rtc_device(class_dev); - struct rtc_class_ops *ops = rtc->ops; + const struct rtc_class_ops *ops = rtc->ops; struct rtc_time tm; struct rtc_wkalrm alarm; void __user *uarg = (void __user *) arg; diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index e8afb9384786..cc5032b6f42a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -178,7 +178,7 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) return 0; } -static struct rtc_class_ops ds13xx_rtc_ops = { +static const struct rtc_class_ops ds13xx_rtc_ops = { .read_time = ds1307_get_time, .set_time = ds1307_set_time, }; diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 209001495474..4fc9422ed86d 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -250,7 +250,7 @@ static int ds1553_rtc_ioctl(struct device *dev, unsigned int cmd, return 0; } -static struct rtc_class_ops ds1553_rtc_ops = { +static const struct rtc_class_ops ds1553_rtc_ops = { .read_time = ds1553_rtc_read_time, .set_time = ds1553_rtc_set_time, .read_alarm = ds1553_rtc_read_alarm, diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 9be81fd4737c..9c68ec99afa5 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -156,7 +156,7 @@ static ssize_t show_control(struct device *dev, struct device_attribute *attr, c } static DEVICE_ATTR(control, S_IRUGO, show_control, NULL); -static struct rtc_class_ops ds1672_rtc_ops = { +static const struct rtc_class_ops ds1672_rtc_ops = { .read_time = ds1672_rtc_read_time, .set_time = ds1672_rtc_set_time, .set_mmss = ds1672_rtc_set_mmss, diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 8e47e5a06d2a..01da5ab15fbc 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -116,7 +116,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm) return 0; } -static struct rtc_class_ops ds1742_rtc_ops = { +static const struct rtc_class_ops ds1742_rtc_ops = { .read_time = ds1742_rtc_read_time, .set_time = ds1742_rtc_set_time, }; diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index e1a1169e4664..ef4f147f3c0c 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -73,7 +73,7 @@ static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq) return 0; } -static struct rtc_class_ops ep93xx_rtc_ops = { +static const struct rtc_class_ops ep93xx_rtc_ops = { .read_time = ep93xx_rtc_read_time, .set_time = ep93xx_rtc_set_time, .set_mmss = ep93xx_rtc_set_mmss, diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index f324d0a635d4..1c743641b73b 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -390,7 +390,7 @@ static int isl1208_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) return isl1208_i2c_read_alarm(to_i2c_client(dev), alarm); } -static struct rtc_class_ops isl1208_rtc_ops = { +static const struct rtc_class_ops isl1208_rtc_ops = { .proc = isl1208_rtc_proc, .read_time = isl1208_rtc_read_time, .set_time = isl1208_rtc_set_time, diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index 8c0d1a6739ad..8ff4a1221f59 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -138,7 +138,7 @@ static int m48t86_rtc_proc(struct device *dev, struct seq_file *seq) return 0; } -static struct rtc_class_ops m48t86_rtc_ops = { +static const struct rtc_class_ops m48t86_rtc_ops = { .read_time = m48t86_rtc_read_time, .set_time = m48t86_rtc_set_time, .proc = m48t86_rtc_proc, diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index 2c9739562b5c..9eeef964663a 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -207,7 +207,7 @@ static int max6902_set_time(struct device *dev, struct rtc_time *tm) return max6902_set_datetime(dev, tm); } -static struct rtc_class_ops max6902_rtc_ops = { +static const struct rtc_class_ops max6902_rtc_ops = { .read_time = max6902_read_time, .set_time = max6902_set_time, }; diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index bd4310643038..a760cf69af90 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -227,7 +227,7 @@ static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm) return pcf8563_set_datetime(to_i2c_client(dev), tm); } -static struct rtc_class_ops pcf8563_rtc_ops = { +static const struct rtc_class_ops pcf8563_rtc_ops = { .read_time = pcf8563_rtc_read_time, .set_time = pcf8563_rtc_set_time, }; diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c index b235a30cb661..5875ebb8c79d 100644 --- a/drivers/rtc/rtc-pcf8583.c +++ b/drivers/rtc/rtc-pcf8583.c @@ -273,7 +273,7 @@ static int pcf8583_rtc_set_time(struct device *dev, struct rtc_time *tm) return ret; } -static struct rtc_class_ops pcf8583_rtc_ops = { +static const struct rtc_class_ops pcf8583_rtc_ops = { .read_time = pcf8583_rtc_read_time, .set_time = pcf8583_rtc_set_time, }; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index d6d1c5726b0e..739d1a6e14eb 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -128,7 +128,7 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) return 0; } -static struct rtc_class_ops pl031_ops = { +static const struct rtc_class_ops pl031_ops = { .open = pl031_open, .release = pl031_release, .ioctl = pl031_ioctl, diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c index 2943d83edfd1..d51d8f20e634 100644 --- a/drivers/rtc/rtc-proc.c +++ b/drivers/rtc/rtc-proc.c @@ -23,7 +23,7 @@ static int rtc_proc_show(struct seq_file *seq, void *offset) { int err; struct class_device *class_dev = seq->private; - struct rtc_class_ops *ops = to_rtc_device(class_dev)->ops; + const struct rtc_class_ops *ops = to_rtc_device(class_dev)->ops; struct rtc_wkalrm alrm; struct rtc_time tm; diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c index 25589061f931..f50f3fc353cd 100644 --- a/drivers/rtc/rtc-rs5c348.c +++ b/drivers/rtc/rtc-rs5c348.c @@ -140,7 +140,7 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm) return 0; } -static struct rtc_class_ops rs5c348_rtc_ops = { +static const struct rtc_class_ops rs5c348_rtc_ops = { .read_time = rs5c348_rtc_read_time, .set_time = rs5c348_rtc_set_time, }; diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 7553d797603f..bbdad099471d 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -160,7 +160,7 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq) return 0; } -static struct rtc_class_ops rs5c372_rtc_ops = { +static const struct rtc_class_ops rs5c372_rtc_ops = { .proc = rs5c372_rtc_proc, .read_time = rs5c372_rtc_read_time, .set_time = rs5c372_rtc_set_time, diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 2c7de79c83b9..625dad2eeb4f 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -386,7 +386,7 @@ static void s3c_rtc_release(struct device *dev) free_irq(s3c_rtc_tickno, rtc_dev); } -static struct rtc_class_ops s3c_rtcops = { +static const struct rtc_class_ops s3c_rtcops = { .open = s3c_rtc_open, .release = s3c_rtc_release, .ioctl = s3c_rtc_ioctl, diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index ee4b61ee67b0..439c41aea31c 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -303,7 +303,7 @@ static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq) return 0; } -static struct rtc_class_ops sa1100_rtc_ops = { +static const struct rtc_class_ops sa1100_rtc_ops = { .open = sa1100_rtc_open, .read_callback = sa1100_rtc_read_callback, .release = sa1100_rtc_release, diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index e1fa5fe7901f..bc4bd24508a2 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -75,7 +75,7 @@ static int test_rtc_ioctl(struct device *dev, unsigned int cmd, } } -static struct rtc_class_ops test_rtc_ops = { +static const struct rtc_class_ops test_rtc_ops = { .proc = test_rtc_proc, .read_time = test_rtc_read_time, .set_time = test_rtc_set_time, diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index e28cc4b0901a..09b714f1cdc3 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -149,7 +149,7 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt) return 0; } -static struct rtc_class_ops v3020_rtc_ops = { +static const struct rtc_class_ops v3020_rtc_ops = { .read_time = v3020_read_time, .set_time = v3020_set_time, }; diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index 596764fd29f5..58e5ed0aa127 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -296,7 +296,7 @@ static irqreturn_t rtclong1_interrupt(int irq, void *dev_id, struct pt_regs *reg return IRQ_HANDLED; } -static struct rtc_class_ops vr41xx_rtc_ops = { +static const struct rtc_class_ops vr41xx_rtc_ops = { .release = vr41xx_rtc_release, .ioctl = vr41xx_rtc_ioctl, .read_time = vr41xx_rtc_read_time, diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 788b6d1f8f2f..522c69753bbf 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -460,7 +460,7 @@ static int x1205_rtc_proc(struct device *dev, struct seq_file *seq) return 0; } -static struct rtc_class_ops x1205_rtc_ops = { +static const struct rtc_class_ops x1205_rtc_ops = { .proc = x1205_rtc_proc, .read_time = x1205_rtc_read_time, .set_time = x1205_rtc_set_time, diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 5371e4e74595..b89f09357054 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -141,7 +141,7 @@ struct rtc_device int id; char name[RTC_DEVICE_NAME_SIZE]; - struct rtc_class_ops *ops; + const struct rtc_class_ops *ops; struct mutex ops_lock; struct class_device *rtc_dev; @@ -172,7 +172,7 @@ struct rtc_device extern struct rtc_device *rtc_device_register(const char *name, struct device *dev, - struct rtc_class_ops *ops, + const struct rtc_class_ops *ops, struct module *owner); extern void rtc_device_unregister(struct rtc_device *rdev); extern int rtc_interface_register(struct class_interface *intf); -- cgit v1.2.3-71-gd317 From c902e0a0102f1095eec4b3511c13c84ca2bc4577 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 30 Sep 2006 23:28:21 -0700 Subject: [PATCH] Pass sparse the lock expression given to lock annotations The lock annotation macros __acquires, __releases, __acquire, and __release all currently throw away the lock expression passed as an argument. Now that sparse can parse __context__ and __attribute__((context)) with a context expression, pass the lock expression down to sparse as the context expression. This requires a version of sparse from GIT commit 37475a6c1c3e66219e68d912d5eb833f4098fd72 or later. Signed-off-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0780de440220..538423d4a865 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -10,10 +10,10 @@ # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) # define __iomem __attribute__((noderef, address_space(2))) -# define __acquires(x) __attribute__((context(0,1))) -# define __releases(x) __attribute__((context(1,0))) -# define __acquire(x) __context__(1) -# define __release(x) __context__(-1) +# define __acquires(x) __attribute__((context(x,0,1))) +# define __releases(x) __attribute__((context(x,1,0))) +# define __acquire(x) __context__(x,1) +# define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) extern void __chk_user_ptr(void __user *); extern void __chk_io_ptr(void __iomem *); -- cgit v1.2.3-71-gd317 From 4c7ee8de956fc250fe31e2fa91f6da980fabe317 Mon Sep 17 00:00:00 2001 From: john stultz Date: Sat, 30 Sep 2006 23:28:22 -0700 Subject: [PATCH] NTP: Move all the NTP related code to ntp.c Move all the NTP related code to ntp.c [akpm@osdl.org: cleanups, build fix] Signed-off-by: John Stultz Cc: Ingo Molnar Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 10 +- kernel/time.c | 173 ---------------------- kernel/time/Makefile | 2 +- kernel/time/ntp.c | 389 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 211 +-------------------------- 5 files changed, 399 insertions(+), 386 deletions(-) create mode 100644 kernel/time/ntp.c (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index d543d3871e38..2a21485bf183 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -294,11 +294,15 @@ extern void register_time_interpolator(struct time_interpolator *); extern void unregister_time_interpolator(struct time_interpolator *); extern void time_interpolator_reset(void); extern unsigned long time_interpolator_get_offset(void); +extern void time_interpolator_update(long delta_nsec); #else /* !CONFIG_TIME_INTERPOLATION */ -static inline void -time_interpolator_reset(void) +static inline void time_interpolator_reset(void) +{ +} + +static inline void time_interpolator_update(long delta_nsec) { } @@ -309,6 +313,8 @@ time_interpolator_reset(void) /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ extern u64 current_tick_length(void); +extern void second_overflow(void); +extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); #endif /* KERNEL */ diff --git a/kernel/time.c b/kernel/time.c index 5bd489747643..0e017bff4c19 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -202,179 +202,6 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv, return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } -/* we call this to notify the arch when the clock is being - * controlled. If no such arch routine, do nothing. - */ -void __attribute__ ((weak)) notify_arch_cmos_timer(void) -{ - return; -} - -/* adjtimex mainly allows reading (and writing, if superuser) of - * kernel time-keeping variables. used by xntpd. - */ -int do_adjtimex(struct timex *txc) -{ - long ltemp, mtemp, save_adjust; - int result; - - /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !capable(CAP_SYS_TIME)) - return -EPERM; - - /* Now we validate the data before disabling interrupts */ - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - /* singleshot must not be used with any other mode bits */ - if (txc->modes != ADJ_OFFSET_SINGLESHOT) - return -EINVAL; - - if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) - /* adjustment Offset limited to +- .512 seconds */ - if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) - return -EINVAL; - - /* if the quartz is off by more than 10% something is VERY wrong ! */ - if (txc->modes & ADJ_TICK) - if (txc->tick < 900000/USER_HZ || - txc->tick > 1100000/USER_HZ) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - result = time_state; /* mostly `TIME_OK' */ - - /* Save for later - semantics of adjtime is to return old value */ - save_adjust = time_next_adjust ? time_next_adjust : time_adjust; - -#if 0 /* STA_CLOCKERR is never set yet */ - time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ -#endif - /* If there are input parameters, then process them */ - if (txc->modes) - { - if (txc->modes & ADJ_STATUS) /* only set allowed bits */ - time_status = (txc->status & ~STA_RONLY) | - (time_status & STA_RONLY); - - if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ - if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { - result = -EINVAL; - goto leave; - } - time_freq = txc->freq; - } - - if (txc->modes & ADJ_MAXERROR) { - if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; - } - time_maxerror = txc->maxerror; - } - - if (txc->modes & ADJ_ESTERROR) { - if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; - } - time_esterror = txc->esterror; - } - - if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ - if (txc->constant < 0) { /* NTP v4 uses values > 6 */ - result = -EINVAL; - goto leave; - } - time_constant = txc->constant; - } - - if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ - if (txc->modes == ADJ_OFFSET_SINGLESHOT) { - /* adjtime() is independent from ntp_adjtime() */ - if ((time_next_adjust = txc->offset) == 0) - time_adjust = 0; - } - else if (time_status & STA_PLL) { - ltemp = txc->offset; - - /* - * Scale the phase adjustment and - * clamp to the operating range. - */ - if (ltemp > MAXPHASE) - time_offset = MAXPHASE << SHIFT_UPDATE; - else if (ltemp < -MAXPHASE) - time_offset = -(MAXPHASE << SHIFT_UPDATE); - else - time_offset = ltemp << SHIFT_UPDATE; - - /* - * Select whether the frequency is to be controlled - * and in which mode (PLL or FLL). Clamp to the operating - * range. Ugly multiply/divide should be replaced someday. - */ - - if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; - time_reftime = xtime.tv_sec; - if (time_status & STA_FLL) { - if (mtemp >= MINSEC) { - ltemp = (time_offset / mtemp) << (SHIFT_USEC - - SHIFT_UPDATE); - time_freq += shift_right(ltemp, SHIFT_KH); - } else /* calibration interval too short (p. 12) */ - result = TIME_ERROR; - } else { /* PLL mode */ - if (mtemp < MAXSEC) { - ltemp *= mtemp; - time_freq += shift_right(ltemp,(time_constant + - time_constant + - SHIFT_KF - SHIFT_USEC)); - } else /* calibration interval too long (p. 12) */ - result = TIME_ERROR; - } - time_freq = min(time_freq, time_tolerance); - time_freq = max(time_freq, -time_tolerance); - } /* STA_PLL */ - } /* txc->modes & ADJ_OFFSET */ - if (txc->modes & ADJ_TICK) { - tick_usec = txc->tick; - tick_nsec = TICK_USEC_TO_NSEC(tick_usec); - } - } /* txc->modes */ -leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) - result = TIME_ERROR; - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - txc->offset = save_adjust; - else { - txc->offset = shift_right(time_offset, SHIFT_UPDATE); - } - txc->freq = time_freq; - txc->maxerror = time_maxerror; - txc->esterror = time_esterror; - txc->status = time_status; - txc->constant = time_constant; - txc->precision = time_precision; - txc->tolerance = time_tolerance; - txc->tick = tick_usec; - - /* PPS is not implemented, so these are zero */ - txc->ppsfreq = 0; - txc->jitter = 0; - txc->shift = 0; - txc->stabil = 0; - txc->jitcnt = 0; - txc->calcnt = 0; - txc->errcnt = 0; - txc->stbcnt = 0; - write_sequnlock_irq(&xtime_lock); - do_gettimeofday(&txc->time); - notify_arch_cmos_timer(); - return(result); -} - asmlinkage long sys_adjtimex(struct timex __user *txc_p) { struct timex txc; /* Local copy of parameter */ diff --git a/kernel/time/Makefile b/kernel/time/Makefile index e1dfd8e86cce..61a3907d16fb 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1 +1 @@ -obj-y += clocksource.o jiffies.o +obj-y += ntp.o clocksource.o jiffies.o diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c new file mode 100644 index 000000000000..8ccce15b4b23 --- /dev/null +++ b/kernel/time/ntp.c @@ -0,0 +1,389 @@ +/* + * linux/kernel/time/ntp.c + * + * NTP state machine interfaces and logic. + * + * This code was mainly moved from kernel/timer.c and kernel/time.c + * Please see those files for relevant copyright info and historical + * changelogs. + */ + +#include +#include +#include + +#include +#include + +/* Don't completely fail for HZ > 500. */ +int tickadj = 500/HZ ? : 1; /* microsecs */ + +/* + * phase-lock loop variables + */ +/* TIME_ERROR prevents overwriting the CMOS clock */ +int time_state = TIME_OK; /* clock synchronization status */ +int time_status = STA_UNSYNC; /* clock status bits */ +long time_offset; /* time adjustment (us) */ +long time_constant = 2; /* pll time constant */ +long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ +long time_precision = 1; /* clock precision (us) */ +long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ +long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ +long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; + /* frequency offset (scaled ppm)*/ +static long time_adj; /* tick adjust (scaled 1 / HZ) */ +long time_reftime; /* time at last adjustment (s) */ +long time_adjust; +long time_next_adjust; + +/* + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + */ +void second_overflow(void) +{ + long ltemp; + + /* Bump the maxerror field */ + time_maxerror += time_tolerance >> SHIFT_USEC; + if (time_maxerror > NTP_PHASE_LIMIT) { + time_maxerror = NTP_PHASE_LIMIT; + time_status |= STA_UNSYNC; + } + + /* + * Leap second processing. If in leap-insert state at the end of the + * day, the system clock is set back one second; if in leap-delete + * state, the system clock is set ahead one second. The microtime() + * routine or external clock driver will insure that reported time is + * always monotonic. The ugly divides should be replaced. + */ + switch (time_state) { + case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; + break; + case TIME_INS: + if (xtime.tv_sec % 86400 == 0) { + xtime.tv_sec--; + wall_to_monotonic.tv_sec++; + /* + * The timer interpolator will make time change + * gradually instead of an immediate jump by one second + */ + time_interpolator_update(-NSEC_PER_SEC); + time_state = TIME_OOP; + clock_was_set(); + printk(KERN_NOTICE "Clock: inserting leap second " + "23:59:60 UTC\n"); + } + break; + case TIME_DEL: + if ((xtime.tv_sec + 1) % 86400 == 0) { + xtime.tv_sec++; + wall_to_monotonic.tv_sec--; + /* + * Use of time interpolator for a gradual change of + * time + */ + time_interpolator_update(NSEC_PER_SEC); + time_state = TIME_WAIT; + clock_was_set(); + printk(KERN_NOTICE "Clock: deleting leap second " + "23:59:59 UTC\n"); + } + break; + case TIME_OOP: + time_state = TIME_WAIT; + break; + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + } + + /* + * Compute the phase adjustment for the next second. In PLL mode, the + * offset is reduced by a fixed factor times the time constant. In FLL + * mode the offset is used directly. In either mode, the maximum phase + * adjustment for each second is clamped so as to spread the adjustment + * over not more than the number of seconds between updates. + */ + ltemp = time_offset; + if (!(time_status & STA_FLL)) + ltemp = shift_right(ltemp, SHIFT_KG + time_constant); + ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE); + ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE); + time_offset -= ltemp; + time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + + /* + * Compute the frequency estimate and additional phase adjustment due + * to frequency error for the next second. + */ + ltemp = time_freq; + time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)); + +#if HZ == 100 + /* + * Compensate for (HZ==100) != (1 << SHIFT_HZ). Add 25% and 3.125% to + * get 128.125; => only 0.125% error (p. 14) + */ + time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5); +#endif +#if HZ == 250 + /* + * Compensate for (HZ==250) != (1 << SHIFT_HZ). Add 1.5625% and + * 0.78125% to get 255.85938; => only 0.05% error (p. 14) + */ + time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); +#endif +#if HZ == 1000 + /* + * Compensate for (HZ==1000) != (1 << SHIFT_HZ). Add 1.5625% and + * 0.78125% to get 1023.4375; => only 0.05% error (p. 14) + */ + time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); +#endif +} + +/* + * Returns how many microseconds we need to add to xtime this tick + * in doing an adjustment requested with adjtime. + */ +static long adjtime_adjustment(void) +{ + long time_adjust_step; + + time_adjust_step = time_adjust; + if (time_adjust_step) { + /* + * We are doing an adjtime thing. Prepare time_adjust_step to + * be within bounds. Note that a positive time_adjust means we + * want the clock to run faster. + * + * Limit the amount of the step to be in the range + * -tickadj .. +tickadj + */ + time_adjust_step = min(time_adjust_step, (long)tickadj); + time_adjust_step = max(time_adjust_step, (long)-tickadj); + } + return time_adjust_step; +} + +/* in the NTP reference this is called "hardclock()" */ +void update_ntp_one_tick(void) +{ + long time_adjust_step; + + time_adjust_step = adjtime_adjustment(); + if (time_adjust_step) + /* Reduce by this step the amount of time left */ + time_adjust -= time_adjust_step; + + /* Changes by adjtime() do not take effect till next tick. */ + if (time_next_adjust != 0) { + time_adjust = time_next_adjust; + time_next_adjust = 0; + } +} + +/* + * Return how long ticks are at the moment, that is, how much time + * update_wall_time_one_tick will add to xtime next time we call it + * (assuming no calls to do_adjtimex in the meantime). + * The return value is in fixed-point nanoseconds shifted by the + * specified number of bits to the right of the binary point. + * This function has no side-effects. + */ +u64 current_tick_length(void) +{ + long delta_nsec; + u64 ret; + + /* calculate the finest interval NTP will allow. + * ie: nanosecond value shifted by (SHIFT_SCALE - 10) + */ + delta_nsec = tick_nsec + adjtime_adjustment() * 1000; + ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; + ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); + + return ret; +} + + +void __attribute__ ((weak)) notify_arch_cmos_timer(void) +{ + return; +} + +/* adjtimex mainly allows reading (and writing, if superuser) of + * kernel time-keeping variables. used by xntpd. + */ +int do_adjtimex(struct timex *txc) +{ + long ltemp, mtemp, save_adjust; + int result; + + /* In order to modify anything, you gotta be super-user! */ + if (txc->modes && !capable(CAP_SYS_TIME)) + return -EPERM; + + /* Now we validate the data before disabling interrupts */ + + if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) + /* singleshot must not be used with any other mode bits */ + if (txc->modes != ADJ_OFFSET_SINGLESHOT) + return -EINVAL; + + if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) + /* adjustment Offset limited to +- .512 seconds */ + if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) + return -EINVAL; + + /* if the quartz is off by more than 10% something is VERY wrong ! */ + if (txc->modes & ADJ_TICK) + if (txc->tick < 900000/USER_HZ || + txc->tick > 1100000/USER_HZ) + return -EINVAL; + + write_seqlock_irq(&xtime_lock); + result = time_state; /* mostly `TIME_OK' */ + + /* Save for later - semantics of adjtime is to return old value */ + save_adjust = time_next_adjust ? time_next_adjust : time_adjust; + +#if 0 /* STA_CLOCKERR is never set yet */ + time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ +#endif + /* If there are input parameters, then process them */ + if (txc->modes) + { + if (txc->modes & ADJ_STATUS) /* only set allowed bits */ + time_status = (txc->status & ~STA_RONLY) | + (time_status & STA_RONLY); + + if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ + if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { + result = -EINVAL; + goto leave; + } + time_freq = txc->freq; + } + + if (txc->modes & ADJ_MAXERROR) { + if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { + result = -EINVAL; + goto leave; + } + time_maxerror = txc->maxerror; + } + + if (txc->modes & ADJ_ESTERROR) { + if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { + result = -EINVAL; + goto leave; + } + time_esterror = txc->esterror; + } + + if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ + if (txc->constant < 0) { /* NTP v4 uses values > 6 */ + result = -EINVAL; + goto leave; + } + time_constant = txc->constant; + } + + if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ + if (txc->modes == ADJ_OFFSET_SINGLESHOT) { + /* adjtime() is independent from ntp_adjtime() */ + if ((time_next_adjust = txc->offset) == 0) + time_adjust = 0; + } + else if (time_status & STA_PLL) { + ltemp = txc->offset; + + /* + * Scale the phase adjustment and + * clamp to the operating range. + */ + if (ltemp > MAXPHASE) + time_offset = MAXPHASE << SHIFT_UPDATE; + else if (ltemp < -MAXPHASE) + time_offset = -(MAXPHASE << SHIFT_UPDATE); + else + time_offset = ltemp << SHIFT_UPDATE; + + /* + * Select whether the frequency is to be controlled + * and in which mode (PLL or FLL). Clamp to the operating + * range. Ugly multiply/divide should be replaced someday. + */ + + if (time_status & STA_FREQHOLD || time_reftime == 0) + time_reftime = xtime.tv_sec; + mtemp = xtime.tv_sec - time_reftime; + time_reftime = xtime.tv_sec; + if (time_status & STA_FLL) { + if (mtemp >= MINSEC) { + ltemp = (time_offset / mtemp) << (SHIFT_USEC - + SHIFT_UPDATE); + time_freq += shift_right(ltemp, SHIFT_KH); + } else /* calibration interval too short (p. 12) */ + result = TIME_ERROR; + } else { /* PLL mode */ + if (mtemp < MAXSEC) { + ltemp *= mtemp; + time_freq += shift_right(ltemp,(time_constant + + time_constant + + SHIFT_KF - SHIFT_USEC)); + } else /* calibration interval too long (p. 12) */ + result = TIME_ERROR; + } + time_freq = min(time_freq, time_tolerance); + time_freq = max(time_freq, -time_tolerance); + } /* STA_PLL */ + } /* txc->modes & ADJ_OFFSET */ + if (txc->modes & ADJ_TICK) { + tick_usec = txc->tick; + tick_nsec = TICK_USEC_TO_NSEC(tick_usec); + } + } /* txc->modes */ +leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) + result = TIME_ERROR; + + if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) + txc->offset = save_adjust; + else { + txc->offset = shift_right(time_offset, SHIFT_UPDATE); + } + txc->freq = time_freq; + txc->maxerror = time_maxerror; + txc->esterror = time_esterror; + txc->status = time_status; + txc->constant = time_constant; + txc->precision = time_precision; + txc->tolerance = time_tolerance; + txc->tick = tick_usec; + + /* PPS is not implemented, so these are zero */ + txc->ppsfreq = 0; + txc->jitter = 0; + txc->shift = 0; + txc->stabil = 0; + txc->jitcnt = 0; + txc->calcnt = 0; + txc->errcnt = 0; + txc->stbcnt = 0; + write_sequnlock_irq(&xtime_lock); + do_gettimeofday(&txc->time); + notify_arch_cmos_timer(); + return(result); +} diff --git a/kernel/timer.c b/kernel/timer.c index 4f55622b0d38..5fccc7cbf3b4 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -41,12 +41,6 @@ #include #include -#ifdef CONFIG_TIME_INTERPOLATION -static void time_interpolator_update(long delta_nsec); -#else -#define time_interpolator_update(x) -#endif - u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); @@ -587,209 +581,6 @@ struct timespec wall_to_monotonic __attribute__ ((aligned (16))); EXPORT_SYMBOL(xtime); -/* Don't completely fail for HZ > 500. */ -int tickadj = 500/HZ ? : 1; /* microsecs */ - - -/* - * phase-lock loop variables - */ -/* TIME_ERROR prevents overwriting the CMOS clock */ -int time_state = TIME_OK; /* clock synchronization status */ -int time_status = STA_UNSYNC; /* clock status bits */ -long time_offset; /* time adjustment (us) */ -long time_constant = 2; /* pll time constant */ -long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ -long time_precision = 1; /* clock precision (us) */ -long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ -long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; - /* frequency offset (scaled ppm)*/ -static long time_adj; /* tick adjust (scaled 1 / HZ) */ -long time_reftime; /* time at last adjustment (s) */ -long time_adjust; -long time_next_adjust; - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - * - */ -static void second_overflow(void) -{ - long ltemp; - - /* Bump the maxerror field */ - time_maxerror += time_tolerance >> SHIFT_USEC; - if (time_maxerror > NTP_PHASE_LIMIT) { - time_maxerror = NTP_PHASE_LIMIT; - time_status |= STA_UNSYNC; - } - - /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. The microtime() - * routine or external clock driver will insure that reported time is - * always monotonic. The ugly divides should be replaced. - */ - switch (time_state) { - case TIME_OK: - if (time_status & STA_INS) - time_state = TIME_INS; - else if (time_status & STA_DEL) - time_state = TIME_DEL; - break; - case TIME_INS: - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; - /* - * The timer interpolator will make time change - * gradually instead of an immediate jump by one second - */ - time_interpolator_update(-NSEC_PER_SEC); - time_state = TIME_OOP; - clock_was_set(); - printk(KERN_NOTICE "Clock: inserting leap second " - "23:59:60 UTC\n"); - } - break; - case TIME_DEL: - if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; - wall_to_monotonic.tv_sec--; - /* - * Use of time interpolator for a gradual change of - * time - */ - time_interpolator_update(NSEC_PER_SEC); - time_state = TIME_WAIT; - clock_was_set(); - printk(KERN_NOTICE "Clock: deleting leap second " - "23:59:59 UTC\n"); - } - break; - case TIME_OOP: - time_state = TIME_WAIT; - break; - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - } - - /* - * Compute the phase adjustment for the next second. In PLL mode, the - * offset is reduced by a fixed factor times the time constant. In FLL - * mode the offset is used directly. In either mode, the maximum phase - * adjustment for each second is clamped so as to spread the adjustment - * over not more than the number of seconds between updates. - */ - ltemp = time_offset; - if (!(time_status & STA_FLL)) - ltemp = shift_right(ltemp, SHIFT_KG + time_constant); - ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE); - ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE); - time_offset -= ltemp; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - - /* - * Compute the frequency estimate and additional phase adjustment due - * to frequency error for the next second. - */ - ltemp = time_freq; - time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)); - -#if HZ == 100 - /* - * Compensate for (HZ==100) != (1 << SHIFT_HZ). Add 25% and 3.125% to - * get 128.125; => only 0.125% error (p. 14) - */ - time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5); -#endif -#if HZ == 250 - /* - * Compensate for (HZ==250) != (1 << SHIFT_HZ). Add 1.5625% and - * 0.78125% to get 255.85938; => only 0.05% error (p. 14) - */ - time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); -#endif -#if HZ == 1000 - /* - * Compensate for (HZ==1000) != (1 << SHIFT_HZ). Add 1.5625% and - * 0.78125% to get 1023.4375; => only 0.05% error (p. 14) - */ - time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); -#endif -} - -/* - * Returns how many microseconds we need to add to xtime this tick - * in doing an adjustment requested with adjtime. - */ -static long adjtime_adjustment(void) -{ - long time_adjust_step; - - time_adjust_step = time_adjust; - if (time_adjust_step) { - /* - * We are doing an adjtime thing. Prepare time_adjust_step to - * be within bounds. Note that a positive time_adjust means we - * want the clock to run faster. - * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - time_adjust_step = min(time_adjust_step, (long)tickadj); - time_adjust_step = max(time_adjust_step, (long)-tickadj); - } - return time_adjust_step; -} - -/* in the NTP reference this is called "hardclock()" */ -static void update_ntp_one_tick(void) -{ - long time_adjust_step; - - time_adjust_step = adjtime_adjustment(); - if (time_adjust_step) - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - - /* Changes by adjtime() do not take effect till next tick. */ - if (time_next_adjust != 0) { - time_adjust = time_next_adjust; - time_next_adjust = 0; - } -} - -/* - * Return how long ticks are at the moment, that is, how much time - * update_wall_time_one_tick will add to xtime next time we call it - * (assuming no calls to do_adjtimex in the meantime). - * The return value is in fixed-point nanoseconds shifted by the - * specified number of bits to the right of the binary point. - * This function has no side-effects. - */ -u64 current_tick_length(void) -{ - long delta_nsec; - u64 ret; - - /* calculate the finest interval NTP will allow. - * ie: nanosecond value shifted by (SHIFT_SCALE - 10) - */ - delta_nsec = tick_nsec + adjtime_adjustment() * 1000; - ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; - ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); - - return ret; -} /* XXX - all of this timekeeping code should be later moved to time.c */ #include @@ -1775,7 +1566,7 @@ unsigned long time_interpolator_get_offset(void) #define INTERPOLATOR_ADJUST 65536 #define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST -static void time_interpolator_update(long delta_nsec) +void time_interpolator_update(long delta_nsec) { u64 counter; unsigned long offset; -- cgit v1.2.3-71-gd317 From b0ee75561beadc4db4d9a899c8ef4a7db50aa0ab Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:22 -0700 Subject: [PATCH] ntp: add ntp_update_frequency This introduces ntp_update_frequency() and deinlines ntp_clear() (as it's not performance critical). ntp_update_frequency() calculates the base tick length using tick_usec and adds a base adjustment, in case the frequency doesn't divide evenly by HZ. Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 14 ++------------ kernel/time/ntp.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ kernel/timer.c | 11 ++++------- 3 files changed, 50 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 2a21485bf183..b589c8218bb9 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -219,18 +219,8 @@ extern long time_reftime; /* time at last adjustment (s) */ extern long time_adjust; /* The amount of adjtime left */ extern long time_next_adjust; /* Value for time_adjust at next tick */ -/** - * ntp_clear - Clears the NTP state variables - * - * Must be called while holding a write on the xtime_lock - */ -static inline void ntp_clear(void) -{ - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; -} +extern void ntp_clear(void); +extern void ntp_update_frequency(void); /** * ntp_synced - Returns 1 if the NTP status is not UNSYNC diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8ccce15b4b23..77137bec2aea 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -15,6 +15,13 @@ #include #include +/* + * Timekeeping variables + */ +unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ +unsigned long tick_nsec; /* ACTHZ period (nsec) */ +static u64 tick_length, tick_length_base; + /* Don't completely fail for HZ > 500. */ int tickadj = 500/HZ ? : 1; /* microsecs */ @@ -37,6 +44,36 @@ long time_reftime; /* time at last adjustment (s) */ long time_adjust; long time_next_adjust; +/** + * ntp_clear - Clears the NTP state variables + * + * Must be called while holding a write on the xtime_lock + */ +void ntp_clear(void) +{ + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + + ntp_update_frequency(); + + tick_length = tick_length_base; +} + +#define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) +#define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / (s64)CLOCK_TICK_RATE) + +void ntp_update_frequency(void) +{ + tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; + tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; + + do_div(tick_length_base, HZ); + + tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; +} + /* * this routine handles the overflow of the microsecond field * @@ -151,6 +188,7 @@ void second_overflow(void) */ time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); #endif + tick_length = tick_length_base; } /* @@ -204,14 +242,13 @@ void update_ntp_one_tick(void) */ u64 current_tick_length(void) { - long delta_nsec; u64 ret; /* calculate the finest interval NTP will allow. * ie: nanosecond value shifted by (SHIFT_SCALE - 10) */ - delta_nsec = tick_nsec + adjtime_adjustment() * 1000; - ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; + ret = tick_length; + ret += (u64)(adjtime_adjustment() * 1000) << TICK_LENGTH_SHIFT; ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); return ret; @@ -351,10 +388,11 @@ int do_adjtimex(struct timex *txc) time_freq = max(time_freq, -time_tolerance); } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ - if (txc->modes & ADJ_TICK) { + if (txc->modes & ADJ_TICK) tick_usec = txc->tick; - tick_nsec = TICK_USEC_TO_NSEC(tick_usec); - } + + if (txc->modes & ADJ_TICK) + ntp_update_frequency(); } /* txc->modes */ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) result = TIME_ERROR; diff --git a/kernel/timer.c b/kernel/timer.c index 5fccc7cbf3b4..78d3fa10fcd6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -562,12 +562,6 @@ found: /******************************************************************/ -/* - * Timekeeping variables - */ -unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ -unsigned long tick_nsec = TICK_NSEC; /* ACTHZ period (nsec) */ - /* * The current time * wall_to_monotonic is what we need to add to xtime (or xtime corrected @@ -757,10 +751,13 @@ void __init timekeeping_init(void) unsigned long flags; write_seqlock_irqsave(&xtime_lock, flags); + + ntp_clear(); + clock = clocksource_get_next(); clocksource_calculate_interval(clock, tick_nsec); clock->cycle_last = clocksource_read(clock); - ntp_clear(); + write_sequnlock_irqrestore(&xtime_lock, flags); } -- cgit v1.2.3-71-gd317 From 3d3675cc3d04d7fd4bb11e8c1ea79e5ade4f5e44 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:25 -0700 Subject: [PATCH] ntp: prescale time_offset This converts time_offset into a scaled per tick value. This avoids now completely the crude compensation in second_overflow(). Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 2 +- kernel/time/ntp.c | 64 +++++++++++++-------------------------------------- 2 files changed, 17 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index b589c8218bb9..1cde6f6a2712 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -89,7 +89,7 @@ * FINENSEC is 1 ns in SHIFT_UPDATE units of the time_phase variable. */ #define SHIFT_SCALE 22 /* phase scale (shift) */ -#define SHIFT_UPDATE (SHIFT_KG + MAXTC) /* time offset scale (shift) */ +#define SHIFT_UPDATE (SHIFT_HZ + 1) /* time offset scale (shift) */ #define SHIFT_USEC 16 /* frequency offset scale (shift) */ #define FINENSEC (1L << (SHIFT_SCALE - 10)) /* ~1 ns in phase units */ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index ab21eb06e09b..238ce47ef09d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -31,7 +31,7 @@ int tickadj = 500/HZ ? : 1; /* microsecs */ /* TIME_ERROR prevents overwriting the CMOS clock */ int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ -long time_offset; /* time adjustment (us) */ +long time_offset; /* time adjustment (ns) */ long time_constant = 2; /* pll time constant */ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ long time_precision = 1; /* clock precision (us) */ @@ -57,6 +57,7 @@ void ntp_clear(void) ntp_update_frequency(); tick_length = tick_length_base; + time_offset = 0; } #define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) @@ -83,7 +84,7 @@ void ntp_update_frequency(void) */ void second_overflow(void) { - long ltemp, time_adj; + long time_adj; /* Bump the maxerror field */ time_maxerror += time_tolerance >> SHIFT_USEC; @@ -151,42 +152,14 @@ void second_overflow(void) * adjustment for each second is clamped so as to spread the adjustment * over not more than the number of seconds between updates. */ - ltemp = time_offset; - if (!(time_status & STA_FLL)) - ltemp = shift_right(ltemp, SHIFT_KG + time_constant); - ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE); - ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE); - time_offset -= ltemp; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - - /* - * Compute the frequency estimate and additional phase adjustment due - * to frequency error for the next second. - */ - -#if HZ == 100 - /* - * Compensate for (HZ==100) != (1 << SHIFT_HZ). Add 25% and 3.125% to - * get 128.125; => only 0.125% error (p. 14) - */ - time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5); -#endif -#if HZ == 250 - /* - * Compensate for (HZ==250) != (1 << SHIFT_HZ). Add 1.5625% and - * 0.78125% to get 255.85938; => only 0.05% error (p. 14) - */ - time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); -#endif -#if HZ == 1000 - /* - * Compensate for (HZ==1000) != (1 << SHIFT_HZ). Add 1.5625% and - * 0.78125% to get 1023.4375; => only 0.05% error (p. 14) - */ - time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); -#endif tick_length = tick_length_base; - tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); + time_adj = time_offset; + if (!(time_status & STA_FLL)) + time_adj = shift_right(time_adj, SHIFT_KG + time_constant); + time_adj = min(time_adj, -((MAXPHASE / HZ) << SHIFT_UPDATE) / MINSEC); + time_adj = max(time_adj, ((MAXPHASE / HZ) << SHIFT_UPDATE) / MINSEC); + time_offset -= time_adj; + tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); } /* @@ -347,12 +320,8 @@ int do_adjtimex(struct timex *txc) * Scale the phase adjustment and * clamp to the operating range. */ - if (ltemp > MAXPHASE) - time_offset = MAXPHASE << SHIFT_UPDATE; - else if (ltemp < -MAXPHASE) - time_offset = -(MAXPHASE << SHIFT_UPDATE); - else - time_offset = ltemp << SHIFT_UPDATE; + time_offset = min(ltemp, MAXPHASE); + time_offset = max(time_offset, -MAXPHASE); /* * Select whether the frequency is to be controlled @@ -366,8 +335,7 @@ int do_adjtimex(struct timex *txc) time_reftime = xtime.tv_sec; if (time_status & STA_FLL) { if (mtemp >= MINSEC) { - ltemp = (time_offset / mtemp) << (SHIFT_USEC - - SHIFT_UPDATE); + ltemp = ((time_offset << 12) / mtemp) << (SHIFT_USEC - 12); time_freq += shift_right(ltemp, SHIFT_KH); } else /* calibration interval too short (p. 12) */ result = TIME_ERROR; @@ -382,6 +350,7 @@ int do_adjtimex(struct timex *txc) } time_freq = min(time_freq, time_tolerance); time_freq = max(time_freq, -time_tolerance); + time_offset = (time_offset * NSEC_PER_USEC / HZ) << SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -395,9 +364,8 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) txc->offset = save_adjust; - else { - txc->offset = shift_right(time_offset, SHIFT_UPDATE); - } + else + txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; txc->freq = time_freq; txc->maxerror = time_maxerror; txc->esterror = time_esterror; -- cgit v1.2.3-71-gd317 From 8f807f8d2137ba728d22820103131038639b68a9 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:25 -0700 Subject: [PATCH] ntp: add time_adjust to tick length This folds update_ntp_one_tick() into second_overflow() and adds time_adjust to the tick length, this makes time_next_adjust unnecessary. This slightly changes the adjtime() behaviour, instead of applying it to the next tick, it's applied to the next second. Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 1 - kernel/time/ntp.c | 71 +++++++++++++-------------------------------------- kernel/timer.c | 2 -- 3 files changed, 18 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 1cde6f6a2712..b5f297e17668 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -217,7 +217,6 @@ extern long time_freq; /* frequency offset (scaled ppm) */ extern long time_reftime; /* time at last adjustment (s) */ extern long time_adjust; /* The amount of adjtime left */ -extern long time_next_adjust; /* Value for time_adjust at next tick */ extern void ntp_clear(void); extern void ntp_update_frequency(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 238ce47ef09d..65223b7ed810 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -22,8 +22,9 @@ unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ unsigned long tick_nsec; /* ACTHZ period (nsec) */ static u64 tick_length, tick_length_base; -/* Don't completely fail for HZ > 500. */ -int tickadj = 500/HZ ? : 1; /* microsecs */ +#define MAX_TICKADJ 500 /* microsecs */ +#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ + TICK_LENGTH_SHIFT) / HZ) /* * phase-lock loop variables @@ -40,7 +41,6 @@ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ long time_freq; /* frequency offset (scaled ppm)*/ long time_reftime; /* time at last adjustment (s) */ long time_adjust; -long time_next_adjust; /** * ntp_clear - Clears the NTP state variables @@ -160,46 +160,19 @@ void second_overflow(void) time_adj = max(time_adj, ((MAXPHASE / HZ) << SHIFT_UPDATE) / MINSEC); time_offset -= time_adj; tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); -} - -/* - * Returns how many microseconds we need to add to xtime this tick - * in doing an adjustment requested with adjtime. - */ -static long adjtime_adjustment(void) -{ - long time_adjust_step; - - time_adjust_step = time_adjust; - if (time_adjust_step) { - /* - * We are doing an adjtime thing. Prepare time_adjust_step to - * be within bounds. Note that a positive time_adjust means we - * want the clock to run faster. - * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - time_adjust_step = min(time_adjust_step, (long)tickadj); - time_adjust_step = max(time_adjust_step, (long)-tickadj); - } - return time_adjust_step; -} -/* in the NTP reference this is called "hardclock()" */ -void update_ntp_one_tick(void) -{ - long time_adjust_step; - - time_adjust_step = adjtime_adjustment(); - if (time_adjust_step) - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - - /* Changes by adjtime() do not take effect till next tick. */ - if (time_next_adjust != 0) { - time_adjust = time_next_adjust; - time_next_adjust = 0; + if (unlikely(time_adjust)) { + if (time_adjust > MAX_TICKADJ) { + time_adjust -= MAX_TICKADJ; + tick_length += MAX_TICKADJ_SCALED; + } else if (time_adjust < -MAX_TICKADJ) { + time_adjust += MAX_TICKADJ; + tick_length -= MAX_TICKADJ_SCALED; + } else { + time_adjust = 0; + tick_length += (s64)(time_adjust * NSEC_PER_USEC / + HZ) << TICK_LENGTH_SHIFT; + } } } @@ -213,14 +186,7 @@ void update_ntp_one_tick(void) */ u64 current_tick_length(void) { - u64 ret; - - /* calculate the finest interval NTP will allow. - */ - ret = tick_length; - ret += (u64)(adjtime_adjustment() * 1000) << TICK_LENGTH_SHIFT; - - return ret; + return tick_length; } @@ -263,7 +229,7 @@ int do_adjtimex(struct timex *txc) result = time_state; /* mostly `TIME_OK' */ /* Save for later - semantics of adjtime is to return old value */ - save_adjust = time_next_adjust ? time_next_adjust : time_adjust; + save_adjust = time_adjust; #if 0 /* STA_CLOCKERR is never set yet */ time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ @@ -310,8 +276,7 @@ int do_adjtimex(struct timex *txc) if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ if (txc->modes == ADJ_OFFSET_SINGLESHOT) { /* adjtime() is independent from ntp_adjtime() */ - if ((time_next_adjust = txc->offset) == 0) - time_adjust = 0; + time_adjust = txc->offset; } else if (time_status & STA_PLL) { ltemp = txc->offset; diff --git a/kernel/timer.c b/kernel/timer.c index 78d3fa10fcd6..654dd5df2417 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -937,8 +937,6 @@ static void update_wall_time(void) /* interpolator bits */ time_interpolator_update(clock->xtime_interval >> clock->shift); - /* increment the NTP state machine */ - update_ntp_one_tick(); /* accumulate error between NTP and clock interval */ clock->error += current_tick_length(); -- cgit v1.2.3-71-gd317 From 97eebe138caaf78354b1fad233e63bafdcc4fd54 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:26 -0700 Subject: [PATCH] ntp: remove time_tolerance time_tolerance isn't changed at all in the kernel, so simply remove it, this simplifies the next patch, as it avoids a number of conversions. Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 1 - kernel/time/ntp.c | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index b5f297e17668..7715b4c0caf9 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -208,7 +208,6 @@ extern int time_state; /* clock status */ extern int time_status; /* clock synchronization status bits */ extern long time_offset; /* time adjustment (us) */ extern long time_constant; /* pll time constant */ -extern long time_tolerance; /* frequency tolerance (ppm) */ extern long time_precision; /* clock precision (us) */ extern long time_maxerror; /* maximum error */ extern long time_esterror; /* estimated error */ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 65223b7ed810..af7563f5d4e2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -34,7 +34,6 @@ int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ long time_offset; /* time adjustment (ns) */ long time_constant = 2; /* pll time constant */ -long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ long time_precision = 1; /* clock precision (us) */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ @@ -87,7 +86,7 @@ void second_overflow(void) long time_adj; /* Bump the maxerror field */ - time_maxerror += time_tolerance >> SHIFT_USEC; + time_maxerror += MAXFREQ >> SHIFT_USEC; if (time_maxerror > NTP_PHASE_LIMIT) { time_maxerror = NTP_PHASE_LIMIT; time_status |= STA_UNSYNC; @@ -313,8 +312,8 @@ int do_adjtimex(struct timex *txc) } else /* calibration interval too long (p. 12) */ result = TIME_ERROR; } - time_freq = min(time_freq, time_tolerance); - time_freq = max(time_freq, -time_tolerance); + time_freq = min(time_freq, MAXFREQ); + time_freq = max(time_freq, -MAXFREQ); time_offset = (time_offset * NSEC_PER_USEC / HZ) << SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ @@ -337,7 +336,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) txc->status = time_status; txc->constant = time_constant; txc->precision = time_precision; - txc->tolerance = time_tolerance; + txc->tolerance = MAXFREQ; txc->tick = tick_usec; /* PPS is not implemented, so these are zero */ -- cgit v1.2.3-71-gd317 From 04b617e71e363e640e88be1e43f53fa6a3afef9f Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:27 -0700 Subject: [PATCH] ntp: convert time_freq to nsec value This converts time_freq to a scaled nsec value and adds around 6bit of extra resolution. This pushes the time_freq to its 32bit limits so the calculatons have to be done with 64bit. Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 2 ++ kernel/time/ntp.c | 36 ++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 7715b4c0caf9..671609ee1a3d 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -91,10 +91,12 @@ #define SHIFT_SCALE 22 /* phase scale (shift) */ #define SHIFT_UPDATE (SHIFT_HZ + 1) /* time offset scale (shift) */ #define SHIFT_USEC 16 /* frequency offset scale (shift) */ +#define SHIFT_NSEC 12 /* kernel frequency offset scale */ #define FINENSEC (1L << (SHIFT_SCALE - 10)) /* ~1 ns in phase units */ #define MAXPHASE 512000L /* max phase error (us) */ #define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */ +#define MAXFREQ_NSEC (512000L << SHIFT_NSEC) /* max frequency error (ppb) */ #define MINSEC 16L /* min interval between updates (s) */ #define MAXSEC 1200L /* max interval between updates (s) */ #define NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index af7563f5d4e2..9137b54613e0 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -66,7 +66,7 @@ void ntp_update_frequency(void) { tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; - tick_length_base += ((s64)time_freq * NSEC_PER_USEC) << (TICK_LENGTH_SHIFT - SHIFT_USEC); + tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); do_div(tick_length_base, HZ); @@ -200,6 +200,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void) int do_adjtimex(struct timex *txc) { long ltemp, mtemp, save_adjust; + s64 freq_adj; int result; /* In order to modify anything, you gotta be super-user! */ @@ -245,7 +246,7 @@ int do_adjtimex(struct timex *txc) result = -EINVAL; goto leave; } - time_freq = txc->freq; + time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); } if (txc->modes & ADJ_MAXERROR) { @@ -278,14 +279,14 @@ int do_adjtimex(struct timex *txc) time_adjust = txc->offset; } else if (time_status & STA_PLL) { - ltemp = txc->offset; + ltemp = txc->offset * NSEC_PER_USEC; /* * Scale the phase adjustment and * clamp to the operating range. */ - time_offset = min(ltemp, MAXPHASE); - time_offset = max(time_offset, -MAXPHASE); + time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); + time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); /* * Select whether the frequency is to be controlled @@ -297,24 +298,31 @@ int do_adjtimex(struct timex *txc) time_reftime = xtime.tv_sec; mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; + freq_adj = 0; if (time_status & STA_FLL) { if (mtemp >= MINSEC) { - ltemp = ((time_offset << 12) / mtemp) << (SHIFT_USEC - 12); - time_freq += shift_right(ltemp, SHIFT_KH); + freq_adj = (s64)time_offset << (SHIFT_NSEC - SHIFT_KH); + if (time_offset < 0) { + freq_adj = -freq_adj; + do_div(freq_adj, mtemp); + freq_adj = -freq_adj; + } else + do_div(freq_adj, mtemp); } else /* calibration interval too short (p. 12) */ result = TIME_ERROR; } else { /* PLL mode */ if (mtemp < MAXSEC) { - ltemp *= mtemp; - time_freq += shift_right(ltemp,(time_constant + + freq_adj = (s64)ltemp * mtemp; + freq_adj = shift_right(freq_adj,(time_constant + time_constant + - SHIFT_KF - SHIFT_USEC)); + SHIFT_KF - SHIFT_NSEC)); } else /* calibration interval too long (p. 12) */ result = TIME_ERROR; } - time_freq = min(time_freq, MAXFREQ); - time_freq = max(time_freq, -MAXFREQ); - time_offset = (time_offset * NSEC_PER_USEC / HZ) << SHIFT_UPDATE; + freq_adj += time_freq; + freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); + time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); + time_offset = (time_offset / HZ) << SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -330,7 +338,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) txc->offset = save_adjust; else txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; - txc->freq = time_freq; + txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; -- cgit v1.2.3-71-gd317 From f19923937321244e7dc334767eb4b67e0e3d5c74 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:28 -0700 Subject: [PATCH] ntp: convert to the NTP4 reference model This converts the kernel ntp model into a model which matches the nanokernel reference implementations. The previous patches already increased the resolution and precision of the computations, so that this conversion becomes quite simple. explains: The original NTP kernel interface was defined in units of microseconds. That's what Linux implements. As computers have gotten faster and can now split microseconds easily, a new kernel interface using nanosecond units was defined ("the nanokernel", confusing as that name is to OS hackers), and there's an STA_NANO bit in the adjtimex() status field to tell the application which units it's using. The current ntpd supports both, but Linux loses some possible timing resolution because of quantization effects, and the ntpd hackers would really like to be able to drop the backwards compatibility code. Ulrich Windl has been maintaining a patch set to do the conversion for years, but it's hard to keep in sync. Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 11 +++++------ kernel/time/ntp.c | 51 +++++++++++++++++++-------------------------------- 2 files changed, 24 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 671609ee1a3d..ac808f13fa0e 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -69,10 +69,9 @@ * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, * respectively. */ -#define SHIFT_KG 6 /* phase factor (shift) */ -#define SHIFT_KF 16 /* PLL frequency factor (shift) */ -#define SHIFT_KH 2 /* FLL frequency factor (shift) */ -#define MAXTC 6 /* maximum time constant (shift) */ +#define SHIFT_PLL 4 /* PLL frequency factor (shift) */ +#define SHIFT_FLL 2 /* FLL frequency factor (shift) */ +#define MAXTC 10 /* maximum time constant (shift) */ /* * The SHIFT_SCALE define establishes the decimal point of the time_phase @@ -97,8 +96,8 @@ #define MAXPHASE 512000L /* max phase error (us) */ #define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */ #define MAXFREQ_NSEC (512000L << SHIFT_NSEC) /* max frequency error (ppb) */ -#define MINSEC 16L /* min interval between updates (s) */ -#define MAXSEC 1200L /* max interval between updates (s) */ +#define MINSEC 256 /* min interval between updates (s) */ +#define MAXSEC 2048 /* max interval between updates (s) */ #define NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */ /* diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 9137b54613e0..1ab5e9d7fa50 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -145,18 +145,11 @@ void second_overflow(void) } /* - * Compute the phase adjustment for the next second. In PLL mode, the - * offset is reduced by a fixed factor times the time constant. In FLL - * mode the offset is used directly. In either mode, the maximum phase - * adjustment for each second is clamped so as to spread the adjustment - * over not more than the number of seconds between updates. + * Compute the phase adjustment for the next second. The offset is + * reduced by a fixed factor times the time constant. */ tick_length = tick_length_base; - time_adj = time_offset; - if (!(time_status & STA_FLL)) - time_adj = shift_right(time_adj, SHIFT_KG + time_constant); - time_adj = min(time_adj, -((MAXPHASE / HZ) << SHIFT_UPDATE) / MINSEC); - time_adj = max(time_adj, ((MAXPHASE / HZ) << SHIFT_UPDATE) / MINSEC); + time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); time_offset -= time_adj; tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); @@ -200,7 +193,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void) int do_adjtimex(struct timex *txc) { long ltemp, mtemp, save_adjust; - s64 freq_adj; + s64 freq_adj, temp64; int result; /* In order to modify anything, you gotta be super-user! */ @@ -270,7 +263,7 @@ int do_adjtimex(struct timex *txc) result = -EINVAL; goto leave; } - time_constant = txc->constant; + time_constant = min(txc->constant + 4, (long)MAXTC); } if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ @@ -298,26 +291,20 @@ int do_adjtimex(struct timex *txc) time_reftime = xtime.tv_sec; mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; - freq_adj = 0; - if (time_status & STA_FLL) { - if (mtemp >= MINSEC) { - freq_adj = (s64)time_offset << (SHIFT_NSEC - SHIFT_KH); - if (time_offset < 0) { - freq_adj = -freq_adj; - do_div(freq_adj, mtemp); - freq_adj = -freq_adj; - } else - do_div(freq_adj, mtemp); - } else /* calibration interval too short (p. 12) */ - result = TIME_ERROR; - } else { /* PLL mode */ - if (mtemp < MAXSEC) { - freq_adj = (s64)ltemp * mtemp; - freq_adj = shift_right(freq_adj,(time_constant + - time_constant + - SHIFT_KF - SHIFT_NSEC)); - } else /* calibration interval too long (p. 12) */ - result = TIME_ERROR; + + freq_adj = (s64)time_offset * mtemp; + freq_adj = shift_right(freq_adj, time_constant * 2 + + (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); + if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { + temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); + if (time_offset < 0) { + temp64 = -temp64; + do_div(temp64, mtemp); + freq_adj -= temp64; + } else { + do_div(temp64, mtemp); + freq_adj += temp64; + } } freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); -- cgit v1.2.3-71-gd317 From 0883d899ef862c1b0f8b2c2d38098470c193a3dd Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 30 Sep 2006 23:28:29 -0700 Subject: [PATCH] ntp: cleanup defines and comments Remove a few unused defines and remove obsolete information from comments. Signed-off-by: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-frv/timex.h | 5 ----- include/asm-m32r/timex.h | 3 --- include/asm-sh64/timex.h | 3 --- include/asm-xtensa/timex.h | 3 --- include/linux/timex.h | 13 +++---------- 5 files changed, 3 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/asm-frv/timex.h b/include/asm-frv/timex.h index 2aa562fa067b..a89bddefdacf 100644 --- a/include/asm-frv/timex.h +++ b/include/asm-frv/timex.h @@ -6,11 +6,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ #define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ -#define FINETUNE \ -((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ - << (SHIFT_SCALE-SHIFT_HZ)) / HZ) - typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) diff --git a/include/asm-m32r/timex.h b/include/asm-m32r/timex.h index e89bfd17db51..019441c1d7a0 100644 --- a/include/asm-m32r/timex.h +++ b/include/asm-m32r/timex.h @@ -12,9 +12,6 @@ #define CLOCK_TICK_RATE (CONFIG_BUS_CLOCK / CONFIG_TIMER_DIVIDE) #define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ -#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ - << (SHIFT_SCALE-SHIFT_HZ)) / HZ) #ifdef __KERNEL__ /* diff --git a/include/asm-sh64/timex.h b/include/asm-sh64/timex.h index af0b79269661..163e2b62fe27 100644 --- a/include/asm-sh64/timex.h +++ b/include/asm-sh64/timex.h @@ -17,9 +17,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ #define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ -#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ - << (SHIFT_SCALE-SHIFT_HZ)) / HZ) typedef unsigned long cycles_t; diff --git a/include/asm-xtensa/timex.h b/include/asm-xtensa/timex.h index d14a3755a12b..c7b705e66655 100644 --- a/include/asm-xtensa/timex.h +++ b/include/asm-xtensa/timex.h @@ -31,9 +31,6 @@ #define CLOCK_TICK_RATE 1193180 /* (everyone is using this value) */ #define CLOCK_TICK_FACTOR 20 /* Factor of both 10^6 and CLOCK_TICK_RATE */ -#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ - << (SHIFT_SCALE-SHIFT_HZ)) / HZ) #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT extern unsigned long ccount_per_jiffy; diff --git a/include/linux/timex.h b/include/linux/timex.h index ac808f13fa0e..261381b5da82 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -74,24 +74,17 @@ #define MAXTC 10 /* maximum time constant (shift) */ /* - * The SHIFT_SCALE define establishes the decimal point of the time_phase - * variable which serves as an extension to the low-order bits of the - * system clock variable. The SHIFT_UPDATE define establishes the decimal - * point of the time_offset variable which represents the current offset - * with respect to standard time. The FINENSEC define represents 1 nsec in - * scaled units. + * The SHIFT_UPDATE define establishes the decimal point of the + * time_offset variable which represents the current offset with + * respect to standard time. * * SHIFT_USEC defines the scaling (shift) of the time_freq and * time_tolerance variables, which represent the current frequency * offset and maximum frequency tolerance. - * - * FINENSEC is 1 ns in SHIFT_UPDATE units of the time_phase variable. */ -#define SHIFT_SCALE 22 /* phase scale (shift) */ #define SHIFT_UPDATE (SHIFT_HZ + 1) /* time offset scale (shift) */ #define SHIFT_USEC 16 /* frequency offset scale (shift) */ #define SHIFT_NSEC 12 /* kernel frequency offset scale */ -#define FINENSEC (1L << (SHIFT_SCALE - 10)) /* ~1 ns in phase units */ #define MAXPHASE 512000L /* max phase error (us) */ #define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */ -- cgit v1.2.3-71-gd317 From 70bc42f90a3f4721c89dbe865e6c95da8565b41c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 30 Sep 2006 23:28:29 -0700 Subject: [PATCH] kernel/time/ntp.c: possible cleanups This patch contains the following possible cleanups: - make the following needlessly global function static: - ntp_update_frequency() - make the following needlessly global variables static: - time_state - time_offset - time_constant - time_reftime - remove the following read-only global variable: - time_precision Signed-off-by: Adrian Bunk Cc: Roman Zippel Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 6 ------ kernel/time/ntp.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 261381b5da82..049dfe4a11f2 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -198,21 +198,15 @@ extern int tickadj; /* amount of adjustment per tick */ /* * phase-lock loop variables */ -extern int time_state; /* clock status */ extern int time_status; /* clock synchronization status bits */ -extern long time_offset; /* time adjustment (us) */ -extern long time_constant; /* pll time constant */ -extern long time_precision; /* clock precision (us) */ extern long time_maxerror; /* maximum error */ extern long time_esterror; /* estimated error */ extern long time_freq; /* frequency offset (scaled ppm) */ -extern long time_reftime; /* time at last adjustment (s) */ extern long time_adjust; /* The amount of adjtime left */ extern void ntp_clear(void); -extern void ntp_update_frequency(void); /** * ntp_synced - Returns 1 if the NTP status is not UNSYNC diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 1ab5e9d7fa50..47195fa0ec4f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -30,17 +30,31 @@ static u64 tick_length, tick_length_base; * phase-lock loop variables */ /* TIME_ERROR prevents overwriting the CMOS clock */ -int time_state = TIME_OK; /* clock synchronization status */ +static int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ -long time_offset; /* time adjustment (ns) */ -long time_constant = 2; /* pll time constant */ -long time_precision = 1; /* clock precision (us) */ +static long time_offset; /* time adjustment (ns) */ +static long time_constant = 2; /* pll time constant */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ long time_freq; /* frequency offset (scaled ppm)*/ -long time_reftime; /* time at last adjustment (s) */ +static long time_reftime; /* time at last adjustment (s) */ long time_adjust; +#define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) +#define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \ + (s64)CLOCK_TICK_RATE) + +static void ntp_update_frequency(void) +{ + tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; + tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; + tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); + + do_div(tick_length_base, HZ); + + tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; +} + /** * ntp_clear - Clears the NTP state variables * @@ -59,20 +73,6 @@ void ntp_clear(void) time_offset = 0; } -#define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) -#define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / (s64)CLOCK_TICK_RATE) - -void ntp_update_frequency(void) -{ - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); - - do_div(tick_length_base, HZ); - - tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; -} - /* * this routine handles the overflow of the microsecond field * @@ -330,7 +330,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) txc->esterror = time_esterror; txc->status = time_status; txc->constant = time_constant; - txc->precision = time_precision; + txc->precision = 1; txc->tolerance = MAXFREQ; txc->tick = tick_usec; -- cgit v1.2.3-71-gd317 From e1fabd3ccf02901374bffa434e0af472749a5bd9 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sat, 30 Sep 2006 23:28:40 -0700 Subject: [PATCH] reiserfs: fix is_reusable bitmap check to not traverse the bitmap info array There is a check in is_reusable to determine if a particular block is a bitmap block. It verifies this by going through the array of bitmap block buffer heads and comparing the block number to each one. Bitmap blocks are at defined locations on the disk in both old and current formats. Simply checking against the known good values is enough. This is a trivial optimization for a non-production codepath, but this is the first in a series of patches that will ultimately remove the buffer heads from that array. Signed-off-by: Jeff Mahoney Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 40 +++++++++++++++++++++++++--------------- fs/reiserfs/super.c | 2 ++ include/linux/reiserfs_fs_sb.h | 1 + 3 files changed, 28 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 4a7dbdee1b6d..1022347a211f 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -50,16 +50,15 @@ static inline void get_bit_address(struct super_block *s, { /* It is in the bitmap block number equal to the block * number divided by the number of bits in a block. */ - *bmap_nr = block / (s->s_blocksize << 3); + *bmap_nr = block >> (s->s_blocksize_bits + 3); /* Within that bitmap block it is located at bit offset *offset. */ *offset = block & ((s->s_blocksize << 3) - 1); - return; } #ifdef CONFIG_REISERFS_CHECK int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) { - int i, j; + int bmap, offset; if (block == 0 || block >= SB_BLOCK_COUNT(s)) { reiserfs_warning(s, @@ -68,34 +67,45 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) return 0; } - /* it can't be one of the bitmap blocks */ - for (i = 0; i < SB_BMAP_NR(s); i++) - if (block == SB_AP_BITMAP(s)[i].bh->b_blocknr) { + get_bit_address(s, block, &bmap, &offset); + + /* Old format filesystem? Unlikely, but the bitmaps are all up front so + * we need to account for it. */ + if (unlikely(test_bit(REISERFS_OLD_FORMAT, + &(REISERFS_SB(s)->s_properties)))) { + b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; + if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) { + reiserfs_warning(s, "vs: 4019: is_reusable: " + "bitmap block %lu(%u) can't be freed or reused", + block, SB_BMAP_NR(s)); + return 0; + } + } else { + if (offset == 0) { reiserfs_warning(s, "vs: 4020: is_reusable: " "bitmap block %lu(%u) can't be freed or reused", block, SB_BMAP_NR(s)); return 0; } + } - get_bit_address(s, block, &i, &j); - - if (i >= SB_BMAP_NR(s)) { + if (bmap >= SB_BMAP_NR(s)) { reiserfs_warning(s, "vs-4030: is_reusable: there is no so many bitmap blocks: " - "block=%lu, bitmap_nr=%d", block, i); + "block=%lu, bitmap_nr=%d", block, bmap); return 0; } if ((bit_value == 0 && - reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) || + reiserfs_test_le_bit(offset, SB_AP_BITMAP(s)[bmap].bh->b_data)) || (bit_value == 1 && - reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data) == 0)) { + reiserfs_test_le_bit(offset, SB_AP_BITMAP(s)[bmap].bh->b_data) == 0)) { reiserfs_warning(s, "vs-4040: is_reusable: corresponding bit of block %lu does not " - "match required value (i==%d, j==%d) test_bit==%d", - block, i, j, reiserfs_test_le_bit(j, + "match required value (bmap==%d, offset==%d) test_bit==%d", + block, bmap, offset, reiserfs_test_le_bit(offset, SB_AP_BITMAP - (s)[i].bh-> + (s)[bmap].bh-> b_data)); return 0; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 80fc3b32802f..db2c581df766 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1818,6 +1818,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (is_reiserfs_3_5(rs) || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) set_bit(REISERFS_3_5, &(sbi->s_properties)); + else if (old_format) + set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties)); else set_bit(REISERFS_3_6, &(sbi->s_properties)); diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 31b4c0bd4fa0..4f21ad388c79 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -414,6 +414,7 @@ struct reiserfs_sb_info { /* Definitions of reiserfs on-disk properties: */ #define REISERFS_3_5 0 #define REISERFS_3_6 1 +#define REISERFS_OLD_FORMAT 2 enum reiserfs_mount_options { /* Mount options */ -- cgit v1.2.3-71-gd317 From 6f01046b35d940079822827498a7dd6d3eec8c6b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sat, 30 Sep 2006 23:28:43 -0700 Subject: [PATCH] reiserfs: reorganize bitmap loading functions This patch moves the bitmap loading code from super.c to bitmap.c The code is also restructured somewhat. The only difference between new format bitmaps and old format bitmaps is where they are. That's a two liner before loading the block to use the correct one. There's no need for an entirely separate code path. The load path is generally the same, with the pattern being to throw out a bunch of requests and then wait for them, then cache the metadata from the contents. Again, like the previous patches, the purpose is to set up for later ones. Update: There was a bug in the previously posted version of this that resulted in corruption. The problem was that bitmap 0 on new format file systems must be treated specially, and wasn't. A stupid bug with an easy fix. This is hopefully the last fix for the disaster that is the reiserfs bitmap patch set. If a bitmap block was full, first_zero_hint would end up at zero since it would never be changed from it's zeroed out value. This just sets it beyond the end of the bitmap block. If any bits are freed, it will be reset to a valid bit. When info->free_count = 0, then we already know it's full. Signed-off-by: Jeff Mahoney Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 88 ++++++++++++++++++++++++++++++++++ fs/reiserfs/resize.c | 1 + fs/reiserfs/super.c | 114 +------------------------------------------- include/linux/reiserfs_fs.h | 4 ++ 4 files changed, 94 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 44d9410e9d6a..abdd6d9c4558 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1285,3 +1286,90 @@ int reiserfs_can_fit_pages(struct super_block *sb /* superblock of filesystem return space > 0 ? space : 0; } + +void reiserfs_cache_bitmap_metadata(struct super_block *sb, + struct buffer_head *bh, + struct reiserfs_bitmap_info *info) +{ + unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); + + info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3); + + while (--cur >= (unsigned long *)bh->b_data) { + int base = ((char *)cur - bh->b_data) << 3; + + /* 0 and ~0 are special, we can optimize for them */ + if (*cur == 0) { + info->first_zero_hint = base; + info->free_count += BITS_PER_LONG; + } else if (*cur != ~0L) { /* A mix, investigate */ + int b; + for (b = BITS_PER_LONG - 1; b >= 0; b--) { + if (!reiserfs_test_le_bit(b, cur)) { + info->first_zero_hint = base + b; + info->free_count++; + } + } + } + } + /* The first bit must ALWAYS be 1 */ + BUG_ON(info->first_zero_hint == 0); +} + +struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, + unsigned int bitmap) +{ + b_blocknr_t block = (sb->s_blocksize << 3) * bitmap; + struct buffer_head *bh; + + /* Way old format filesystems had the bitmaps packed up front. + * I doubt there are any of these left, but just in case... */ + if (unlikely(test_bit(REISERFS_OLD_FORMAT, + &(REISERFS_SB(sb)->s_properties)))) + block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; + else if (bitmap == 0) + block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; + + bh = sb_getblk(sb, block); + if (!buffer_uptodate(bh)) + ll_rw_block(READ, 1, &bh); + + return bh; +} + +int reiserfs_init_bitmap_cache(struct super_block *sb) +{ + struct reiserfs_bitmap_info *bitmap; + int i; + + bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb)); + if (bitmap == NULL) + return -ENOMEM; + + memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb)); + + for (i = 0; i < SB_BMAP_NR(sb); i++) + bitmap[i].bh = reiserfs_read_bitmap_block(sb, i); + + /* make sure we have them all */ + for (i = 0; i < SB_BMAP_NR(sb); i++) { + wait_on_buffer(bitmap[i].bh); + if (!buffer_uptodate(bitmap[i].bh)) { + reiserfs_warning(sb, "sh-2029: %s: " + "bitmap block (#%lu) reading failed", + __FUNCTION__, bitmap[i].bh->b_blocknr); + for (i = 0; i < SB_BMAP_NR(sb); i++) + brelse(bitmap[i].bh); + vfree(bitmap); + return -EIO; + } + } + + /* Cache the info on the bitmaps before we get rolling */ + for (i = 0; i < SB_BMAP_NR(sb); i++) + reiserfs_cache_bitmap_metadata(sb, bitmap[i].bh, &bitmap[i]); + + SB_AP_BITMAP(sb) = bitmap; + + return 0; +} diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 958b75978994..90d39fd3096f 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -132,6 +132,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) get_bh(bh); memset(bh->b_data, 0, sb_blocksize(sb)); reiserfs_test_and_set_le_bit(0, bh->b_data); + reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); set_buffer_uptodate(bh); mark_buffer_dirty(bh); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index db2c581df766..c78e99e196fa 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1243,118 +1243,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) return 0; } -/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk. - * @sb - superblock for this filesystem - * @bi - the bitmap info to be loaded. Requires that bi->bh is valid. - * - * This routine counts how many free bits there are, finding the first zero - * as a side effect. Could also be implemented as a loop of test_bit() calls, or - * a loop of find_first_zero_bit() calls. This implementation is similar to - * find_first_zero_bit(), but doesn't return after it finds the first bit. - * Should only be called on fs mount, but should be fairly efficient anyways. - * - * bi->first_zero_hint is considered unset if it == 0, since the bitmap itself - * will * invariably occupt block 0 represented in the bitmap. The only - * exception to this is when free_count also == 0, since there will be no - * free blocks at all. - */ - -static void load_bitmap_info_data(struct super_block *sb, - struct reiserfs_bitmap_info *bi) -{ - unsigned long *cur = (unsigned long *)bi->bh->b_data; - - while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { - - /* No need to scan if all 0's or all 1's. - * Since we're only counting 0's, we can simply ignore all 1's */ - if (*cur == 0) { - if (bi->first_zero_hint == 0) { - bi->first_zero_hint = - ((char *)cur - bi->bh->b_data) << 3; - } - bi->free_count += sizeof(unsigned long) * 8; - } else if (*cur != ~0L) { - int b; - for (b = 0; b < sizeof(unsigned long) * 8; b++) { - if (!reiserfs_test_le_bit(b, cur)) { - bi->free_count++; - if (bi->first_zero_hint == 0) - bi->first_zero_hint = - (((char *)cur - - bi->bh->b_data) << 3) + b; - } - } - } - cur++; - } - -#ifdef CONFIG_REISERFS_CHECK -// This outputs a lot of unneded info on big FSes -// reiserfs_warning ("bitmap loaded from block %d: %d free blocks", -// bi->bh->b_blocknr, bi->free_count); -#endif -} - -static int read_bitmaps(struct super_block *s) -{ - int i, bmap_nr; - - SB_AP_BITMAP(s) = - vmalloc(sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - if (SB_AP_BITMAP(s) == 0) - return 1; - memset(SB_AP_BITMAP(s), 0, - sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - for (i = 0, bmap_nr = - REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; - i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { - SB_AP_BITMAP(s)[i].bh = sb_getblk(s, bmap_nr); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) - ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); - } - for (i = 0; i < SB_BMAP_NR(s); i++) { - wait_on_buffer(SB_AP_BITMAP(s)[i].bh); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { - reiserfs_warning(s, "sh-2029: reiserfs read_bitmaps: " - "bitmap block (#%lu) reading failed", - SB_AP_BITMAP(s)[i].bh->b_blocknr); - for (i = 0; i < SB_BMAP_NR(s); i++) - brelse(SB_AP_BITMAP(s)[i].bh); - vfree(SB_AP_BITMAP(s)); - SB_AP_BITMAP(s) = NULL; - return 1; - } - load_bitmap_info_data(s, SB_AP_BITMAP(s) + i); - } - return 0; -} - -static int read_old_bitmaps(struct super_block *s) -{ - int i; - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ - - /* read true bitmap */ - SB_AP_BITMAP(s) = - vmalloc(sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); - if (SB_AP_BITMAP(s) == 0) - return 1; - - memset(SB_AP_BITMAP(s), 0, - sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); - - for (i = 0; i < sb_bmap_nr(rs); i++) { - SB_AP_BITMAP(s)[i].bh = sb_bread(s, bmp1 + i); - if (!SB_AP_BITMAP(s)[i].bh) - return 1; - load_bitmap_info_data(s, SB_AP_BITMAP(s) + i); - } - - return 0; -} - static int read_super_block(struct super_block *s, int offset) { struct buffer_head *bh; @@ -1736,7 +1624,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) sbi->s_mount_state = SB_REISERFS_STATE(s); sbi->s_mount_state = REISERFS_VALID_FS; - if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) { + if ((errval = reiserfs_init_bitmap_cache(s))) { SWARN(silent, s, "jmacd-8: reiserfs_fill_super: unable to read bitmap"); goto error; diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 9c63abffd7b2..7bc6bfb86253 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2073,6 +2073,10 @@ void reiserfs_init_alloc_options(struct super_block *s); */ __le32 reiserfs_choose_packing(struct inode *dir); +int reiserfs_init_bitmap_cache(struct super_block *sb); +void reiserfs_free_bitmap_cache(struct super_block *sb); +void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); +struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, b_blocknr_t, int for_unformatted); -- cgit v1.2.3-71-gd317 From 5065227b46235ec0131b383cc2f537069b55c6b6 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sat, 30 Sep 2006 23:28:44 -0700 Subject: [PATCH] reiserfs: on-demand bitmap loading This is the patch the three previous ones have been leading up to. It changes the behavior of ReiserFS from loading and caching all the bitmaps as special, to treating the bitmaps like any other bit of metadata and just letting the system-wide caches figure out what to hang on to. Buffer heads are allocated on the fly, so there is no need to retain pointers to all of them. The caching of the metadata occurs when the data is read and updated, and is considered invalid and uncached until then. I needed to remove the vs-4040 check for performing a duplicate operation on a particular bit. The reason is that while the other sites for working with bitmaps are allowed to schedule, is_reusable() is called from do_balance(), which will panic if a schedule occurs in certain places. The benefit of on-demand bitmaps clearly outweighs a sanity check that depends on a compile-time option that is discouraged. [akpm@osdl.org: warning fix] Signed-off-by: Jeff Mahoney Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 97 +++++++++++++++++++----------------------- fs/reiserfs/resize.c | 24 ++++++++--- fs/reiserfs/super.c | 39 ++++------------- include/linux/reiserfs_fs_sb.h | 1 - 4 files changed, 70 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index abdd6d9c4558..cca1dbf5458f 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -60,7 +60,6 @@ static inline void get_bit_address(struct super_block *s, int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) { int bmap, offset; - struct buffer_head *bh; if (block == 0 || block >= SB_BLOCK_COUNT(s)) { reiserfs_warning(s, @@ -98,22 +97,6 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) return 0; } - bh = SB_AP_BITMAP(s)[bmap].bh; - get_bh(bh); - - if ((bit_value == 0 && reiserfs_test_le_bit(offset, bh->b_data)) || - (bit_value == 1 && reiserfs_test_le_bit(offset, bh->b_data) == 0)) { - reiserfs_warning(s, - "vs-4040: is_reusable: corresponding bit of block %lu does not " - "match required value (bmap==%d, offset==%d) test_bit==%d", - block, bmap, offset, - reiserfs_test_le_bit(offset, bh->b_data)); - - brelse(bh); - return 0; - } - brelse(bh); - if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { reiserfs_warning(s, "vs-4050: is_reusable: this is root block (%u), " @@ -173,13 +156,10 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, bmap_n); return 0; } - bh = bi->bh; - get_bh(bh); - if (buffer_locked(bh)) { - PROC_INFO_INC(s, scan_bitmap.wait); - __wait_on_buffer(bh); - } + bh = reiserfs_read_bitmap_block(s, bmap_n); + if (bh == NULL) + return 0; while (1) { cont: @@ -285,9 +265,20 @@ static int bmap_hash_id(struct super_block *s, u32 id) */ static inline int block_group_used(struct super_block *s, u32 id) { - int bm; - bm = bmap_hash_id(s, id); - if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) { + int bm = bmap_hash_id(s, id); + struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; + + /* If we don't have cached information on this bitmap block, we're + * going to have to load it later anyway. Loading it here allows us + * to make a better decision. This favors long-term performace gain + * with a better on-disk layout vs. a short term gain of skipping the + * read and potentially having a bad placement. */ + if (info->first_zero_hint == 0) { + struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); + brelse(bh); + } + + if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) { return 0; } return 1; @@ -413,8 +404,9 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, return; } - bmbh = apbi[nr].bh; - get_bh(bmbh); + bmbh = reiserfs_read_bitmap_block(s, nr); + if (!bmbh) + return; reiserfs_prepare_for_journal(s, bmbh, 1); @@ -1320,6 +1312,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap) { b_blocknr_t block = (sb->s_blocksize << 3) * bitmap; + struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; struct buffer_head *bh; /* Way old format filesystems had the bitmaps packed up front. @@ -1330,9 +1323,21 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, else if (bitmap == 0) block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - bh = sb_getblk(sb, block); - if (!buffer_uptodate(bh)) - ll_rw_block(READ, 1, &bh); + bh = sb_bread(sb, block); + if (bh == NULL) + reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%lu) " + "reading failed", __FUNCTION__, bh->b_blocknr); + else { + if (buffer_locked(bh)) { + PROC_INFO_INC(sb, scan_bitmap.wait); + __wait_on_buffer(bh); + } + BUG_ON(!buffer_uptodate(bh)); + BUG_ON(atomic_read(&bh->b_count) == 0); + + if (info->first_zero_hint == 0) + reiserfs_cache_bitmap_metadata(sb, bh, info); + } return bh; } @@ -1340,7 +1345,6 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, int reiserfs_init_bitmap_cache(struct super_block *sb) { struct reiserfs_bitmap_info *bitmap; - int i; bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb)); if (bitmap == NULL) @@ -1348,28 +1352,15 @@ int reiserfs_init_bitmap_cache(struct super_block *sb) memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb)); - for (i = 0; i < SB_BMAP_NR(sb); i++) - bitmap[i].bh = reiserfs_read_bitmap_block(sb, i); - - /* make sure we have them all */ - for (i = 0; i < SB_BMAP_NR(sb); i++) { - wait_on_buffer(bitmap[i].bh); - if (!buffer_uptodate(bitmap[i].bh)) { - reiserfs_warning(sb, "sh-2029: %s: " - "bitmap block (#%lu) reading failed", - __FUNCTION__, bitmap[i].bh->b_blocknr); - for (i = 0; i < SB_BMAP_NR(sb); i++) - brelse(bitmap[i].bh); - vfree(bitmap); - return -EIO; - } - } - - /* Cache the info on the bitmaps before we get rolling */ - for (i = 0; i < SB_BMAP_NR(sb); i++) - reiserfs_cache_bitmap_metadata(sb, bitmap[i].bh, &bitmap[i]); - SB_AP_BITMAP(sb) = bitmap; return 0; } + +void reiserfs_free_bitmap_cache(struct super_block *sb) +{ + if (SB_AP_BITMAP(sb)) { + vfree(SB_AP_BITMAP(sb)); + SB_AP_BITMAP(sb) = NULL; + } +} diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 90d39fd3096f..315684793d1d 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -128,8 +128,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) * transaction begins, and the new bitmaps don't matter if the * transaction fails. */ for (i = bmap_nr; i < bmap_nr_new; i++) { - bh = sb_getblk(s, i * s->s_blocksize * 8); - get_bh(bh); + /* don't use read_bitmap_block since it will cache + * the uninitialized bitmap */ + bh = sb_bread(s, i * s->s_blocksize * 8); memset(bh->b_data, 0, sb_blocksize(sb)); reiserfs_test_and_set_le_bit(0, bh->b_data); reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); @@ -140,7 +141,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) // update bitmap_info stuff bitmap[i].first_zero_hint = 1; bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; - bitmap[i].bh = bh; brelse(bh); } /* free old bitmap blocks array */ @@ -157,8 +157,13 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) /* Extend old last bitmap block - new blocks have been made available */ info = SB_AP_BITMAP(s) + bmap_nr - 1; - bh = info->bh; - get_bh(bh); + bh = reiserfs_read_bitmap_block(s, bmap_nr - 1); + if (!bh) { + int jerr = journal_end(&th, s, 10); + if (jerr) + return jerr; + return -EIO; + } reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) @@ -172,8 +177,13 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) /* Correct new last bitmap block - It may not be full */ info = SB_AP_BITMAP(s) + bmap_nr_new - 1; - bh = info->bh; - get_bh(bh); + bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); + if (!bh) { + int jerr = journal_end(&th, s, 10); + if (jerr) + return jerr; + return -EIO; + } reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c78e99e196fa..c89aa2338191 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -432,7 +432,6 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_put_super(struct super_block *s) { - int i; struct reiserfs_transaction_handle th; th.t_trans_id = 0; @@ -462,10 +461,7 @@ static void reiserfs_put_super(struct super_block *s) */ journal_release(&th, s); - for (i = 0; i < SB_BMAP_NR(s); i++) - brelse(SB_AP_BITMAP(s)[i].bh); - - vfree(SB_AP_BITMAP(s)); + reiserfs_free_bitmap_cache(s); brelse(SB_BUFFER_WITH_SB(s)); @@ -1344,7 +1340,6 @@ static int read_super_block(struct super_block *s, int offset) /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { - int i; ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); wait_on_buffer(SB_BUFFER_WITH_SB(s)); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { @@ -1353,20 +1348,7 @@ static int reread_meta_blocks(struct super_block *s) return 1; } - for (i = 0; i < SB_BMAP_NR(s); i++) { - ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)); - wait_on_buffer(SB_AP_BITMAP(s)[i].bh); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { - reiserfs_warning(s, - "reread_meta_blocks, error reading bitmap block number %d at %llu", - i, - (unsigned long long)SB_AP_BITMAP(s)[i]. - bh->b_blocknr); - return 1; - } - } return 0; - } ///////////////////////////////////////////////////// @@ -1547,7 +1529,6 @@ static int function2code(hashf_t func) static int reiserfs_fill_super(struct super_block *s, void *data, int silent) { struct inode *root_inode; - int j; struct reiserfs_transaction_handle th; int old_format = 0; unsigned long blocks; @@ -1793,19 +1774,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (jinit_done) { /* kill the commit thread, free journal ram */ journal_release_error(NULL, s); } - if (SB_DISK_SUPER_BLOCK(s)) { - for (j = 0; j < SB_BMAP_NR(s); j++) { - if (SB_AP_BITMAP(s)) - brelse(SB_AP_BITMAP(s)[j].bh); - } - vfree(SB_AP_BITMAP(s)); - } + + reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); #ifdef CONFIG_QUOTA - for (j = 0; j < MAXQUOTAS; j++) { - kfree(sbi->s_qf_names[j]); - sbi->s_qf_names[j] = NULL; + { + int j; + for (j = 0; j < MAXQUOTAS; j++) { + kfree(sbi->s_qf_names[j]); + sbi->s_qf_names[j] = NULL; + } } #endif kfree(sbi); diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 4f21ad388c79..73e0becec086 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -267,7 +267,6 @@ struct reiserfs_bitmap_info { // FIXME: Won't work with block sizes > 8K __u16 first_zero_hint; __u16 free_count; - struct buffer_head *bh; /* the actual bitmap */ }; struct proc_dir_entry; -- cgit v1.2.3-71-gd317 From 027445c37282bc1ed26add45e573ad2d3e4860a5 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 30 Sep 2006 23:28:46 -0700 Subject: [PATCH] Vectorize aio_read/aio_write fileop methods This patch vectorizes aio_read() and aio_write() methods to prepare for collapsing all aio & vectored operations into one interface - which is aio_read()/aio_write(). Signed-off-by: Badari Pulavarty Signed-off-by: Christoph Hellwig Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 5 +-- Documentation/filesystems/vfs.txt | 4 +- arch/s390/hypfs/inode.c | 17 +++++++-- drivers/char/raw.c | 14 +------ drivers/usb/gadget/inode.c | 80 +++++++++++++++++++++++++++------------ fs/aio.c | 15 ++++++-- fs/block_dev.c | 10 +---- fs/cifs/cifsfs.c | 6 +-- fs/ext3/file.c | 5 ++- fs/nfs/direct.c | 26 ++++++++++--- fs/nfs/file.c | 34 +++++++++-------- fs/ntfs/file.c | 8 ++-- fs/ocfs2/file.c | 28 +++++++------- fs/read_write.c | 20 ++++++++-- fs/reiserfs/file.c | 4 +- fs/xfs/linux-2.6/xfs_file.c | 46 +++++++++++----------- include/linux/aio.h | 2 + include/linux/fs.h | 10 ++--- include/linux/nfs_fs.h | 10 +++-- include/net/sock.h | 1 - mm/filemap.c | 39 +++++++++---------- net/socket.c | 49 +++++++++++------------- 22 files changed, 242 insertions(+), 191 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 247d7f619aa2..eb1a6cad21e6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -356,10 +356,9 @@ The last two are called only from check_disk_change(). prototypes: loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, - loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 1cb7e8be927a..cd07c21b8400 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -699,9 +699,9 @@ This describes how the VFS can manipulate an open file. As of kernel struct file_operations { loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 813fc21358f9..cd702ae45d6d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -134,12 +134,20 @@ static int hypfs_open(struct inode *inode, struct file *filp) return 0; } -static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf, - size_t count, loff_t offset) +static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) { char *data; size_t len; struct file *filp = iocb->ki_filp; + /* XXX: temporary */ + char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; + + if (nr_segs != 1) { + count = -EINVAL; + goto out; + } data = filp->private_data; len = strlen(data); @@ -158,12 +166,13 @@ static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf, out: return count; } -static ssize_t hypfs_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) { int rc; struct super_block *sb; struct hypfs_sb_info *fs_info; + size_t count = iov_length(iov, nr_segs); sb = iocb->ki_filp->f_dentry->d_inode->i_sb; fs_info = sb->s_fs_info; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index c596a08c07b3..173fb08555d5 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -249,23 +249,11 @@ static ssize_t raw_file_write(struct file *file, const char __user *buf, return generic_file_write_nolock(file, &local_iov, 1, ppos); } -static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { - .iov_base = (char __user *)buf, - .iov_len = count - }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - - static const struct file_operations raw_fops = { .read = generic_file_read, .aio_read = generic_file_aio_read, .write = raw_file_write, - .aio_write = raw_file_aio_write, + .aio_write = generic_file_aio_write_nolock, .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 4a000d846a93..86924f9cdd7e 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -533,7 +533,8 @@ struct kiocb_priv { struct usb_request *req; struct ep_data *epdata; void *buf; - char __user *ubuf; /* NULL for writes */ + const struct iovec *iv; + unsigned long nr_segs; unsigned actual; }; @@ -561,17 +562,32 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) static ssize_t ep_aio_read_retry(struct kiocb *iocb) { struct kiocb_priv *priv = iocb->private; - ssize_t status = priv->actual; - - /* we "retry" to get the right mm context for this: */ - status = copy_to_user(priv->ubuf, priv->buf, priv->actual); - if (unlikely(0 != status)) - status = -EFAULT; - else - status = priv->actual; - kfree(priv->buf); - kfree(priv); - return status; + ssize_t len, total; + int i; + + /* we "retry" to get the right mm context for this: */ + + /* copy stuff into user buffers */ + total = priv->actual; + len = 0; + for (i=0; i < priv->nr_segs; i++) { + ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total); + + if (copy_to_user(priv->iv[i].iov_base, priv->buf, this)) { + if (len == 0) + len = -EFAULT; + break; + } + + total -= this; + len += this; + if (total == 0) + break; + } + kfree(priv->buf); + kfree(priv); + aio_put_req(iocb); + return len; } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) @@ -584,7 +600,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) spin_lock(&epdata->dev->lock); priv->req = NULL; priv->epdata = NULL; - if (priv->ubuf == NULL + if (priv->iv == NULL || unlikely(req->actual == 0) || unlikely(kiocbIsCancelled(iocb))) { kfree(req->buf); @@ -619,7 +635,8 @@ ep_aio_rwtail( char *buf, size_t len, struct ep_data *epdata, - char __user *ubuf + const struct iovec *iv, + unsigned long nr_segs ) { struct kiocb_priv *priv; @@ -634,7 +651,8 @@ fail: return value; } iocb->private = priv; - priv->ubuf = ubuf; + priv->iv = iv; + priv->nr_segs = nr_segs; value = get_ready_ep(iocb->ki_filp->f_flags, epdata); if (unlikely(value < 0)) { @@ -674,41 +692,53 @@ fail: kfree(priv); put_ep(epdata); } else - value = (ubuf ? -EIOCBRETRY : -EIOCBQUEUED); + value = (iv ? -EIOCBRETRY : -EIOCBQUEUED); return value; } static ssize_t -ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o) +ep_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t o) { struct ep_data *epdata = iocb->ki_filp->private_data; char *buf; if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN)) return -EINVAL; - buf = kmalloc(len, GFP_KERNEL); + + buf = kmalloc(iocb->ki_left, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; + iocb->ki_retry = ep_aio_read_retry; - return ep_aio_rwtail(iocb, buf, len, epdata, ubuf); + return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs); } static ssize_t -ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o) +ep_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t o) { struct ep_data *epdata = iocb->ki_filp->private_data; char *buf; + size_t len = 0; + int i = 0; if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN))) return -EINVAL; - buf = kmalloc(len, GFP_KERNEL); + + buf = kmalloc(iocb->ki_left, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; - if (unlikely(copy_from_user(buf, ubuf, len) != 0)) { - kfree(buf); - return -EFAULT; + + for (i=0; i < nr_segs; i++) { + if (unlikely(copy_from_user(&buf[len], iov[i].iov_base, + iov[i].iov_len) != 0)) { + kfree(buf); + return -EFAULT; + } + len += iov[i].iov_len; } - return ep_aio_rwtail(iocb, buf, len, epdata, NULL); + return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0); } /*----------------------------------------------------------------------*/ diff --git a/fs/aio.c b/fs/aio.c index 950630187acc..27ff56540c73 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -15,6 +15,7 @@ #include #include #include +#include #define DEBUG 0 @@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *iocb) ssize_t ret = 0; do { - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + iocb->ki_inline_vec.iov_base = iocb->ki_buf; + iocb->ki_inline_vec.iov_len = iocb->ki_left; + + ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec, + 1, iocb->ki_pos); /* * Can't just depend on iocb->ki_left to determine * whether we are done. This may have been a short read. @@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *iocb) ssize_t ret = 0; do { - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + iocb->ki_inline_vec.iov_base = iocb->ki_buf; + iocb->ki_inline_vec.iov_len = iocb->ki_left; + + ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec, + 1, iocb->ki_pos); if (ret > 0) { iocb->ki_buf += ret; iocb->ki_left -= ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index 0c361ea7e5a6..8c819117310b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1162,14 +1162,6 @@ static ssize_t blkdev_file_write(struct file *file, const char __user *buf, return generic_file_write_nolock(file, &local_iov, 1, ppos); } -static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); @@ -1192,7 +1184,7 @@ const struct file_operations def_blk_fops = { .read = generic_file_read, .write = blkdev_file_write, .aio_read = generic_file_aio_read, - .aio_write = blkdev_file_aio_write, + .aio_write = generic_file_aio_write_nolock, .mmap = generic_file_mmap, .fsync = block_fsync, .unlocked_ioctl = block_ioctl, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 22bcf4d7e7ae..5abb42a7c53e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -492,13 +492,13 @@ static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, return written; } -static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_dentry->d_inode; ssize_t written; - written = generic_file_aio_write(iocb, buf, count, pos); + written = generic_file_aio_write(iocb, iov, nr_segs, pos); if (!CIFS_I(inode)->clientCanCacheAll) filemap_fdatawrite(inode->i_mapping); return written; diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 74ff20f9d09b..5c762457bc89 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -48,14 +48,15 @@ static int ext3_release_file (struct inode * inode, struct file * filp) } static ssize_t -ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ext3_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; ssize_t ret; int err; - ret = generic_file_aio_write(iocb, buf, count, pos); + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); /* * Skip flushing if there was an error, or if nothing was written. diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 377839bed172..9f7f8b9ea1e2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -707,8 +707,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer into which to read data - * @count: number of bytes to read + * @iov: vector of user buffers into which to read data + * @nr_segs: size of iov vector * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling @@ -725,17 +725,24 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ + const char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); + if (nr_segs != 1) + return -EINVAL; + if (count < 0) goto out; retval = -EFAULT; @@ -760,8 +767,8 @@ out: /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer from which to write data - * @count: number of bytes to write + * @iov: vector of user buffers from which to write data + * @nr_segs: size of iov vector * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling @@ -782,17 +789,24 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { ssize_t retval; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ + const char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); + if (nr_segs != 1) + return -EINVAL; + retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index be997d649127..cc93865cea93 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,8 +41,10 @@ static int nfs_file_release(struct inode *, struct file *); static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); -static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); -static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); +static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); @@ -53,8 +55,8 @@ const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs_file_open, .flush = nfs_file_flush, @@ -196,15 +198,17 @@ nfs_file_flush(struct file *file, fl_owner_t id) } static ssize_t -nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) +nfs_file_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; ssize_t result; + size_t count = iov_length(iov, nr_segs); #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, buf, count, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos); #endif dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", @@ -214,7 +218,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) - result = generic_file_aio_read(iocb, buf, count, pos); + result = generic_file_aio_read(iocb, iov, nr_segs, pos); return result; } @@ -336,24 +340,22 @@ const struct address_space_operations nfs_file_aops = { #endif }; -/* - * Write to a file (through the page cache). - */ -static ssize_t -nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; ssize_t result; + size_t count = iov_length(iov, nr_segs); #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, buf, count, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos); #endif - dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", + dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (unsigned long) pos); + inode->i_ino, (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -372,7 +374,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t goto out; nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); - result = generic_file_aio_write(iocb, buf, count, pos); + result = generic_file_aio_write(iocb, iov, nr_segs, pos); out: return result; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 585a79d39c9d..0c46f5c86b71 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2176,20 +2176,18 @@ out: /** * ntfs_file_aio_write - */ -static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); + ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, pos, ret); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2bbfa17090cf..d9ba0a931a03 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -961,25 +961,23 @@ static inline int ocfs2_write_should_remove_suid(struct inode *inode) } static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; u32 clusters; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; loff_t newsize, saved_pos; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); /* happy write of zero bytes */ - if (count == 0) + if (iocb->ki_left == 0) return 0; if (!inode) { @@ -1048,7 +1046,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, } else { saved_pos = iocb->ki_pos; } - newsize = count + saved_pos; + newsize = iocb->ki_left + saved_pos; mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", (long long) saved_pos, (long long) newsize, @@ -1081,7 +1079,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (!clusters) break; - ret = ocfs2_extend_file(inode, NULL, newsize, count); + ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); @@ -1098,7 +1096,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb); - ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); + ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); @@ -1132,16 +1130,16 @@ out: } static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int ret = 0, rw_level = -1, have_alloc_sem = 0; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); @@ -1185,7 +1183,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, } ocfs2_meta_unlock(inode, 0); - ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); + ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); diff --git a/fs/read_write.c b/fs/read_write.c index d4cb3183c99c..679dd535380f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(struct kiocb *iocb) ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read); ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index c11f6118c9ca..41f24369e47a 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1334,7 +1334,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t if (err) return err; } - result = generic_file_write(file, buf, count, ppos); + result = do_sync_write(file, buf, count, ppos); if (after_file_end) { /* Now update i_size and remove the savelink */ struct reiserfs_transaction_handle th; @@ -1566,7 +1566,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t } const struct file_operations reiserfs_file_operations = { - .read = generic_file_read, + .read = do_sync_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 41cfcba7ce49..4737971c6a39 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -49,50 +49,49 @@ static struct vm_operations_struct xfs_dmapi_file_vm_ops; STATIC inline ssize_t __xfs_file_read( struct kiocb *iocb, - char __user *buf, + const struct iovec *iov, + unsigned long nr_segs, int ioflags, - size_t count, loff_t pos) { - struct iovec iov = {buf, count}; struct file *file = iocb->ki_filp; bhv_vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - return bhv_vop_read(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); + return bhv_vop_read(vp, iocb, iov, nr_segs, &iocb->ki_pos, + ioflags, NULL); } STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); } STATIC ssize_t xfs_file_aio_read_invis( struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); } STATIC inline ssize_t __xfs_file_write( - struct kiocb *iocb, - const char __user *buf, - int ioflags, - size_t count, - loff_t pos) + struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, + int ioflags, + loff_t pos) { - struct iovec iov = {(void __user *)buf, count}; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; bhv_vnode_t *vp = vn_from_inode(inode); @@ -100,27 +99,28 @@ __xfs_file_write( BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - return bhv_vop_write(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); + return bhv_vop_write(vp, iocb, iov, nr_segs, &iocb->ki_pos, + ioflags, NULL); } STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); } STATIC ssize_t xfs_file_aio_write_invis( struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); } STATIC inline ssize_t diff --git a/include/linux/aio.h b/include/linux/aio.h index 8a0193385a9b..58349e58b749 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -112,6 +113,7 @@ struct kiocb { long ki_retried; /* just for testing */ long ki_kicked; /* just for testing */ long ki_queued; /* just for testing */ + struct iovec ki_inline_vec; /* inline vector */ struct list_head ki_list; /* the aio core uses this * for cancellation */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5baf3a153403..257bae16f545 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1097,9 +1097,9 @@ struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); @@ -1704,11 +1704,11 @@ extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t); +extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *); -extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t); +extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, - unsigned long, loff_t *); + unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 98c9b9f667a5..76ff54846ada 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -367,10 +367,12 @@ extern int nfs3_removexattr (struct dentry *, const char *name); */ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, unsigned long); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, - size_t count, loff_t pos); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos); +extern ssize_t nfs_file_direct_read(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos); +extern ssize_t nfs_file_direct_write(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos); /* * linux/fs/nfs/dir.c diff --git a/include/net/sock.h b/include/net/sock.h index edd4d73ce7f5..40bb90ebb2d1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -665,7 +665,6 @@ struct sock_iocb { struct sock *sk; struct scm_cookie *scm; struct msghdr *msg, async_msg; - struct iovec async_iov; struct kiocb *kiocb; }; diff --git a/mm/filemap.c b/mm/filemap.c index c4fe97f5ace0..f6c1d22b504f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1226,12 +1226,11 @@ out: EXPORT_SYMBOL(__generic_file_aio_read); ssize_t -generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct iovec local_iov = { .iov_base = buf, .iov_len = count }; - BUG_ON(iocb->ki_pos != pos); - return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos); + return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos); } EXPORT_SYMBOL(generic_file_aio_read); @@ -2315,22 +2314,22 @@ out: current->backing_dev_info = NULL; return written ? written : err; } -EXPORT_SYMBOL(generic_file_aio_write_nolock); -ssize_t -generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) +ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - loff_t pos = *ppos; - ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos); + BUG_ON(iocb->ki_pos != pos); + + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, + &iocb->ki_pos); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; + ssize_t err; err = sync_page_range_nolock(inode, mapping, pos, ret); if (err < 0) @@ -2338,6 +2337,7 @@ generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, } return ret; } +EXPORT_SYMBOL(generic_file_aio_write_nolock); static ssize_t __generic_file_write_nolock(struct file *file, const struct iovec *iov, @@ -2347,8 +2347,9 @@ __generic_file_write_nolock(struct file *file, const struct iovec *iov, ssize_t ret; init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); - if (ret == -EIOCBQUEUED) + if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); return ret; } @@ -2361,28 +2362,28 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov, ssize_t ret; init_sync_kiocb(&kiocb, file); - ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); + kiocb.ki_pos = *ppos; + ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; return ret; } EXPORT_SYMBOL(generic_file_write_nolock); -ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, - &iocb->ki_pos); + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, + &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { diff --git a/net/socket.c b/net/socket.c index 1bc4167e0da8..df92e4252749 100644 --- a/net/socket.c +++ b/net/socket.c @@ -95,10 +95,10 @@ #include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, - size_t size, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, - size_t size, loff_t pos); +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -664,7 +664,6 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - char __user *ubuf, size_t size, struct sock_iocb *siocb) { if (!is_sync_kiocb(iocb)) { @@ -675,16 +674,13 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, } siocb->kiocb = iocb; - siocb->async_iov.iov_base = ubuf; - siocb->async_iov.iov_len = size; - iocb->private = siocb; return siocb; } static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, const struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; @@ -715,32 +711,33 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; - ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); + ret = do_sock_read(&msg, &iocb, file, iov, nr_segs); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); return ret; } -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t count, loff_t pos) +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct sock_iocb siocb, *x; if (pos != 0) return -ESPIPE; - if (count == 0) /* Match SYS5 behaviour */ + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ return 0; - x = alloc_sock_iocb(iocb, ubuf, count, &siocb); + + x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, const struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; @@ -773,28 +770,28 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov, init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; - ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs); + ret = do_sock_write(&msg, &iocb, file, iov, nr_segs); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); return ret; } -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t count, loff_t pos) +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct sock_iocb siocb, *x; if (pos != 0) return -ESPIPE; - if (count == 0) /* Match SYS5 behaviour */ + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ return 0; - x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb); + x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); } /* -- cgit v1.2.3-71-gd317 From ee0b3e671baff681d69fbf0db33b47603c0a8280 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 30 Sep 2006 23:28:47 -0700 Subject: [PATCH] Remove readv/writev methods and use aio_read/aio_write instead This patch removes readv() and writev() methods and replaces them with aio_read()/aio_write() methods. Signed-off-by: Badari Pulavarty Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/raw.c | 2 - drivers/net/tun.c | 37 +++++----------- fs/bad_inode.c | 2 - fs/block_dev.c | 2 - fs/cifs/cifsfs.c | 16 ------- fs/compat.c | 44 +++++-------------- fs/ext2/file.c | 2 - fs/ext3/file.c | 2 - fs/fat/file.c | 2 - fs/fuse/dev.c | 37 +++++----------- fs/hostfs/hostfs_kern.c | 2 - fs/jfs/file.c | 2 - fs/ntfs/file.c | 2 - fs/pipe.c | 59 ++++++++++---------------- fs/read_write.c | 101 +++++++++++++++++++++++++++++--------------- fs/read_write.h | 14 ++++++ fs/xfs/linux-2.6/xfs_file.c | 94 ----------------------------------------- include/linux/fs.h | 6 --- mm/filemap.c | 36 ---------------- net/socket.c | 40 ------------------ sound/core/pcm_native.c | 40 +++++++++--------- 21 files changed, 154 insertions(+), 388 deletions(-) create mode 100644 fs/read_write.h (limited to 'include/linux') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 173fb08555d5..490db531e2d8 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -257,8 +257,6 @@ static const struct file_operations raw_fops = { .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, - .readv = generic_file_readv, - .writev = generic_file_writev, .owner = THIS_MODULE, }; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index de8da6dee1b0..bc0face01d25 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -288,11 +288,10 @@ static inline size_t iov_total(const struct iovec *iv, unsigned long count) return len; } -/* Writev */ -static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv, - unsigned long count, loff_t *pos) +static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = iocb->ki_filp->private_data; if (!tun) return -EBADFD; @@ -302,14 +301,6 @@ static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv, return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count)); } -/* Write */ -static ssize_t tun_chr_write(struct file * file, const char __user * buf, - size_t count, loff_t *pos) -{ - struct iovec iv = { (void __user *) buf, count }; - return tun_chr_writev(file, &iv, 1, pos); -} - /* Put packet to the user space buffer */ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, struct sk_buff *skb, @@ -343,10 +334,10 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, return total; } -/* Readv */ -static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv, - unsigned long count, loff_t *pos) +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) { + struct file *file = iocb->ki_filp; struct tun_struct *tun = file->private_data; DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; @@ -426,14 +417,6 @@ static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv, return ret; } -/* Read */ -static ssize_t tun_chr_read(struct file * file, char __user * buf, - size_t count, loff_t *pos) -{ - struct iovec iv = { buf, count }; - return tun_chr_readv(file, &iv, 1, pos); -} - static void tun_setup(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -764,10 +747,10 @@ static int tun_chr_close(struct inode *inode, struct file *file) static struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, - .read = tun_chr_read, - .readv = tun_chr_readv, - .write = tun_chr_write, - .writev = tun_chr_writev, + .read = do_sync_read, + .aio_read = tun_chr_aio_read, + .write = do_sync_write, + .aio_write = tun_chr_aio_write, .poll = tun_chr_poll, .ioctl = tun_chr_ioctl, .open = tun_chr_open, diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 80599ae33966..34e6d7b220c3 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -40,8 +40,6 @@ static const struct file_operations bad_file_ops = .aio_fsync = EIO_ERROR, .fasync = EIO_ERROR, .lock = EIO_ERROR, - .readv = EIO_ERROR, - .writev = EIO_ERROR, .sendfile = EIO_ERROR, .sendpage = EIO_ERROR, .get_unmapped_area = EIO_ERROR, diff --git a/fs/block_dev.c b/fs/block_dev.c index 8c819117310b..0f143094ef1d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1191,8 +1191,6 @@ const struct file_operations def_blk_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .readv = generic_file_readv, - .writev = generic_file_write_nolock, .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5abb42a7c53e..c00c654f2e11 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -480,18 +480,6 @@ cifs_get_sb(struct file_system_type *fs_type, return simple_set_mnt(mnt, sb); } -static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct inode *inode = file->f_dentry->d_inode; - ssize_t written; - - written = generic_file_writev(file, iov, nr_segs, ppos); - if (!CIFS_I(inode)->clientCanCacheAll) - filemap_fdatawrite(inode->i_mapping); - return written; -} - static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -577,8 +565,6 @@ struct inode_operations cifs_symlink_inode_ops = { const struct file_operations cifs_file_ops = { .read = do_sync_read, .write = do_sync_write, - .readv = generic_file_readv, - .writev = cifs_file_writev, .aio_read = generic_file_aio_read, .aio_write = cifs_file_aio_write, .open = cifs_open, @@ -620,8 +606,6 @@ const struct file_operations cifs_file_direct_ops = { const struct file_operations cifs_file_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, - .readv = generic_file_readv, - .writev = cifs_file_writev, .aio_read = generic_file_aio_read, .aio_write = cifs_file_aio_write, .open = cifs_open, diff --git a/fs/compat.c b/fs/compat.c index 122b4e3992b5..6b90bf35f61d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -70,6 +70,8 @@ int compat_printk(const char *fmt, ...) return ret; } +#include "read_write.h" + /* * Not all architectures have sys_utime, so implement this in terms * of sys_utimes. @@ -1149,9 +1151,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, const struct compat_iovec __user *uvector, unsigned long nr_segs, loff_t *pos) { - typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); - typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov=iovstack, *vector; @@ -1234,39 +1233,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->readv; + fnv = file->f_op->aio_read; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; - } - if (fnv) { - ret = fnv(file, iov, nr_segs, pos); - goto out; + fnv = file->f_op->aio_write; } - /* Do it by hand, with file-ops */ - ret = 0; - vector = iov; - while (nr_segs > 0) { - void __user * base; - size_t len; - ssize_t nr; - - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(file, base, len, pos); + if (fnv) + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, + pos, fnv); + else + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); - if (nr < 0) { - if (!ret) ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } out: if (iov != iovstack) kfree(iov); @@ -1294,7 +1272,7 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsign goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos); @@ -1317,7 +1295,7 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsig goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index e8bbed9dd268..e893e2be9ed4 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -53,8 +53,6 @@ const struct file_operations ext2_file_operations = { .open = generic_file_open, .release = ext2_release_file, .fsync = ext2_sync_file, - .readv = generic_file_readv, - .writev = generic_file_writev, .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5c762457bc89..e96c388047e0 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -112,8 +112,6 @@ const struct file_operations ext3_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, - .readv = generic_file_readv, - .writev = generic_file_writev, .ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, diff --git a/fs/fat/file.c b/fs/fat/file.c index d50fc47169c1..f4b8f8b3fbdd 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -127,8 +127,6 @@ const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = generic_file_readv, - .writev = generic_file_writev, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4fc557c40cc0..66571eafbb1e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -680,14 +680,15 @@ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ -static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *off) +static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int err; struct fuse_req *req; struct fuse_in *in; struct fuse_copy_state cs; unsigned reqsize; + struct file *file = iocb->ki_filp; struct fuse_conn *fc = fuse_get_conn(file); if (!fc) return -EPERM; @@ -761,15 +762,6 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, return err; } -static ssize_t fuse_dev_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *off) -{ - struct iovec iov; - iov.iov_len = nbytes; - iov.iov_base = buf; - return fuse_dev_readv(file, &iov, 1, off); -} - /* Look up request on processing list by unique ID */ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) { @@ -814,15 +806,15 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, * it from the list and copy the rest of the buffer to the request. * The request is finished by calling request_end() */ -static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *off) +static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int err; unsigned nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(file); + struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); if (!fc) return -EPERM; @@ -898,15 +890,6 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, return err; } -static ssize_t fuse_dev_write(struct file *file, const char __user *buf, - size_t nbytes, loff_t *off) -{ - struct iovec iov; - iov.iov_len = nbytes; - iov.iov_base = (char __user *) buf; - return fuse_dev_writev(file, &iov, 1, off); -} - static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; @@ -1041,10 +1024,10 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .llseek = no_llseek, - .read = fuse_dev_read, - .readv = fuse_dev_readv, - .write = fuse_dev_write, - .writev = fuse_dev_writev, + .read = do_sync_read, + .aio_read = fuse_dev_read, + .write = do_sync_write, + .aio_write = fuse_dev_write, .poll = fuse_dev_poll, .release = fuse_dev_release, .fasync = fuse_dev_fasync, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 322e876c35ed..4908c38a5885 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -389,8 +389,6 @@ static const struct file_operations hostfs_file_fops = { .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .readv = generic_file_readv, - .writev = generic_file_writev, .write = generic_file_write, .mmap = generic_file_mmap, .open = hostfs_file_open, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 1c9745be5ada..f535f2911c12 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -108,8 +108,6 @@ const struct file_operations jfs_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .readv = generic_file_readv, - .writev = generic_file_writev, .sendfile = generic_file_sendfile, .fsync = jfs_fsync, .release = jfs_release, diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 0c46f5c86b71..2f9b5a0953ff 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2298,11 +2298,9 @@ const struct file_operations ntfs_file_ops = { .llseek = generic_file_llseek, /* Seek inside file. */ .read = generic_file_read, /* Read from file. */ .aio_read = generic_file_aio_read, /* Async read from file. */ - .readv = generic_file_readv, /* Read from file. */ #ifdef NTFS_RW .write = ntfs_file_write, /* Write to file. */ .aio_write = ntfs_file_aio_write, /* Async write to file. */ - .writev = ntfs_file_writev, /* Write to file. */ /*.release = ,*/ /* Last file is closed. See fs/ext2/file.c:: ext2_release_file() for diff --git a/fs/pipe.c b/fs/pipe.c index f3b6f71e9d0b..2e60e1c8815e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -218,9 +218,10 @@ static struct pipe_buf_operations anon_pipe_buf_ops = { }; static ssize_t -pipe_readv(struct file *filp, const struct iovec *_iov, - unsigned long nr_segs, loff_t *ppos) +pipe_read(struct kiocb *iocb, const struct iovec *_iov, + unsigned long nr_segs, loff_t pos) { + struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; struct pipe_inode_info *pipe; int do_wakeup; @@ -330,17 +331,10 @@ redo: } static ssize_t -pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ - struct iovec iov = { .iov_base = buf, .iov_len = count }; - - return pipe_readv(filp, &iov, 1, ppos); -} - -static ssize_t -pipe_writev(struct file *filp, const struct iovec *_iov, - unsigned long nr_segs, loff_t *ppos) +pipe_write(struct kiocb *iocb, const struct iovec *_iov, + unsigned long nr_segs, loff_t ppos) { + struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; struct pipe_inode_info *pipe; ssize_t ret; @@ -509,15 +503,6 @@ out: return ret; } -static ssize_t -pipe_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return pipe_writev(filp, &iov, 1, ppos); -} - static ssize_t bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { @@ -736,8 +721,8 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) */ const struct file_operations read_fifo_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, + .read = do_sync_read, + .aio_read = pipe_read, .write = bad_pipe_w, .poll = pipe_poll, .ioctl = pipe_ioctl, @@ -749,8 +734,8 @@ const struct file_operations read_fifo_fops = { const struct file_operations write_fifo_fops = { .llseek = no_llseek, .read = bad_pipe_r, - .write = pipe_write, - .writev = pipe_writev, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, @@ -760,10 +745,10 @@ const struct file_operations write_fifo_fops = { const struct file_operations rdwr_fifo_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, - .write = pipe_write, - .writev = pipe_writev, + .read = do_sync_read, + .aio_read = pipe_read, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, @@ -773,8 +758,8 @@ const struct file_operations rdwr_fifo_fops = { static struct file_operations read_pipe_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, + .read = do_sync_read, + .aio_read = pipe_read, .write = bad_pipe_w, .poll = pipe_poll, .ioctl = pipe_ioctl, @@ -786,8 +771,8 @@ static struct file_operations read_pipe_fops = { static struct file_operations write_pipe_fops = { .llseek = no_llseek, .read = bad_pipe_r, - .write = pipe_write, - .writev = pipe_writev, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, @@ -797,10 +782,10 @@ static struct file_operations write_pipe_fops = { static struct file_operations rdwr_pipe_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, - .write = pipe_write, - .writev = pipe_writev, + .read = do_sync_read, + .aio_read = pipe_read, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, diff --git a/fs/read_write.c b/fs/read_write.c index 679dd535380f..32d54cca9bd9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -15,6 +15,7 @@ #include #include #include +#include "read_write.h" #include #include @@ -450,6 +451,62 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ +ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) +{ + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_left = len; + kiocb.ki_nbytes = len; + + for (;;) { + ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; + wait_on_retry_sync_kiocb(&kiocb); + } + + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +/* Do it by hand, with file-ops */ +ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, + unsigned long nr_segs, loff_t *ppos, io_fn_t fn) +{ + struct iovec *vector = iov; + ssize_t ret = 0; + + while (nr_segs > 0) { + void __user *base; + size_t len; + ssize_t nr; + + base = vector->iov_base; + len = vector->iov_len; + vector++; + nr_segs--; + + nr = fn(filp, base, len, ppos); + + if (nr < 0) { + if (!ret) + ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } + + return ret; +} + /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) @@ -457,12 +514,9 @@ static ssize_t do_readv_writev(int type, struct file *file, const struct iovec __user * uvector, unsigned long nr_segs, loff_t *pos) { - typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); - typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *vector; + struct iovec *iov = iovstack; ssize_t ret; int seg; io_fn_t fn; @@ -532,39 +586,18 @@ static ssize_t do_readv_writev(int type, struct file *file, fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->readv; + fnv = file->f_op->aio_read; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; - } - if (fnv) { - ret = fnv(file, iov, nr_segs, pos); - goto out; + fnv = file->f_op->aio_write; } - /* Do it by hand, with file-ops */ - ret = 0; - vector = iov; - while (nr_segs > 0) { - void __user * base; - size_t len; - ssize_t nr; - - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(file, base, len, pos); + if (fnv) + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, + pos, fnv); + else + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); - if (nr < 0) { - if (!ret) ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } out: if (iov != iovstack) kfree(iov); @@ -585,7 +618,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -598,7 +631,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); diff --git a/fs/read_write.h b/fs/read_write.h new file mode 100644 index 000000000000..d07b954c6e0c --- /dev/null +++ b/fs/read_write.h @@ -0,0 +1,14 @@ +/* + * This file is only for sharing some helpers from read_write.c with compat.c. + * Don't use anywhere else. + */ + + +typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); +typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, + unsigned long, loff_t); + +ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); +ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, + unsigned long nr_segs, loff_t *ppos, io_fn_t fn); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 4737971c6a39..d93d8dd1958d 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -123,96 +123,6 @@ xfs_file_aio_write_invis( return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); } -STATIC inline ssize_t -__xfs_file_readv( - struct file *file, - const struct iovec *iov, - int ioflags, - unsigned long nr_segs, - loff_t *ppos) -{ - struct inode *inode = file->f_mapping->host; - bhv_vnode_t *vp = vn_from_inode(inode); - struct kiocb kiocb; - ssize_t rval; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; - - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - rval = bhv_vop_read(vp, &kiocb, iov, nr_segs, - &kiocb.ki_pos, ioflags, NULL); - - *ppos = kiocb.ki_pos; - return rval; -} - -STATIC ssize_t -xfs_file_readv( - struct file *file, - const struct iovec *iov, - unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_readv(file, iov, 0, nr_segs, ppos); -} - -STATIC ssize_t -xfs_file_readv_invis( - struct file *file, - const struct iovec *iov, - unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos); -} - -STATIC inline ssize_t -__xfs_file_writev( - struct file *file, - const struct iovec *iov, - int ioflags, - unsigned long nr_segs, - loff_t *ppos) -{ - struct inode *inode = file->f_mapping->host; - bhv_vnode_t *vp = vn_from_inode(inode); - struct kiocb kiocb; - ssize_t rval; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - - rval = bhv_vop_write(vp, &kiocb, iov, nr_segs, - &kiocb.ki_pos, ioflags, NULL); - - *ppos = kiocb.ki_pos; - return rval; -} - -STATIC ssize_t -xfs_file_writev( - struct file *file, - const struct iovec *iov, - unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_writev(file, iov, 0, nr_segs, ppos); -} - -STATIC ssize_t -xfs_file_writev_invis( - struct file *file, - const struct iovec *iov, - unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos); -} - STATIC ssize_t xfs_file_sendfile( struct file *filp, @@ -540,8 +450,6 @@ const struct file_operations xfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = xfs_file_readv, - .writev = xfs_file_writev, .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, .sendfile = xfs_file_sendfile, @@ -565,8 +473,6 @@ const struct file_operations xfs_invis_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = xfs_file_readv_invis, - .writev = xfs_file_writev_invis, .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, .sendfile = xfs_file_sendfile_invis, diff --git a/include/linux/fs.h b/include/linux/fs.h index 257bae16f545..afb6e6f89a01 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1113,8 +1113,6 @@ struct file_operations { int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); - ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -1734,10 +1732,6 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); -ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); diff --git a/mm/filemap.c b/mm/filemap.c index f6c1d22b504f..48497094ae83 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2421,42 +2421,6 @@ ssize_t generic_file_write(struct file *file, const char __user *buf, } EXPORT_SYMBOL(generic_file_write); -ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} -EXPORT_SYMBOL(generic_file_readv); - -ssize_t generic_file_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret; - - mutex_lock(&inode->i_mutex); - ret = __generic_file_write_nolock(file, iov, nr_segs, ppos); - mutex_unlock(&inode->i_mutex); - - if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - - err = sync_page_range(inode, mapping, *ppos - ret, ret); - if (err < 0) - ret = err; - } - return ret; -} -EXPORT_SYMBOL(generic_file_writev); - /* * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something * went wrong during pagecache shootdown. diff --git a/net/socket.c b/net/socket.c index df92e4252749..01918f7a301a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -110,10 +110,6 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); -static ssize_t sock_readv(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); -static ssize_t sock_writev(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); @@ -136,8 +132,6 @@ static struct file_operations socket_file_ops = { .open = sock_no_open, /* special open code to disallow open via /proc */ .release = sock_close, .fasync = sock_fasync, - .readv = sock_readv, - .writev = sock_writev, .sendpage = sock_sendpage, .splice_write = generic_splice_sendpage, }; @@ -700,23 +694,6 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); } -static ssize_t sock_readv(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb iocb; - struct sock_iocb siocb; - struct msghdr msg; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - - ret = do_sock_read(&msg, &iocb, file, iov, nr_segs); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -759,23 +736,6 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, return __sock_sendmsg(iocb, sock, msg, size); } -static ssize_t sock_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct msghdr msg; - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - - ret = do_sock_write(&msg, &iocb, file, iov, nr_segs); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 891d7140553c..37b4b10850ae 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2852,8 +2852,8 @@ static ssize_t snd_pcm_write(struct file *file, const char __user *buf, return result; } -static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector, - unsigned long count, loff_t * offset) +static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct snd_pcm_file *pcm_file; @@ -2864,22 +2864,22 @@ static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector, void __user **bufs; snd_pcm_uframes_t frames; - pcm_file = file->private_data; + pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; snd_assert(substream != NULL, return -ENXIO); runtime = substream->runtime; if (runtime->status->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; - if (count > 1024 || count != runtime->channels) + if (nr_segs > 1024 || nr_segs != runtime->channels) return -EINVAL; - if (!frame_aligned(runtime, _vector->iov_len)) + if (!frame_aligned(runtime, iov->iov_len)) return -EINVAL; - frames = bytes_to_samples(runtime, _vector->iov_len); - bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL); + frames = bytes_to_samples(runtime, iov->iov_len); + bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL); if (bufs == NULL) return -ENOMEM; - for (i = 0; i < count; ++i) - bufs[i] = _vector[i].iov_base; + for (i = 0; i < nr_segs; ++i) + bufs[i] = iov[i].iov_base; result = snd_pcm_lib_readv(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); @@ -2887,8 +2887,8 @@ static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector, return result; } -static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector, - unsigned long count, loff_t * offset) +static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; @@ -2898,7 +2898,7 @@ static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector, void __user **bufs; snd_pcm_uframes_t frames; - pcm_file = file->private_data; + pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; snd_assert(substream != NULL, result = -ENXIO; goto end); runtime = substream->runtime; @@ -2906,17 +2906,17 @@ static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector, result = -EBADFD; goto end; } - if (count > 128 || count != runtime->channels || - !frame_aligned(runtime, _vector->iov_len)) { + if (nr_segs > 128 || nr_segs != runtime->channels || + !frame_aligned(runtime, iov->iov_len)) { result = -EINVAL; goto end; } - frames = bytes_to_samples(runtime, _vector->iov_len); - bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL); + frames = bytes_to_samples(runtime, iov->iov_len); + bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL); if (bufs == NULL) return -ENOMEM; - for (i = 0; i < count; ++i) - bufs[i] = _vector[i].iov_base; + for (i = 0; i < nr_segs; ++i) + bufs[i] = iov[i].iov_base; result = snd_pcm_lib_writev(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); @@ -3426,7 +3426,7 @@ struct file_operations snd_pcm_f_ops[2] = { { .owner = THIS_MODULE, .write = snd_pcm_write, - .writev = snd_pcm_writev, + .aio_write = snd_pcm_aio_write, .open = snd_pcm_playback_open, .release = snd_pcm_release, .poll = snd_pcm_playback_poll, @@ -3438,7 +3438,7 @@ struct file_operations snd_pcm_f_ops[2] = { { .owner = THIS_MODULE, .read = snd_pcm_read, - .readv = snd_pcm_readv, + .aio_read = snd_pcm_aio_read, .open = snd_pcm_capture_open, .release = snd_pcm_release, .poll = snd_pcm_capture_poll, -- cgit v1.2.3-71-gd317 From 543ade1fc901db4c3dbe9fb27241fb977f1f3eea Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 30 Sep 2006 23:28:48 -0700 Subject: [PATCH] Streamline generic_file_* interfaces and filemap cleanups This patch cleans up generic_file_*_read/write() interfaces. Christoph Hellwig gave me the idea for this clean ups. In a nutshell, all filesystems should set .aio_read/.aio_write methods and use do_sync_read/ do_sync_write() as their .read/.write methods. This allows us to cleanup all variants of generic_file_* routines. Final available interfaces: generic_file_aio_read() - read handler generic_file_aio_write() - write handler generic_file_aio_write_nolock() - no lock write handler __generic_file_aio_write_nolock() - internal worker routine Signed-off-by: Badari Pulavarty Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/raw.c | 15 ++------ fs/adfs/file.c | 6 ++-- fs/affs/file.c | 6 ++-- fs/bfs/file.c | 6 ++-- fs/block_dev.c | 12 ++----- fs/ext2/file.c | 4 +-- fs/fuse/file.c | 6 ++-- fs/hfs/inode.c | 6 ++-- fs/hfsplus/inode.c | 6 ++-- fs/hostfs/hostfs_kern.c | 4 +-- fs/hpfs/file.c | 6 ++-- fs/jffs/inode-v23.c | 6 ++-- fs/jffs2/file.c | 6 ++-- fs/jfs/file.c | 4 +-- fs/minix/file.c | 6 ++-- fs/ntfs/file.c | 2 +- fs/qnx4/file.c | 6 ++-- fs/ramfs/file-mmu.c | 6 ++-- fs/ramfs/file-nommu.c | 6 ++-- fs/read_write.c | 3 +- fs/smbfs/file.c | 24 ++++++++----- fs/sysv/file.c | 6 ++-- fs/udf/file.c | 16 +++++---- fs/ufs/file.c | 6 ++-- fs/xfs/linux-2.6/xfs_lrw.c | 4 ++- include/linux/fs.h | 5 --- mm/filemap.c | 87 +++------------------------------------------- 27 files changed, 105 insertions(+), 165 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 490db531e2d8..89b718e326e5 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -238,21 +238,10 @@ out: return err; } -static ssize_t raw_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec local_iov = { - .iov_base = (char __user *)buf, - .iov_len = count - }; - - return generic_file_write_nolock(file, &local_iov, 1, ppos); -} - static const struct file_operations raw_fops = { - .read = generic_file_read, + .read = do_sync_read, .aio_read = generic_file_aio_read, - .write = raw_file_write, + .write = do_sync_write, .aio_write = generic_file_aio_write_nolock, .open = raw_open, .release= raw_release, diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 1014b9f2117b..6101ea679cb1 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -27,10 +27,12 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, .fsync = file_fsync, - .write = generic_file_write, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .sendfile = generic_file_sendfile, }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 3de8590e4f6a..05b5e22de759 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -27,8 +27,10 @@ static int affs_file_release(struct inode *inode, struct file *filp); const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .open = affs_file_open, .release = affs_file_release, diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 3d5aca28a0a0..a9164a87f8de 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -19,8 +19,10 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 0f143094ef1d..bc8f27cc4483 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1154,14 +1154,6 @@ static int blkdev_close(struct inode * inode, struct file * filp) return blkdev_put(bdev); } -static ssize_t blkdev_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_write_nolock(file, &local_iov, 1, ppos); -} - static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); @@ -1181,8 +1173,8 @@ const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, - .read = generic_file_read, - .write = blkdev_file_write, + .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write_nolock, .mmap = generic_file_mmap, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index e893e2be9ed4..2dba473c524a 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -41,8 +41,8 @@ static int ext2_release_file (struct inode * inode, struct file * filp) */ const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .ioctl = ext2_ioctl, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5c4fcd1dbf59..183626868eea 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -753,8 +753,10 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) static const struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = fuse_file_mmap, .open = fuse_open, .flush = fuse_flush, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d05641c35fc9..02f5573e0349 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -601,8 +601,10 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, .fsync = file_fsync, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 0eb1a6092668..9e3675249633 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -282,8 +282,10 @@ static struct inode_operations hfsplus_file_inode_operations = { static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, .fsync = file_fsync, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 4908c38a5885..b6bd33ca3731 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -385,11 +385,11 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) static const struct file_operations hostfs_file_fops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .write = generic_file_write, + .write = do_sync_write, .mmap = generic_file_mmap, .open = hostfs_file_open, .release = NULL, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index d9eb19b7b8ae..8b94d24855f0 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -113,7 +113,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf, { ssize_t retval; - retval = generic_file_write(file, buf, count, ppos); + retval = do_sync_write(file, buf, count, ppos); if (retval > 0) hpfs_i(file->f_dentry->d_inode)->i_dirty = 1; return retval; @@ -122,8 +122,10 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf, const struct file_operations hpfs_file_ops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .write = hpfs_file_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .release = hpfs_file_release, .fsync = hpfs_file_fsync, diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index f5cf9c93e243..068ef0de8de2 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1632,8 +1632,10 @@ static const struct file_operations jffs_file_operations = { .open = generic_file_open, .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .ioctl = jffs_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs_fsync, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 3ed6e3e120b6..242875f77cb3 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -42,8 +42,10 @@ const struct file_operations jffs2_file_operations = { .llseek = generic_file_llseek, .open = generic_file_open, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .ioctl = jffs2_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs2_fsync, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index f535f2911c12..976e90dc2d1b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -103,8 +103,8 @@ struct inode_operations jfs_file_inode_operations = { const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, - .write = generic_file_write, - .read = generic_file_read, + .write = do_sync_write, + .read = do_sync_read, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, diff --git a/fs/minix/file.c b/fs/minix/file.c index 420b32882a10..40eac2e60d25 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -17,8 +17,10 @@ int minix_sync_file(struct file *, struct dentry *, int); const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = minix_sync_file, .sendfile = generic_file_sendfile, diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 2f9b5a0953ff..ae2fe0016d2c 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2296,7 +2296,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, const struct file_operations ntfs_file_ops = { .llseek = generic_file_llseek, /* Seek inside file. */ - .read = generic_file_read, /* Read from file. */ + .read = do_sync_read, /* Read from file. */ .aio_read = generic_file_aio_read, /* Async read from file. */ #ifdef NTFS_RW .write = ntfs_file_write, /* Write to file. */ diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 62af4b1348bd..467e5ac7280e 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -22,11 +22,13 @@ const struct file_operations qnx4_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, #ifdef CONFIG_QNX4FS_RW - .write = generic_file_write, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .fsync = qnx4_sync_file, #endif }; diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 86f14cacf641..0947fb57dcf3 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -33,8 +33,10 @@ const struct address_space_operations ramfs_aops = { }; const struct file_operations ramfs_file_operations = { - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 677139b48e00..bfe5dbf1002e 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -36,8 +36,10 @@ const struct address_space_operations ramfs_aops = { const struct file_operations ramfs_file_operations = { .mmap = ramfs_nommu_mmap, .get_unmapped_area = ramfs_nommu_get_unmapped_area, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, .llseek = generic_file_llseek, diff --git a/fs/read_write.c b/fs/read_write.c index 32d54cca9bd9..4ed839bcb91c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -22,7 +22,8 @@ const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .mmap = generic_file_readonly_mmap, .sendfile = generic_file_sendfile, }; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index dae67048baba..50784d13c87b 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -214,13 +214,15 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset, } static ssize_t -smb_file_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) +smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { + struct file * file = iocb->ki_filp; struct dentry * dentry = file->f_dentry; ssize_t status; VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry), - (unsigned long) count, (unsigned long) *ppos); + (unsigned long) iocb->ki_left, (unsigned long) pos); status = smb_revalidate_inode(dentry); if (status) { @@ -233,7 +235,7 @@ smb_file_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) (long)dentry->d_inode->i_size, dentry->d_inode->i_flags, dentry->d_inode->i_atime); - status = generic_file_read(file, buf, count, ppos); + status = generic_file_aio_read(iocb, iov, nr_segs, pos); out: return status; } @@ -317,14 +319,16 @@ const struct address_space_operations smb_file_aops = { * Write to a file (through the page cache). */ static ssize_t -smb_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { + struct file * file = iocb->ki_filp; struct dentry * dentry = file->f_dentry; ssize_t result; VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry), - (unsigned long) count, (unsigned long) *ppos); + (unsigned long) iocb->ki_left, (unsigned long) pos); result = smb_revalidate_inode(dentry); if (result) { @@ -337,8 +341,8 @@ smb_file_write(struct file *file, const char __user *buf, size_t count, loff_t * if (result) goto out; - if (count > 0) { - result = generic_file_write(file, buf, count, ppos); + if (iocb->ki_left > 0) { + result = generic_file_aio_write(iocb, iov, nr_segs, pos); VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n", (long) file->f_pos, (long) dentry->d_inode->i_size, dentry->d_inode->i_mtime, dentry->d_inode->i_atime); @@ -402,8 +406,10 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) const struct file_operations smb_file_operations = { .llseek = remote_llseek, - .read = smb_file_read, - .write = smb_file_write, + .read = do_sync_read, + .aio_read = smb_file_aio_read, + .write = do_sync_write, + .aio_write = smb_file_aio_write, .ioctl = smb_ioctl, .mmap = smb_file_mmap, .open = smb_file_open, diff --git a/fs/sysv/file.c b/fs/sysv/file.c index a59e303135fa..47a4b728f15b 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -21,8 +21,10 @@ */ const struct file_operations sysv_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = sysv_sync_file, .sendfile = generic_file_sendfile, diff --git a/fs/udf/file.c b/fs/udf/file.c index a59e5f33daf6..7aedd552cba1 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -103,19 +103,21 @@ const struct address_space_operations udf_adinicb_aops = { .commit_write = udf_adinicb_commit_write, }; -static ssize_t udf_file_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t ppos) { ssize_t retval; + struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; int err, pos; + size_t count = iocb->ki_left; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) pos = inode->i_size; else - pos = *ppos; + pos = ppos; if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + pos + count)) @@ -136,7 +138,7 @@ static ssize_t udf_file_write(struct file * file, const char __user * buf, } } - retval = generic_file_write(file, buf, count, ppos); + retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); if (retval > 0) mark_inode_dirty(inode); @@ -249,11 +251,13 @@ static int udf_release_file(struct inode * inode, struct file * filp) } const struct file_operations udf_file_operations = { - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .ioctl = udf_ioctl, .open = generic_file_open, .mmap = generic_file_mmap, - .write = udf_file_write, + .write = do_sync_write, + .aio_write = udf_file_aio_write, .release = udf_release_file, .fsync = udf_fsync_file, .sendfile = generic_file_sendfile, diff --git a/fs/ufs/file.c b/fs/ufs/file.c index a9c6e5f04fae..1e096323bad4 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -53,8 +53,10 @@ static int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) const struct file_operations ufs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .open = generic_file_open, .fsync = ufs_sync_file, diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 55992b40353c..fa842f1c9fa2 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -279,7 +279,9 @@ xfs_read( xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, segs, *offset, ioflags); - ret = __generic_file_aio_read(iocb, iovp, segs, offset); + + iocb->ki_pos = *offset; + ret = generic_file_aio_read(iocb, iovp, segs, *offset); if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if (ret > 0) diff --git a/include/linux/fs.h b/include/linux/fs.h index afb6e6f89a01..011129f8803e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1699,11 +1699,8 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); -extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, unsigned long, loff_t); @@ -1713,8 +1710,6 @@ extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, loff_t *, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, diff --git a/mm/filemap.c b/mm/filemap.c index 48497094ae83..ec469235985d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1149,13 +1149,14 @@ success: * that can use the page cache directly. */ ssize_t -__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) +generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *filp = iocb->ki_filp; ssize_t retval; unsigned long seg; size_t count; + loff_t *ppos = &iocb->ki_pos; count = 0; for (seg = 0; seg < nr_segs; seg++) { @@ -1179,7 +1180,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { - loff_t pos = *ppos, size; + loff_t size; struct address_space *mapping; struct inode *inode; @@ -1223,32 +1224,8 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, out: return retval; } -EXPORT_SYMBOL(__generic_file_aio_read); - -ssize_t -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - BUG_ON(iocb->ki_pos != pos); - return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos); -} EXPORT_SYMBOL(generic_file_aio_read); -ssize_t -generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ - struct iovec local_iov = { .iov_base = buf, .iov_len = count }; - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} -EXPORT_SYMBOL(generic_file_read); - int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { ssize_t written; @@ -2339,38 +2316,6 @@ ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, } EXPORT_SYMBOL(generic_file_aio_write_nolock); -static ssize_t -__generic_file_write_nolock(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; - ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} - -ssize_t -generic_file_write_nolock(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; - ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - *ppos = kiocb.ki_pos; - return ret; -} -EXPORT_SYMBOL(generic_file_write_nolock); - ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -2397,30 +2342,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } EXPORT_SYMBOL(generic_file_aio_write); -ssize_t generic_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; - - mutex_lock(&inode->i_mutex); - ret = __generic_file_write_nolock(file, &local_iov, 1, ppos); - mutex_unlock(&inode->i_mutex); - - if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - ssize_t err; - - err = sync_page_range(inode, mapping, *ppos - ret, ret); - if (err < 0) - ret = err; - } - return ret; -} -EXPORT_SYMBOL(generic_file_write); - /* * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something * went wrong during pagecache shootdown. -- cgit v1.2.3-71-gd317 From eed4e51fb60c3863c134a5e9f6006b29805ead97 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 30 Sep 2006 23:28:49 -0700 Subject: [PATCH] Add vector AIO support This work is initially done by Zach Brown to add support for vectored aio. These are the core changes for AIO to support IOCB_CMD_PREADV/IOCB_CMD_PWRITEV. [akpm@osdl.org: huge build fix] Signed-off-by: Zach Brown Signed-off-by: Christoph Hellwig Signed-off-by: Badari Pulavarty Acked-by: Benjamin LaHaise Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 171 +++++++++++++++++++++++++++++++++--------------- fs/read_write.c | 129 +++++++++++++++++++++--------------- include/linux/aio.h | 4 ++ include/linux/aio_abi.h | 2 + include/linux/fs.h | 5 ++ 5 files changed, 203 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 27ff56540c73..2e0d1505ee36 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) req->ki_retry = NULL; req->ki_dtor = NULL; req->private = NULL; + req->ki_iovec = NULL; INIT_LIST_HEAD(&req->ki_run_list); /* Check if the completion queue has enough free space to @@ -460,6 +461,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) if (req->ki_dtor) req->ki_dtor(req); + if (req->ki_iovec != &req->ki_inline_vec) + kfree(req->ki_iovec); kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; @@ -1301,69 +1304,63 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) return -EINVAL; } -/* - * aio_p{read,write} are the default ki_retry methods for - * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially - * multiple calls to f_op->aio_read(). They loop around partial progress - * instead of returning -EIOCBRETRY because they don't have the means to call - * kick_iocb(). - */ -static ssize_t aio_pread(struct kiocb *iocb) +static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret = 0; - - do { - iocb->ki_inline_vec.iov_base = iocb->ki_buf; - iocb->ki_inline_vec.iov_len = iocb->ki_left; - - ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec, - 1, iocb->ki_pos); - /* - * Can't just depend on iocb->ki_left to determine - * whether we are done. This may have been a short read. - */ - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; + struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg]; + + BUG_ON(ret <= 0); + + while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) { + ssize_t this = min((ssize_t)iov->iov_len, ret); + iov->iov_base += this; + iov->iov_len -= this; + iocb->ki_left -= this; + ret -= this; + if (iov->iov_len == 0) { + iocb->ki_cur_seg++; + iov++; } + } - /* - * For pipes and sockets we return once we have some data; for - * regular files we retry till we complete the entire read or - * find that we can't read any more data (e.g short reads). - */ - } while (ret > 0 && iocb->ki_left > 0 && - !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); - - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ - if ((ret == 0) || (iocb->ki_left == 0)) - ret = iocb->ki_nbytes - iocb->ki_left; - - return ret; + /* the caller should not have done more io than what fit in + * the remaining iovecs */ + BUG_ON(ret > 0 && iocb->ki_left == 0); } -/* see aio_pread() */ -static ssize_t aio_pwrite(struct kiocb *iocb) +static ssize_t aio_rw_vect_retry(struct kiocb *iocb) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t (*rw_op)(struct kiocb *, const struct iovec *, + unsigned long, loff_t); ssize_t ret = 0; + unsigned short opcode; + + if ((iocb->ki_opcode == IOCB_CMD_PREADV) || + (iocb->ki_opcode == IOCB_CMD_PREAD)) { + rw_op = file->f_op->aio_read; + opcode = IOCB_CMD_PREADV; + } else { + rw_op = file->f_op->aio_write; + opcode = IOCB_CMD_PWRITEV; + } do { - iocb->ki_inline_vec.iov_base = iocb->ki_buf; - iocb->ki_inline_vec.iov_len = iocb->ki_left; - - ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec, - 1, iocb->ki_pos); - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - } - } while (ret > 0 && iocb->ki_left > 0); + ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], + iocb->ki_nr_segs - iocb->ki_cur_seg, + iocb->ki_pos); + if (ret > 0) + aio_advance_iovec(iocb, ret); + + /* retry all partial writes. retry partial reads as long as its a + * regular file. */ + } while (ret > 0 && iocb->ki_left > 0 && + (opcode == IOCB_CMD_PWRITEV || + (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); + /* This means we must have transferred all that we could */ + /* No need to retry anymore */ if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; @@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *iocb) return ret; } +static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb) +{ + ssize_t ret; + + ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf, + kiocb->ki_nbytes, 1, + &kiocb->ki_inline_vec, &kiocb->ki_iovec); + if (ret < 0) + goto out; + + kiocb->ki_nr_segs = kiocb->ki_nbytes; + kiocb->ki_cur_seg = 0; + /* ki_nbytes/left now reflect bytes instead of segs */ + kiocb->ki_nbytes = ret; + kiocb->ki_left = ret; + + ret = 0; +out: + return ret; +} + +static ssize_t aio_setup_single_vector(struct kiocb *kiocb) +{ + kiocb->ki_iovec = &kiocb->ki_inline_vec; + kiocb->ki_iovec->iov_base = kiocb->ki_buf; + kiocb->ki_iovec->iov_len = kiocb->ki_left; + kiocb->ki_nr_segs = 1; + kiocb->ki_cur_seg = 0; + kiocb->ki_nbytes = kiocb->ki_left; + return 0; +} + /* * aio_setup_iocb: * Performs the initial checks and aio retry method @@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) ret = security_file_permission(file, MAY_READ); if (unlikely(ret)) break; + ret = aio_setup_single_vector(kiocb); + if (ret) + break; ret = -EINVAL; if (file->f_op->aio_read) - kiocb->ki_retry = aio_pread; + kiocb->ki_retry = aio_rw_vect_retry; break; case IOCB_CMD_PWRITE: ret = -EBADF; @@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) ret = security_file_permission(file, MAY_WRITE); if (unlikely(ret)) break; + ret = aio_setup_single_vector(kiocb); + if (ret) + break; + ret = -EINVAL; + if (file->f_op->aio_write) + kiocb->ki_retry = aio_rw_vect_retry; + break; + case IOCB_CMD_PREADV: + ret = -EBADF; + if (unlikely(!(file->f_mode & FMODE_READ))) + break; + ret = security_file_permission(file, MAY_READ); + if (unlikely(ret)) + break; + ret = aio_setup_vectored_rw(READ, kiocb); + if (ret) + break; + ret = -EINVAL; + if (file->f_op->aio_read) + kiocb->ki_retry = aio_rw_vect_retry; + break; + case IOCB_CMD_PWRITEV: + ret = -EBADF; + if (unlikely(!(file->f_mode & FMODE_WRITE))) + break; + ret = security_file_permission(file, MAY_WRITE); + if (unlikely(ret)) + break; + ret = aio_setup_vectored_rw(WRITE, kiocb); + if (ret) + break; ret = -EINVAL; if (file->f_op->aio_write) - kiocb->ki_retry = aio_pwrite; + kiocb->ki_retry = aio_rw_vect_retry; break; case IOCB_CMD_FDSYNC: ret = -EINVAL; diff --git a/fs/read_write.c b/fs/read_write.c index 4ed839bcb91c..f792000a28e6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -511,6 +511,74 @@ ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer) + { + unsigned long seg; + ssize_t ret; + struct iovec *iov = fast_pointer; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) { + ret = 0; + goto out; + } + + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (nr_segs > UIO_MAXIOV) { + ret = -EINVAL; + goto out; + } + if (nr_segs > fast_segs) { + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) { + ret = -ENOMEM; + goto out; + } + } + if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { + ret = -EFAULT; + goto out; + } + + /* + * According to the Single Unix Specification we should return EINVAL + * if an element length is < 0 when cast to ssize_t or if the + * total length would overflow the ssize_t return value of the + * system call. + */ + ret = 0; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; + + /* see if we we're about to use an invalid len or if + * it's about to overflow ssize_t */ + if (len < 0 || (ret + len < ret)) { + ret = -EINVAL; + goto out; + } + if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { + ret = -EFAULT; + goto out; + } + + ret += len; + } +out: + *ret_pointer = iov; + return ret; +} + static ssize_t do_readv_writev(int type, struct file *file, const struct iovec __user * uvector, unsigned long nr_segs, loff_t *pos) @@ -519,64 +587,20 @@ static ssize_t do_readv_writev(int type, struct file *file, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; ssize_t ret; - int seg; io_fn_t fn; iov_fn_t fnv; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - ret = 0; - if (nr_segs == 0) - goto out; - - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ - ret = -EINVAL; - if (nr_segs > UIO_MAXIOV) - goto out; - if (!file->f_op) - goto out; - if (nr_segs > UIO_FASTIOV) { - ret = -ENOMEM; - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - goto out; - } - ret = -EFAULT; - if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) + if (!file->f_op) { + ret = -EINVAL; goto out; + } - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t - * - * Be careful here because iov_len is a size_t not an ssize_t - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; - - if (len < 0) /* size_t not fitting an ssize_t .. */ - goto out; - if (unlikely(!access_ok(vrfy_dir(type), buf, len))) - goto Efault; - tot_len += len; - if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ - goto out; - } - if (tot_len == 0) { - ret = 0; + ret = rw_copy_check_uvector(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), iovstack, &iov); + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -609,9 +633,6 @@ out: fsnotify_modify(file->f_dentry); } return ret; -Efault: - ret = -EFAULT; - goto out; } ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, diff --git a/include/linux/aio.h b/include/linux/aio.h index 58349e58b749..5722568fc71e 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -7,6 +7,7 @@ #include #include +#include #define AIO_MAXSEGS 4 #define AIO_KIOGRP_NR_ATOMIC 8 @@ -114,6 +115,9 @@ struct kiocb { long ki_kicked; /* just for testing */ long ki_queued; /* just for testing */ struct iovec ki_inline_vec; /* inline vector */ + struct iovec *ki_iovec; + unsigned long ki_nr_segs; + unsigned long ki_cur_seg; struct list_head ki_list; /* the aio core uses this * for cancellation */ diff --git a/include/linux/aio_abi.h b/include/linux/aio_abi.h index 30fdcc89d142..3466b1d0ffd2 100644 --- a/include/linux/aio_abi.h +++ b/include/linux/aio_abi.h @@ -41,6 +41,8 @@ enum { * IOCB_CMD_POLL = 5, */ IOCB_CMD_NOOP = 6, + IOCB_CMD_PREADV = 7, + IOCB_CMD_PWRITEV = 8, }; /* read() from /dev/aio returns these structures. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 011129f8803e..4bb70871873f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1150,6 +1150,11 @@ struct inode_operations { struct seq_file; +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer); + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, -- cgit v1.2.3-71-gd317 From 31608214fe21dc31d8046679054ab033b1fe5cf1 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Sat, 30 Sep 2006 23:28:50 -0700 Subject: [PATCH] clean up unused kiocb variables Signed-off-by: Ken Chen Acked-by: Zach Brown Cc: Suparna Bhattacharya Cc: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 5722568fc71e..0d71c0041f13 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -112,8 +112,6 @@ struct kiocb { char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ long ki_retried; /* just for testing */ - long ki_kicked; /* just for testing */ - long ki_queued; /* just for testing */ struct iovec ki_inline_vec; /* inline vector */ struct iovec *ki_iovec; unsigned long ki_nr_segs; -- cgit v1.2.3-71-gd317 From f3cef7a99469afc159fec3a61b42dc7ca5b6824f Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:28:55 -0700 Subject: [PATCH] csa: basic accounting over taskstats Add some basic accounting fields to the taskstats struct, add a new kernel/tsacct.c to handle basic accounting data handling upon exit. A handle is added to taskstats.c to invoke the basic accounting data handling. Signed-off-by: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Cc: "Michal Piotrowski" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 29 ++++++++++++++---- include/linux/tsacct_kern.h | 19 ++++++++++++ kernel/Makefile | 2 +- kernel/taskstats.c | 4 +++ kernel/tsacct.c | 72 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 include/linux/tsacct_kern.h create mode 100644 kernel/tsacct.c (limited to 'include/linux') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index f1cb6cddd19d..af93a63a5092 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -2,6 +2,7 @@ * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 + * (C) Jay Lan, SGI, 2006 * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License @@ -29,16 +30,18 @@ * c) add new fields after version comment; maintain 64-bit alignment */ -#define TASKSTATS_VERSION 1 + +#define TASKSTATS_VERSION 2 +#define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN + * in linux/sched.h */ struct taskstats { /* Version 1 */ __u16 version; - __u16 padding[3]; /* Userspace should not interpret the padding - * field which can be replaced by useful - * fields if struct taskstats is extended. - */ + __u32 ac_exitcode; /* Exit status */ + __u8 ac_flag; /* Record flags */ + __u8 ac_nice; /* task_nice */ /* Delay accounting fields start * @@ -88,6 +91,22 @@ struct taskstats { __u64 cpu_run_virtual_total; /* Delay accounting fields end */ /* version 1 ends here */ + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + __u8 ac_sched; /* Scheduling discipline */ + __u8 ac_pad[3]; + __u32 ac_uid; /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + __u64 ac_etime; /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* SYstem CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault */ + __u64 ac_majflt; /* Major Page Fault */ + /* Basic Accounting Fields end */ }; diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h new file mode 100644 index 000000000000..7e8196a02118 --- /dev/null +++ b/include/linux/tsacct_kern.h @@ -0,0 +1,19 @@ +/* + * tsacct_kern.h - kernel header for system accounting over taskstats interface + * + * Copyright (C) Jay Lan SGI + */ + +#ifndef _LINUX_TSACCT_KERN_H +#define _LINUX_TSACCT_KERN_H + +#include + +#ifdef CONFIG_TASKSTATS +extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk); +#else +static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{} +#endif /* CONFIG_TASKSTATS */ + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index e210e8cf7237..aacaafb28b9d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o -obj-$(CONFIG_TASKSTATS) += taskstats.o +obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c451af2ddb50..6c38dce88e8c 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -198,7 +199,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, */ delayacct_add_tsk(stats, tsk); + + /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; + bacct_add_tsk(stats, tsk); /* Define err: label here if needed */ put_task_struct(tsk); diff --git a/kernel/tsacct.c b/kernel/tsacct.c new file mode 100644 index 000000000000..899067950a88 --- /dev/null +++ b/kernel/tsacct.c @@ -0,0 +1,72 @@ +/* + * tsacct.c - System accounting over taskstats interface + * + * Copyright (C) Jay Lan, + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include + + +#define USEC_PER_TICK (USEC_PER_SEC/HZ) +/* + * fill in basic accounting fields + */ +void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{ + struct timespec uptime, ts; + s64 ac_etime; + + BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); + + /* calculate task elapsed time in timespec */ + do_posix_clock_monotonic_gettime(&uptime); + ts = timespec_sub(uptime, current->group_leader->start_time); + /* rebase elapsed time to usec */ + ac_etime = timespec_to_ns(&ts); + do_div(ac_etime, NSEC_PER_USEC); + stats->ac_etime = ac_etime; + stats->ac_btime = xtime.tv_sec - ts.tv_sec; + if (thread_group_leader(tsk)) { + stats->ac_exitcode = tsk->exit_code; + if (tsk->flags & PF_FORKNOEXEC) + stats->ac_flag |= AFORK; + } + if (tsk->flags & PF_SUPERPRIV) + stats->ac_flag |= ASU; + if (tsk->flags & PF_DUMPCORE) + stats->ac_flag |= ACORE; + if (tsk->flags & PF_SIGNALED) + stats->ac_flag |= AXSIG; + stats->ac_nice = task_nice(tsk); + stats->ac_sched = tsk->policy; + stats->ac_uid = tsk->uid; + stats->ac_gid = tsk->gid; + stats->ac_pid = tsk->pid; + stats->ac_ppid = (tsk->parent) ? tsk->parent->pid : 0; + stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; + stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; + stats->ac_minflt = tsk->min_flt; + stats->ac_majflt = tsk->maj_flt; + /* Each process gets a minimum of one usec cpu time */ + if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) { + stats->ac_stime = 1; + } + + strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); +} + -- cgit v1.2.3-71-gd317 From 9acc1853519a0473620d424105f9d49ea5b4e62e Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:28:58 -0700 Subject: [PATCH] csa: Extended system accounting over taskstats Add extended system accounting handling over taskstats interface. A CONFIG_TASK_XACCT flag is created to enable the extended accounting code. Signed-off-by: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 11 +++++++++++ include/linux/tsacct_kern.h | 9 +++++++++ init/Kconfig | 9 +++++++++ kernel/taskstats.c | 4 ++++ kernel/tsacct.c | 19 +++++++++++++++++++ 5 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index af93a63a5092..3d2c304886b0 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -107,6 +107,17 @@ struct taskstats { __u64 ac_minflt; /* Minor Page Fault */ __u64 ac_majflt; /* Major Page Fault */ /* Basic Accounting Fields end */ + + /* Extended accounting fields start */ + __u64 acct_rss_mem1; /* accumulated rss usage */ + __u64 acct_vm_mem1; /* accumulated virtual memory usage */ + __u64 hiwater_rss; /* High-watermark of RSS usage */ + __u64 hiwater_vm; /* High-water virtual memory usage */ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + /* Extended accounting fields end */ }; diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 7e8196a02118..74102dcae67a 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -16,4 +16,13 @@ static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *ts {} #endif /* CONFIG_TASKSTATS */ +#ifdef CONFIG_TASK_XACCT +extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); +#else +static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) +{} +#endif /* CONFIG_TASK_XACCT */ + #endif + + diff --git a/init/Kconfig b/init/Kconfig index d2d72704f875..f7a04d0daf07 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -257,6 +257,15 @@ config CC_OPTIMIZE_FOR_SIZE If unsure, say N. +config TASK_XACCT + bool "Enable extended accounting over taskstats (EXPERIMENTAL)" + depends on TASKSTATS + help + Collect extended task accounting data and send the data + to userland for processing over the taskstats interface. + + Say N if unsure. + config SYSCTL bool diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 6c38dce88e8c..5d6a8c54ee85 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -204,6 +205,9 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, stats->version = TASKSTATS_VERSION; bacct_add_tsk(stats, tsk); + /* fill in extended acct fields */ + xacct_add_tsk(stats, tsk); + /* Define err: label here if needed */ put_task_struct(tsk); return rc; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 899067950a88..410483490cf6 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -70,3 +70,22 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); } + +#ifdef CONFIG_TASK_XACCT +/* + * fill in extended accounting fields + */ +void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) +{ + stats->acct_rss_mem1 = p->acct_rss_mem1; + stats->acct_vm_mem1 = p->acct_vm_mem1; + if (p->mm) { + stats->hiwater_rss = p->mm->hiwater_rss; + stats->hiwater_vm = p->mm->hiwater_vm; + } + stats->read_char = p->rchar; + stats->write_char = p->wchar; + stats->read_syscalls = p->syscr; + stats->write_syscalls = p->syscw; +} +#endif -- cgit v1.2.3-71-gd317 From 8f0ab5147951267134612570604cf8341901a80c Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:28:59 -0700 Subject: [PATCH] csa: convert CONFIG tag for extended accounting routines There were a few accounting data/macros that are used in CSA but are #ifdef'ed inside CONFIG_BSD_PROCESS_ACCT. This patch is to change those ifdef's from CONFIG_BSD_PROCESS_ACCT to CONFIG_TASK_XACCT. A few defines are moved from kernel/acct.c and include/linux/acct.h to kernel/tsacct.c and include/linux/tsacct_kern.h. Signed-off-by: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 2 +- fs/exec.c | 2 +- include/linux/acct.h | 4 ---- include/linux/sched.h | 2 +- include/linux/tsacct_kern.h | 6 ++++++ kernel/acct.c | 30 ------------------------------ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched.c | 2 +- kernel/tsacct.c | 30 ++++++++++++++++++++++++++++++ 10 files changed, 42 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 6b90bf35f61d..13fb08d096c4 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include /* siocdevprivate_ioctl */ diff --git a/fs/exec.c b/fs/exec.c index a8efe35176b0..0db3fc3c5f0f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/acct.h b/include/linux/acct.h index e86bae7324d2..0496d1f09952 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -124,16 +124,12 @@ extern void acct_auto_close(struct super_block *sb); extern void acct_init_pacct(struct pacct_struct *pacct); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); -extern void acct_update_integrals(struct task_struct *tsk); -extern void acct_clear_integrals(struct task_struct *tsk); #else #define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_init_pacct(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) -#define acct_update_integrals(x) do { } while (0) -#define acct_clear_integrals(task) do { } while (0) #endif /* diff --git a/include/linux/sched.h b/include/linux/sched.h index fc4a9873ec10..4ddeb0f982fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -981,7 +981,7 @@ struct task_struct { wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; -#if defined(CONFIG_BSD_PROCESS_ACCT) +#if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ clock_t acct_stimexpd; /* clock_t-converted stime since last update */ diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 74102dcae67a..7e50ac795b0b 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -18,9 +18,15 @@ static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *ts #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); +extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} +static inline void acct_update_integrals(struct task_struct *tsk) +{} +static inline void acct_clear_integrals(struct task_struct *tsk) +{} #endif /* CONFIG_TASK_XACCT */ #endif diff --git a/kernel/acct.c b/kernel/acct.c index f4330acead46..0aad5ca36a81 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -602,33 +602,3 @@ void acct_process(void) do_acct_process(file); fput(file); } - - -/** - * acct_update_integrals - update mm integral fields in task_struct - * @tsk: task_struct for accounting - */ -void acct_update_integrals(struct task_struct *tsk) -{ - if (likely(tsk->mm)) { - long delta = - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; - - if (delta == 0) - return; - tsk->acct_stimexpd = tsk->stime; - tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; - } -} - -/** - * acct_clear_integrals - clear the mm integral fields in task_struct - * @tsk: task_struct whose accounting fields are cleared - */ -void acct_clear_integrals(struct task_struct *tsk) -{ - tsk->acct_stimexpd = 0; - tsk->acct_rss_mem1 = 0; - tsk->acct_vm_mem1 = 0; -} diff --git a/kernel/exit.c b/kernel/exit.c index c189de2927ab..3b47f26985f2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 1c999f3e0b47..89f666491d1f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched.c b/kernel/sched.c index 74f169ac0773..2bbd948f0169 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 410483490cf6..47c71daa416f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -88,4 +88,34 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->read_syscalls = p->syscr; stats->write_syscalls = p->syscw; } + + +/** + * acct_update_integrals - update mm integral fields in task_struct + * @tsk: task_struct for accounting + */ +void acct_update_integrals(struct task_struct *tsk) +{ + if (likely(tsk->mm)) { + long delta = + cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; + + if (delta == 0) + return; + tsk->acct_stimexpd = tsk->stime; + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + } +} + +/** + * acct_clear_integrals - clear the mm integral fields in task_struct + * @tsk: task_struct whose accounting fields are cleared + */ +void acct_clear_integrals(struct task_struct *tsk) +{ + tsk->acct_stimexpd = 0; + tsk->acct_rss_mem1 = 0; + tsk->acct_vm_mem1 = 0; +} #endif -- cgit v1.2.3-71-gd317 From db5fed26b2e0beed939b773dd5896077a1794d65 Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:29:00 -0700 Subject: [PATCH] csa accounting taskstats update ChangeLog: Feedbacks from Andrew Morton: - define TS_COMM_LEN to 32 - change acct_stimexpd field of task_struct to be of cputime_t, which is to be used to save the tsk->stime of last timer interrupt update. - a new Documentation/accounting/taskstats-struct.txt to describe fields of taskstats struct. Feedback from Balbir Singh: - keep the stime of a task to be zero when both stime and utime are zero as recoreded in task_struct. Misc: - convert accumulated RSS/VM from platform dependent pages-ticks to MBytes-usecs in the kernel Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/taskstats-struct.txt | 161 ++++++++++++++++++++++++++ include/linux/sched.h | 2 +- include/linux/taskstats.h | 40 +++++-- kernel/tsacct.c | 25 ++-- 4 files changed, 207 insertions(+), 21 deletions(-) create mode 100644 Documentation/accounting/taskstats-struct.txt (limited to 'include/linux') diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt new file mode 100644 index 000000000000..661c797eaf79 --- /dev/null +++ b/Documentation/accounting/taskstats-struct.txt @@ -0,0 +1,161 @@ +The struct taskstats +-------------------- + +This document contains an explanation of the struct taskstats fields. + +There are three different groups of fields in the struct taskstats: + +1) Common and basic accounting fields + If CONFIG_TASKSTATS is set, the taskstats inteface is enabled and + the common fields and basic accounting fields are collected for + delivery at do_exit() of a task. +2) Delay accounting fields + These fields are placed between + /* Delay accounting fields start */ + and + /* Delay accounting fields end */ + Their values are collected if CONFIG_TASK_DELAY_ACCT is set. +3) Extended accounting fields + These fields are placed between + /* Extended accounting fields start */ + and + /* Extended accounting fields end */ + Their values are collected if CONFIG_TASK_XACCT is set. + +Future extension should add fields to the end of the taskstats struct, and +should not change the relative position of each field within the struct. + + +struct taskstats { + +1) Common and basic accounting fields: + /* The version number of this struct. This field is always set to + * TAKSTATS_VERSION, which is defined in . + * Each time the struct is changed, the value should be incremented. + */ + __u16 version; + + /* The exit code of a task. */ + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. + */ + __u8 ac_flag; /* Record flags */ + + /* The value of task_nice() of a task. */ + __u8 ac_nice; /* task_nice */ + + /* The name of the command that started this task. */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + + /* The scheduling discipline as set in task->policy field. */ + __u8 ac_sched; /* Scheduling discipline */ + + __u8 ac_pad[3]; + __u32 ac_uid; /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + + /* The time when a task begins, in [secs] since 1970. */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + + /* The elapsed time of a task, in [usec]. */ + __u64 ac_etime; /* Elapsed time [usec] */ + + /* The user CPU time of a task, in [usec]. */ + __u64 ac_utime; /* User CPU time [usec] */ + + /* The system CPU time of a task, in [usec]. */ + __u64 ac_stime; /* System CPU time [usec] */ + + /* The minor page fault count of a task, as set in task->min_flt. */ + __u64 ac_minflt; /* Minor Page Fault Count */ + + /* The major page fault count of a task, as set in task->maj_flt. */ + __u64 ac_majflt; /* Major Page Fault Count */ + + +2) Delay accounting fields: + /* Delay accounting fields start + * + * All values, until the comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ + + +3) Extended accounting fields + /* Extended accounting fields start */ + + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. + * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + + /* Accumulated virtual memory usage in duration of a task. + * Same as acct_rss_mem1 above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS usage in duration of a task, in KBytes. */ + __u64 hiwater_rss; /* High-watermark of RSS usage */ + + /* High watermark of VM usage in duration of a task, in KBytes. */ + __u64 hiwater_vm; /* High-water virtual memory usage */ + + /* The following four fields are I/O statistics of a task. */ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + + /* Extended accounting fields end */ + +} diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ddeb0f982fb..7ef899c47c29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -984,7 +984,7 @@ struct task_struct { #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - clock_t acct_stimexpd; /* clock_t-converted stime since last update */ + cputime_t acct_stimexpd;/* stime since last update */ #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 3d2c304886b0..45248806ae9c 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -32,14 +32,21 @@ #define TASKSTATS_VERSION 2 -#define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ struct taskstats { - /* Version 1 */ + /* The version number of this struct. This field is always set to + * TAKSTATS_VERSION, which is defined in . + * Each time the struct is changed, the value should be incremented. + */ __u16 version; - __u32 ac_exitcode; /* Exit status */ + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. + */ __u8 ac_flag; /* Record flags */ __u8 ac_nice; /* task_nice */ @@ -104,15 +111,30 @@ struct taskstats { __u64 ac_etime; /* Elapsed time [usec] */ __u64 ac_utime; /* User CPU time [usec] */ __u64 ac_stime; /* SYstem CPU time [usec] */ - __u64 ac_minflt; /* Minor Page Fault */ - __u64 ac_majflt; /* Major Page Fault */ + __u64 ac_minflt; /* Minor Page Fault Count */ + __u64 ac_majflt; /* Major Page Fault Count */ /* Basic Accounting Fields end */ /* Extended accounting fields start */ - __u64 acct_rss_mem1; /* accumulated rss usage */ - __u64 acct_vm_mem1; /* accumulated virtual memory usage */ - __u64 hiwater_rss; /* High-watermark of RSS usage */ - __u64 hiwater_vm; /* High-water virtual memory usage */ + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. + * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + /* Accumulated virtual memory usage in duration of a task. + * Same as acct_rss_mem1 above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS and virtual memory usage in duration of + * a task, in KBytes. + */ + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ + __u64 hiwater_vm; /* High-water VM usage, in KB */ + + /* The following four fields are I/O statistics of a task. */ __u64 read_char; /* bytes read */ __u64 write_char; /* bytes written */ __u64 read_syscalls; /* read syscalls */ diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 47c71daa416f..db443221ba5b 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -20,6 +20,7 @@ #include #include #include +#include #define USEC_PER_TICK (USEC_PER_SEC/HZ) @@ -62,33 +63,35 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; - /* Each process gets a minimum of one usec cpu time */ - if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) { - stats->ac_stime = 1; - } strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); } #ifdef CONFIG_TASK_XACCT + +#define KB 1024 +#define MB (1024*KB) /* * fill in extended accounting fields */ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { - stats->acct_rss_mem1 = p->acct_rss_mem1; - stats->acct_vm_mem1 = p->acct_vm_mem1; + /* convert pages-jiffies to Mbyte-usec */ + stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; + stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; if (p->mm) { - stats->hiwater_rss = p->mm->hiwater_rss; - stats->hiwater_vm = p->mm->hiwater_vm; + /* adjust to KB unit */ + stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; + stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; } stats->read_char = p->rchar; stats->write_char = p->wchar; stats->read_syscalls = p->syscr; stats->write_syscalls = p->syscw; } - +#undef KB +#undef MB /** * acct_update_integrals - update mm integral fields in task_struct @@ -97,8 +100,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) void acct_update_integrals(struct task_struct *tsk) { if (likely(tsk->mm)) { - long delta = - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; + long delta = cputime_to_jiffies( + cputime_sub(tsk->stime, tsk->acct_stimexpd)); if (delta == 0) return; -- cgit v1.2.3-71-gd317 From 9a53c3a783c2fa9b969628e65695c11c3e51e673 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 30 Sep 2006 23:29:03 -0700 Subject: [PATCH] r/o bind mounts: unlink: monitor i_nlink When a filesystem decrements i_nlink to zero, it means that a write must be performed in order to drop the inode from the filesystem. We're shortly going to have keep filesystems from being remounted r/o between the time that this i_nlink decrement and that write occurs. So, add a little helper function to do the decrements. We'll tie into it in a bit to note when i_nlink hits zero. Signed-off-by: Dave Hansen Acked-by: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/core/inode.c | 7 ++++--- fs/autofs/root.c | 2 +- fs/autofs4/root.c | 2 +- fs/bfs/dir.c | 9 +++------ fs/cifs/inode.c | 10 +++++----- fs/coda/dir.c | 4 ++-- fs/ext2/namei.c | 2 +- fs/ext3/namei.c | 14 +++++++------- fs/hfs/dir.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hpfs/namei.c | 6 +++--- fs/jffs/inode-v23.c | 3 +-- fs/jffs2/dir.c | 6 +++--- fs/jfs/namei.c | 14 ++++++-------- fs/libfs.c | 10 +++++----- fs/minix/namei.c | 2 +- fs/msdos/namei.c | 9 ++++----- fs/nfs/dir.c | 6 +++--- fs/ocfs2/namei.c | 4 ++-- fs/qnx4/namei.c | 6 ++---- fs/reiserfs/namei.c | 6 +++--- fs/sysv/namei.c | 2 +- fs/udf/namei.c | 18 ++++++------------ fs/ufs/namei.c | 2 +- fs/vfat/namei.c | 9 ++++----- include/linux/fs.h | 7 ++++++- ipc/mqueue.c | 2 +- mm/shmem.c | 10 +++++----- 28 files changed, 83 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index df3d152f0493..88002e45a6b4 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -332,7 +332,7 @@ static int usbfs_unlink (struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; mutex_lock(&inode->i_mutex); - dentry->d_inode->i_nlink--; + drop_nlink(dentry->d_inode); dput(dentry); mutex_unlock(&inode->i_mutex); d_delete(dentry); @@ -347,10 +347,11 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&inode->i_mutex); dentry_unhash(dentry); if (usbfs_empty(dentry)) { - dentry->d_inode->i_nlink -= 2; + drop_nlink(dentry->d_inode); + drop_nlink(dentry->d_inode); dput(dentry); inode->i_flags |= S_DEAD; - dir->i_nlink--; + drop_nlink(dir); error = 0; } mutex_unlock(&inode->i_mutex); diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 9cac08d6a873..54ad70731927 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -414,7 +414,7 @@ static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_time = (unsigned long)(struct autofs_dir_ent *)NULL; autofs_hash_delete(ent); - dir->i_nlink--; + drop_nlink(dir); d_drop(dentry); unlock_kernel(); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 563ef9d7da9f..348bec0982b0 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -676,7 +676,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_nlink = 0; if (dir->i_nlink) - dir->i_nlink--; + drop_nlink(dir); return 0; } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index dcf04cb13283..ce05d1643dd1 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -117,8 +117,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, inode->i_ino); if (err) { - inode->i_nlink--; - mark_inode_dirty(inode); + inode_dec_link_count(inode); iput(inode); unlock_kernel(); return err; @@ -196,9 +195,8 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry) mark_buffer_dirty(bh); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); - inode->i_nlink--; inode->i_ctime = dir->i_ctime; - mark_inode_dirty(inode); + inode_dec_link_count(inode); error = 0; out_brelse: @@ -249,9 +247,8 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(old_dir); if (new_inode) { - new_inode->i_nlink--; new_inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(new_inode); + inode_dec_link_count(new_inode); } mark_buffer_dirty(old_bh); error = 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 05f874c7441b..74441a17e186 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -590,7 +590,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) if (!rc) { if (direntry->d_inode) - direntry->d_inode->i_nlink--; + drop_nlink(direntry->d_inode); } else if (rc == -ENOENT) { d_drop(direntry); } else if (rc == -ETXTBSY) { @@ -609,7 +609,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) CIFS_MOUNT_MAP_SPECIAL_CHR); CIFSSMBClose(xid, pTcon, netfid); if (direntry->d_inode) - direntry->d_inode->i_nlink--; + drop_nlink(direntry->d_inode); } } else if (rc == -EACCES) { /* try only if r/o attribute set in local lookup data? */ @@ -663,7 +663,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { if (direntry->d_inode) - direntry->d_inode->i_nlink--; + drop_nlink(direntry->d_inode); } else if (rc == -ETXTBSY) { int oplock = FALSE; __u16 netfid; @@ -684,7 +684,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) CIFS_MOUNT_MAP_SPECIAL_CHR); CIFSSMBClose(xid, pTcon, netfid); if (direntry->d_inode) - direntry->d_inode->i_nlink--; + drop_nlink(direntry->d_inode); } /* BB if rc = -ETXTBUSY goto the rename logic BB */ } @@ -816,7 +816,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { - inode->i_nlink--; + drop_nlink(inode); i_size_write(direntry->d_inode,0); direntry->d_inode->i_nlink = 0; } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 8651ea6a23b7..0a2fd8bb7579 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -367,7 +367,7 @@ int coda_unlink(struct inode *dir, struct dentry *de) } coda_dir_changed(dir, 0); - de->d_inode->i_nlink--; + drop_nlink(de->d_inode); unlock_kernel(); return 0; @@ -394,7 +394,7 @@ int coda_rmdir(struct inode *dir, struct dentry *de) } coda_dir_changed(dir, -1); - de->d_inode->i_nlink--; + drop_nlink(de->d_inode); d_delete(de); unlock_kernel(); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 4ca824985321..e1af5b4cf80c 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -326,7 +326,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, ext2_set_link(new_dir, new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) - new_inode->i_nlink--; + drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { if (dir_de) { diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 235e77b52ea5..14c55adfae83 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1621,7 +1621,7 @@ static inline void ext3_inc_count(handle_t *handle, struct inode *inode) static inline void ext3_dec_count(handle_t *handle, struct inode *inode) { - inode->i_nlink--; + drop_nlink(inode); } static int ext3_add_nondir(handle_t *handle, @@ -1743,7 +1743,7 @@ retry: inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; dir_block = ext3_bread (handle, inode, 0, 1, &err); if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ + drop_nlink(inode); /* is this nlink == 0? */ ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2053,7 +2053,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) ext3_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); - dir->i_nlink--; + drop_nlink(dir); ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); @@ -2104,7 +2104,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; + drop_nlink(inode); if (!inode->i_nlink) ext3_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime; @@ -2326,7 +2326,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, } if (new_inode) { - new_inode->i_nlink--; + drop_nlink(new_inode); new_inode->i_ctime = CURRENT_TIME_SEC; } old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; @@ -2337,9 +2337,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); ext3_journal_dirty_metadata(handle, dir_bh); - old_dir->i_nlink--; + drop_nlink(old_dir); if (new_inode) { - new_inode->i_nlink--; + drop_nlink(new_inode); } else { new_dir->i_nlink++; ext3_update_dx_flag(new_dir); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 7cd8cc03aea7..cfef6ad529a7 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -246,7 +246,7 @@ static int hfs_unlink(struct inode *dir, struct dentry *dentry) if (res) return res; - inode->i_nlink--; + drop_nlink(inode); hfs_delete_inode(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 1f9ece0de326..9ceb0dfaa1cc 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -338,7 +338,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) return res; if (inode->i_nlink > 0) - inode->i_nlink--; + drop_nlink(inode); hfsplus_delete_inode(inode); if (inode->i_ino != cnid && !inode->i_nlink) { if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 59e7dc182a0c..4078b0becc5e 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -434,7 +434,7 @@ again: unlock_kernel(); return -ENOSPC; default: - inode->i_nlink--; + drop_nlink(inode); err = 0; } goto out; @@ -494,7 +494,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOSPC; break; default: - dir->i_nlink--; + drop_nlink(dir); inode->i_nlink = 0; err = 0; } @@ -636,7 +636,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, hpfs_i(i)->i_parent_dir = new_dir->i_ino; if (S_ISDIR(i->i_mode)) { new_dir->i_nlink++; - old_dir->i_nlink--; + drop_nlink(old_dir); } if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) { fnode->up = new_dir->i_ino; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 068ef0de8de2..3f7899ea4cba 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1052,9 +1052,8 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type) dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); - inode->i_nlink--; inode->i_ctime = dir->i_ctime; - mark_inode_dirty(inode); + inode_dec_link_count(inode); d_delete(dentry); /* This also frees the inode */ diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index edd8371fc6a5..a5e9f2205b33 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -615,7 +615,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) } ret = jffs2_unlink(dir_i, dentry); if (!ret) - dir_i->i_nlink--; + drop_nlink(dir_i); return ret; } @@ -823,7 +823,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, if (victim_f) { /* There was a victim. Kill it off nicely */ - new_dentry->d_inode->i_nlink--; + drop_nlink(new_dentry->d_inode); /* Don't oops if the victim was a dirent pointing to an inode which didn't exist. */ if (victim_f->inocache) { @@ -862,7 +862,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, } if (S_ISDIR(old_dentry->d_inode->i_mode)) - old_dir_i->i_nlink--; + drop_nlink(old_dir_i); new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 295268ad231b..088b85976ac0 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -393,9 +393,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) /* update parent directory's link count corresponding * to ".." entry of the target directory deleted */ - dip->i_nlink--; dip->i_ctime = dip->i_mtime = CURRENT_TIME; - mark_inode_dirty(dip); + inode_dec_link_count(dip); /* * OS/2 could have created EA and/or ACL @@ -515,8 +514,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) mark_inode_dirty(dip); /* update target's inode */ - ip->i_nlink--; - mark_inode_dirty(ip); + inode_dec_link_count(ip); /* * commit zero link count object @@ -835,7 +833,7 @@ static int jfs_link(struct dentry *old_dentry, rc = txCommit(tid, 2, &iplist[0], 0); if (rc) { - ip->i_nlink--; + ip->i_nlink--; /* never instantiated */ iput(ip); } else d_instantiate(dentry, ip); @@ -1155,9 +1153,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_ip->i_ino, JFS_RENAME); if (rc) goto out4; - new_ip->i_nlink--; + drop_nlink(new_ip); if (S_ISDIR(new_ip->i_mode)) { - new_ip->i_nlink--; + drop_nlink(new_ip); if (new_ip->i_nlink) { mutex_unlock(&JFS_IP(new_ip)->commit_mutex); if (old_dir != new_dir) @@ -1223,7 +1221,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out4; } if (S_ISDIR(old_ip->i_mode)) { - old_dir->i_nlink--; + drop_nlink(old_dir); if (old_dir != new_dir) { /* * Change inode number of parent for moved directory diff --git a/fs/libfs.c b/fs/libfs.c index 3793aaa14577..9204feba75ac 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -275,7 +275,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - inode->i_nlink--; + drop_nlink(inode); dput(dentry); return 0; } @@ -285,9 +285,9 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) if (!simple_empty(dentry)) return -ENOTEMPTY; - dentry->d_inode->i_nlink--; + drop_nlink(dentry->d_inode); simple_unlink(dir, dentry); - dir->i_nlink--; + drop_nlink(dir); return 0; } @@ -303,9 +303,9 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_dentry->d_inode) { simple_unlink(new_dir, new_dentry); if (they_are_dirs) - old_dir->i_nlink--; + drop_nlink(old_dir); } else if (they_are_dirs) { - old_dir->i_nlink--; + drop_nlink(old_dir); new_dir->i_nlink++; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 5b6a4540a05b..299bb66e3bde 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -249,7 +249,7 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, minix_set_link(new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) - new_inode->i_nlink--; + drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { if (dir_de) { diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index d220165d4918..635613f2f65a 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -343,7 +343,7 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - dir->i_nlink--; + drop_nlink(dir); inode->i_nlink = 0; inode->i_ctime = CURRENT_TIME_SEC; @@ -549,7 +549,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, if (err) goto error_dotdot; } - old_dir->i_nlink--; + drop_nlink(old_dir); if (!new_inode) new_dir->i_nlink++; } @@ -566,10 +566,9 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, mark_inode_dirty(old_dir); if (new_inode) { + drop_nlink(new_inode); if (is_dir) - new_inode->i_nlink -= 2; - else - new_inode->i_nlink--; + drop_nlink(new_inode); new_inode->i_ctime = ts; } out: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7432f1a43f3d..26eecb86f9b0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -843,7 +843,7 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) nfs_inode_return_delegation(inode); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { lock_kernel(); - inode->i_nlink--; + drop_nlink(inode); nfs_complete_unlink(dentry); unlock_kernel(); } @@ -1401,7 +1401,7 @@ static int nfs_safe_remove(struct dentry *dentry) error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) - inode->i_nlink--; + drop_nlink(inode); nfs_mark_for_revalidate(inode); nfs_end_data_update(inode); } else @@ -1639,7 +1639,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } } else - new_inode->i_nlink--; + drop_nlink(new_inode); go_ahead: /* diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 849c3b4bb94a..40f83f53053a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -739,7 +739,7 @@ static int ocfs2_link(struct dentry *old_dentry, err = ocfs2_journal_dirty(handle, fe_bh); if (err < 0) { le16_add_cpu(&fe->i_links_count, -1); - inode->i_nlink--; + drop_nlink(inode); mlog_errno(err); goto bail; } @@ -749,7 +749,7 @@ static int ocfs2_link(struct dentry *old_dentry, parent_fe_bh, de_bh); if (err) { le16_add_cpu(&fe->i_links_count, -1); - inode->i_nlink--; + drop_nlink(inode); mlog_errno(err); goto bail; } diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index c3d83f67154a..ad5afa4ea8f3 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -189,8 +189,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry) inode->i_nlink = 0; mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - dir->i_nlink--; - mark_inode_dirty(dir); + inode_dec_link_count(dir); retval = 0; end_rmdir: @@ -234,9 +233,8 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry) mark_buffer_dirty(bh); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); - inode->i_nlink--; inode->i_ctime = dir->i_ctime; - mark_inode_dirty(inode); + inode_dec_link_count(inode); retval = 0; end_unlink: diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c61710e49c62..c76d427e027b 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -20,7 +20,7 @@ #include #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } -#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--; +#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); // directory item contains array of entry headers. This performs // binary search through that array @@ -994,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink = 1; } - inode->i_nlink--; + drop_nlink(inode); /* * we schedule before doing the add_save_link call, save the link @@ -1475,7 +1475,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (S_ISDIR(new_dentry_inode->i_mode)) { new_dentry_inode->i_nlink = 0; } else { - new_dentry_inode->i_nlink--; + drop_nlink(new_dentry_inode); } new_dentry_inode->i_ctime = ctime; savelink = new_dentry_inode->i_nlink; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index b8a73f716fbe..f7c08db8e34c 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -250,7 +250,7 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, sysv_set_link(new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) - new_inode->i_nlink--; + drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { if (dir_de) { diff --git a/fs/udf/namei.c b/fs/udf/namei.c index ab9a7629d23e..d14d25534aa8 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -878,8 +878,7 @@ static int udf_rmdir(struct inode * dir, struct dentry * dentry) inode->i_nlink); inode->i_nlink = 0; inode->i_size = 0; - mark_inode_dirty(inode); - dir->i_nlink --; + inode_dec_link_count(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); mark_inode_dirty(dir); @@ -923,8 +922,7 @@ static int udf_unlink(struct inode * dir, struct dentry * dentry) goto end_unlink; dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); mark_inode_dirty(dir); - inode->i_nlink--; - mark_inode_dirty(inode); + inode_dec_link_count(inode); inode->i_ctime = dir->i_ctime; retval = 0; @@ -1101,8 +1099,7 @@ out: return err; out_no_entry: - inode->i_nlink--; - mark_inode_dirty(inode); + inode_dec_link_count(inode); iput(inode); goto out; } @@ -1261,9 +1258,8 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, if (new_inode) { - new_inode->i_nlink--; new_inode->i_ctime = current_fs_time(new_inode->i_sb); - mark_inode_dirty(new_inode); + inode_dec_link_count(new_inode); } old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb); mark_inode_dirty(old_dir); @@ -1279,12 +1275,10 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, } else mark_buffer_dirty_inode(dir_bh, old_inode); - old_dir->i_nlink --; - mark_inode_dirty(old_dir); + inode_dec_link_count(old_dir); if (new_inode) { - new_inode->i_nlink --; - mark_inode_dirty(new_inode); + inode_dec_link_count(new_inode); } else { diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index d344b411e261..e84c0ecf0730 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -308,7 +308,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, ufs_set_link(new_dir, new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) - new_inode->i_nlink--; + drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { if (dir_de) { diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 9a8f48bae956..090d74ffa061 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -782,7 +782,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - dir->i_nlink--; + drop_nlink(dir); inode->i_nlink = 0; inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; @@ -930,7 +930,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto error_dotdot; } - old_dir->i_nlink--; + drop_nlink(old_dir); if (!new_inode) new_dir->i_nlink++; } @@ -947,10 +947,9 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, mark_inode_dirty(old_dir); if (new_inode) { + drop_nlink(new_inode); if (is_dir) - new_inode->i_nlink -= 2; - else - new_inode->i_nlink--; + drop_nlink(new_inode); new_inode->i_ctime = ts; } out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 4bb70871873f..26d3c61116c0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1225,9 +1225,14 @@ static inline void inode_inc_link_count(struct inode *inode) mark_inode_dirty(inode); } -static inline void inode_dec_link_count(struct inode *inode) +static inline void drop_nlink(struct inode *inode) { inode->i_nlink--; +} + +static inline void inode_dec_link_count(struct inode *inode) +{ + drop_nlink(inode); mark_inode_dirty(inode); } diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 840f8a6fb85f..10aa8eeeb112 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -307,7 +307,7 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry) dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME; dir->i_size -= DIRENT_SIZE; - inode->i_nlink--; + drop_nlink(inode); dput(dentry); return 0; } diff --git a/mm/shmem.c b/mm/shmem.c index b96de69f236b..908dd947b1ea 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1772,7 +1772,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) dir->i_size -= BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - inode->i_nlink--; + drop_nlink(inode); dput(dentry); /* Undo the count from "create" - this does all the work */ return 0; } @@ -1782,8 +1782,8 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry) if (!simple_empty(dentry)) return -ENOTEMPTY; - dentry->d_inode->i_nlink--; - dir->i_nlink--; + drop_nlink(dentry->d_inode); + drop_nlink(dir); return shmem_unlink(dir, dentry); } @@ -1804,9 +1804,9 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct if (new_dentry->d_inode) { (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) - old_dir->i_nlink--; + drop_nlink(old_dir); } else if (they_are_dirs) { - old_dir->i_nlink--; + drop_nlink(old_dir); new_dir->i_nlink++; } -- cgit v1.2.3-71-gd317 From d8c76e6f45c111c32a4b3e50a2adc9210737b0d8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 30 Sep 2006 23:29:04 -0700 Subject: [PATCH] r/o bind mount prepwork: inc_nlink() helper This is mostly included for parity with dec_nlink(), where we will have some more hooks. This one should stay pretty darn straightforward for now. Signed-off-by: Dave Hansen Acked-by: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/hw/ipath/ipath_fs.c | 4 ++-- drivers/usb/core/inode.c | 4 ++-- fs/9p/vfs_inode.c | 2 +- fs/autofs/root.c | 2 +- fs/autofs4/root.c | 2 +- fs/bfs/dir.c | 2 +- fs/cifs/inode.c | 2 +- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 4 ++-- fs/configfs/mount.c | 2 +- fs/debugfs/inode.c | 4 ++-- fs/ext3/namei.c | 6 +++--- fs/fuse/control.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hpfs/namei.c | 4 ++-- fs/hugetlbfs/inode.c | 4 ++-- fs/jffs2/dir.c | 6 +++--- fs/jffs2/fs.c | 6 +++--- fs/jfs/namei.c | 6 +++--- fs/libfs.c | 4 ++-- fs/msdos/namei.c | 4 ++-- fs/ocfs2/dlm/dlmfs.c | 6 +++--- fs/ocfs2/namei.c | 8 ++++---- fs/ramfs/inode.c | 4 ++-- fs/reiserfs/namei.c | 6 +++--- fs/sysfs/dir.c | 4 ++-- fs/sysfs/mount.c | 2 +- fs/udf/inode.c | 2 +- fs/udf/namei.c | 6 +++--- fs/vfat/namei.c | 4 ++-- include/linux/fs.h | 7 ++++++- ipc/mqueue.c | 2 +- kernel/cpuset.c | 8 ++++---- mm/shmem.c | 8 ++++---- net/sunrpc/rpc_pipe.c | 6 +++--- security/inode.c | 4 ++-- security/selinux/selinuxfs.c | 4 ++-- 37 files changed, 80 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a507d0b5be6c..d9ff283f725e 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -66,8 +66,8 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { inode->i_op = &simple_dir_inode_operations; - inode->i_nlink++; - dir->i_nlink++; + inc_nlink(inode); + inc_nlink(dir); } inode->i_fop = fops; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 88002e45a6b4..7c77c2d8d300 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -263,7 +263,7 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); break; } } @@ -295,7 +295,7 @@ static int usbfs_mkdir (struct inode *dir, struct dentry *dentry, int mode) mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; res = usbfs_mknod (dir, dentry, mode, 0); if (!res) - dir->i_nlink++; + inc_nlink(dir); return res; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7a7ec2d1d2f4..5241c600ce28 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -233,7 +233,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) inode->i_op = &v9fs_symlink_inode_operations; break; case S_IFDIR: - inode->i_nlink++; + inc_nlink(inode); if(v9ses->extended) inode->i_op = &v9fs_dir_inode_operations_ext; else diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 54ad70731927..368a1c33a3c8 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -466,7 +466,7 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) ent->dentry = dentry; autofs_hash_insert(dh,ent); - dir->i_nlink++; + inc_nlink(dir); d_instantiate(dentry, iget(dir->i_sb,ino)); unlock_kernel(); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 348bec0982b0..e21bb4668201 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -713,7 +713,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (p_ino && dentry->d_parent != dentry) atomic_inc(&p_ino->count); ino->inode = inode; - dir->i_nlink++; + inc_nlink(dir); dir->i_mtime = CURRENT_TIME; return 0; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index ce05d1643dd1..a650f1d0b85e 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -163,7 +163,7 @@ static int bfs_link(struct dentry * old, struct inode * dir, struct dentry * new unlock_kernel(); return err; } - inode->i_nlink++; + inc_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); atomic_inc(&inode->i_count); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 74441a17e186..76b7fb34101a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -735,7 +735,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) cFYI(1, ("cifs_mkdir returned 0x%x", rc)); d_drop(direntry); } else { - inode->i_nlink++; + inc_nlink(inode); if (pTcon->ses->capabilities & CAP_UNIX) rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb,xid); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 0a2fd8bb7579..0102b28a15fb 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -304,7 +304,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, coda_dir_changed(dir_inode, 0); atomic_inc(&inode->i_count); d_instantiate(de, inode); - inode->i_nlink++; + inc_nlink(inode); out: unlock_kernel(); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 816e8ef64560..8a3b6a1a6ad1 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -139,7 +139,7 @@ static int init_dir(struct inode * inode) inode->i_fop = &configfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); return 0; } @@ -169,7 +169,7 @@ static int create_dir(struct config_item * k, struct dentry * p, if (!error) { error = configfs_create(d, mode, init_dir); if (!error) { - p->d_inode->i_nlink++; + inc_nlink(p->d_inode); (d)->d_op = &configfs_dentry_ops; } else { struct configfs_dirent *sd = d->d_fsdata; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 3e5fe843e1df..68bd5c93ca52 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -84,7 +84,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_op = &configfs_dir_inode_operations; inode->i_fop = &configfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); } else { pr_debug("configfs: could not get root inode\n"); return -ENOMEM; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 269e649e6dc6..ecf3da9edf21 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -54,7 +54,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); break; } } @@ -87,7 +87,7 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; res = debugfs_mknod(dir, dentry, mode, 0); if (!res) - dir->i_nlink++; + inc_nlink(dir); return res; } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 14c55adfae83..b45c88bd5f73 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1616,7 +1616,7 @@ static int ext3_delete_entry (handle_t *handle, */ static inline void ext3_inc_count(handle_t *handle, struct inode *inode) { - inode->i_nlink++; + inc_nlink(inode); } static inline void ext3_dec_count(handle_t *handle, struct inode *inode) @@ -1775,7 +1775,7 @@ retry: iput (inode); goto out_stop; } - dir->i_nlink++; + inc_nlink(dir); ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); @@ -2341,7 +2341,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, if (new_inode) { drop_nlink(new_inode); } else { - new_dir->i_nlink++; + inc_nlink(new_dir); ext3_update_dx_flag(new_dir); ext3_mark_inode_dirty(handle, new_dir); } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 79ec1f23d4d2..16b39c053d47 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -116,7 +116,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) return 0; parent = fuse_control_sb->s_root; - parent->d_inode->i_nlink++; + inc_nlink(parent->d_inode); sprintf(name, "%llu", (unsigned long long) fc->id); parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, &simple_dir_inode_operations, diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9ceb0dfaa1cc..99b4ed1b87d2 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -298,7 +298,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, if (res) return res; - inode->i_nlink++; + inc_nlink(inode); hfsplus_instantiate(dst_dentry, inode, cnid); atomic_inc(&inode->i_count); inode->i_ctime = CURRENT_TIME_SEC; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 4078b0becc5e..25dd6f81eca7 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -89,7 +89,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) brelse(bh); hpfs_mark_4buffers_dirty(&qbh0); hpfs_brelse4(&qbh0); - dir->i_nlink++; + inc_nlink(dir); insert_inode_hash(result); if (result->i_uid != current->fsuid || @@ -635,7 +635,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, end: hpfs_i(i)->i_parent_dir = new_dir->i_ino; if (S_ISDIR(i->i_mode)) { - new_dir->i_nlink++; + inc_nlink(new_dir); drop_nlink(old_dir); } if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index f5b8f329aca6..5e03b2f67b93 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -377,7 +377,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); break; case S_IFLNK: inode->i_op = &page_symlink_inode_operations; @@ -418,7 +418,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); if (!retval) - dir->i_nlink++; + inc_nlink(dir); return retval; } diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index a5e9f2205b33..9def6adf4a5d 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -588,7 +588,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) } dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); - dir_i->i_nlink++; + inc_nlink(dir_i); jffs2_free_raw_dirent(rd); @@ -836,7 +836,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, /* If it was a directory we moved, and there was no victim, increase i_nlink on its new parent */ if (S_ISDIR(old_dentry->d_inode->i_mode) && !victim_f) - new_dir_i->i_nlink++; + inc_nlink(new_dir_i); /* Unlink the original */ ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i), @@ -848,7 +848,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, /* Oh shit. We really ought to make a single node which can do both atomically */ struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); down(&f->sem); - old_dentry->d_inode->i_nlink++; + inc_nlink(old_dentry->d_inode); if (f->inocache) f->inocache->nlink++; up(&f->sem); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 72d9909d95ff..7bc1a4201c0c 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -277,13 +277,13 @@ void jffs2_read_inode (struct inode *inode) for (fd=f->dents; fd; fd = fd->next) { if (fd->type == DT_DIR && fd->ino) - inode->i_nlink++; + inc_nlink(inode); } /* and '..' */ - inode->i_nlink++; + inc_nlink(inode); /* Root dir gets i_nlink 3 for some reason */ if (inode->i_ino == 1) - inode->i_nlink++; + inc_nlink(inode); inode->i_op = &jffs2_dir_inode_operations; inode->i_fop = &jffs2_dir_operations; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 088b85976ac0..8cef88170aa4 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -292,7 +292,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) mark_inode_dirty(ip); /* update parent directory inode */ - dip->i_nlink++; /* for '..' from child directory */ + inc_nlink(dip); /* for '..' from child directory */ dip->i_ctime = dip->i_mtime = CURRENT_TIME; mark_inode_dirty(dip); @@ -822,7 +822,7 @@ static int jfs_link(struct dentry *old_dentry, goto free_dname; /* update object inode */ - ip->i_nlink++; /* for new link */ + inc_nlink(ip); /* for new link */ ip->i_ctime = CURRENT_TIME; dir->i_ctime = dir->i_mtime = CURRENT_TIME; mark_inode_dirty(dir); @@ -1206,7 +1206,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out4; } if (S_ISDIR(old_ip->i_mode)) - new_dir->i_nlink++; + inc_nlink(new_dir); } /* * Remove old directory entry diff --git a/fs/libfs.c b/fs/libfs.c index 9204feba75ac..bd08e0e64a8c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -243,7 +243,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den struct inode *inode = old_dentry->d_inode; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - inode->i_nlink++; + inc_nlink(inode); atomic_inc(&inode->i_count); dget(dentry); d_instantiate(dentry, inode); @@ -306,7 +306,7 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(old_dir); } else if (they_are_dirs) { drop_nlink(old_dir); - new_dir->i_nlink++; + inc_nlink(new_dir); } old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 635613f2f65a..fa868c755907 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -389,7 +389,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo); if (err) goto out_free; - dir->i_nlink++; + inc_nlink(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); @@ -551,7 +551,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, } drop_nlink(old_dir); if (!new_inode) - new_dir->i_nlink++; + inc_nlink(new_dir); } err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 0368c6402182..16b8d1ba7066 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -338,7 +338,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_nlink++; + inc_nlink(inode); inode->i_fop = &simple_dir_operations; inode->i_op = &dlmfs_root_inode_operations; @@ -395,7 +395,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, /* directory inodes start off with i_nlink == * 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); break; } @@ -449,7 +449,7 @@ static int dlmfs_mkdir(struct inode * dir, } ip->ip_dlm = dlm; - dir->i_nlink++; + inc_nlink(dir); d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 40f83f53053a..8c370a39e0c1 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -429,7 +429,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dir->i_nlink++; + inc_nlink(dir); } status = ocfs2_add_entry(handle, dentry, inode, @@ -730,7 +730,7 @@ static int ocfs2_link(struct dentry *old_dentry, goto bail; } - inode->i_nlink++; + inc_nlink(inode); inode->i_ctime = CURRENT_TIME; fe->i_links_count = cpu_to_le16(inode->i_nlink); fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); @@ -952,7 +952,7 @@ static int ocfs2_unlink(struct inode *dir, parent_node_bh); if (status < 0) { mlog_errno(status); - dir->i_nlink++; + inc_nlink(dir); } } @@ -1382,7 +1382,7 @@ static int ocfs2_rename(struct inode *old_dir, if (new_inode) { new_inode->i_nlink--; } else { - new_dir->i_nlink++; + inc_nlink(new_dir); mark_inode_dirty(new_dir); } } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index bc0e51662424..2faf4cdf61b0 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -75,7 +75,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); break; case S_IFLNK: inode->i_op = &page_symlink_inode_operations; @@ -113,7 +113,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) { int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); if (!retval) - dir->i_nlink++; + inc_nlink(dir); return retval; } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c76d427e027b..cf92e89515f2 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -19,7 +19,7 @@ #include #include -#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } +#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); // directory item contains array of entry headers. This performs @@ -1006,7 +1006,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, 0); if (retval < 0) { - inode->i_nlink++; + inc_nlink(inode); goto end_unlink; } inode->i_ctime = CURRENT_TIME_SEC; @@ -1143,7 +1143,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, } /* inc before scheduling so reiserfs_unlink knows we are here */ - inode->i_nlink++; + inc_nlink(inode); retval = journal_begin(&th, dir->i_sb, jbegin_count); if (retval) { diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 5f3d725d1125..3aa3434621ca 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -103,7 +103,7 @@ static int init_dir(struct inode * inode) inode->i_fop = &sysfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); return 0; } @@ -137,7 +137,7 @@ static int create_dir(struct kobject * k, struct dentry * p, if (!error) { error = sysfs_create(*d, mode, init_dir); if (!error) { - p->d_inode->i_nlink++; + inc_nlink(p->d_inode); (*d)->d_op = &sysfs_dentry_ops; d_rehash(*d); } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 40190c489271..20551a1b8a56 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -49,7 +49,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_op = &sysfs_dir_inode_operations; inode->i_fop = &sysfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); } else { pr_debug("sysfs: could not get root inode\n"); return -ENOMEM; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index b223b32db991..ae21a0e59e95 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1165,7 +1165,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_op = &udf_dir_inode_operations; inode->i_fop = &udf_dir_operations; inode->i_mode |= S_IFDIR; - inode->i_nlink ++; + inc_nlink(inode); break; } case ICBTAG_FILE_TYPE_REALTIME: diff --git a/fs/udf/namei.c b/fs/udf/namei.c index d14d25534aa8..e40c95e65117 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -762,7 +762,7 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - dir->i_nlink++; + inc_nlink(dir); mark_inode_dirty(dir); d_instantiate(dentry, inode); if (fibh.sbh != fibh.ebh) @@ -1147,7 +1147,7 @@ static int udf_link(struct dentry * old_dentry, struct inode * dir, if (fibh.sbh != fibh.ebh) udf_release_data(fibh.ebh); udf_release_data(fibh.sbh); - inode->i_nlink ++; + inc_nlink(inode); inode->i_ctime = current_fs_time(inode->i_sb); mark_inode_dirty(inode); atomic_inc(&inode->i_count); @@ -1282,7 +1282,7 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, } else { - new_dir->i_nlink ++; + inc_nlink(new_dir); mark_inode_dirty(new_dir); } } diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 090d74ffa061..5846ba2d5d9f 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -837,7 +837,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_free; dir->i_version++; - dir->i_nlink++; + inc_nlink(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); @@ -932,7 +932,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, } drop_nlink(old_dir); if (!new_inode) - new_dir->i_nlink++; + inc_nlink(new_dir); } err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 26d3c61116c0..6a5267da565f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1219,9 +1219,14 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } -static inline void inode_inc_link_count(struct inode *inode) +static inline void inc_nlink(struct inode *inode) { inode->i_nlink++; +} + +static inline void inode_inc_link_count(struct inode *inode) +{ + inc_nlink(inode); mark_inode_dirty(inode); } diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 10aa8eeeb112..d75d0ba83360 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -168,7 +168,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, /* all is ok */ info->user = get_uid(u); } else if (S_ISDIR(mode)) { - inode->i_nlink++; + inc_nlink(inode); /* Some things misbehave if size == 0 on a directory */ inode->i_size = 2 * DIRENT_SIZE; inode->i_op = &mqueue_dir_inode_operations; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8c3c400cce91..9d850ae13b1b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -377,7 +377,7 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data, inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directories start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); } else { return -ENOMEM; } @@ -1565,7 +1565,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode) inode->i_fop = &simple_dir_operations; /* start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); } else if (S_ISREG(mode)) { inode->i_size = 0; inode->i_fop = &cpuset_file_operations; @@ -1598,7 +1598,7 @@ static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode) error = cpuset_create_file(dentry, S_IFDIR | mode); if (!error) { dentry->d_fsdata = cs; - parent->d_inode->i_nlink++; + inc_nlink(parent->d_inode); cs->dentry = dentry; } dput(dentry); @@ -2033,7 +2033,7 @@ int __init cpuset_init(void) } root = cpuset_mount->mnt_sb->s_root; root->d_fsdata = &top_cpuset; - root->d_inode->i_nlink++; + inc_nlink(root->d_inode); top_cpuset.dentry = root; root->d_inode->i_op = &cpuset_dir_inode_operations; number_of_cpusets = 1; diff --git a/mm/shmem.c b/mm/shmem.c index 908dd947b1ea..bb8ca7ef7094 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1379,7 +1379,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) &sbinfo->policy_nodes); break; case S_IFDIR: - inode->i_nlink++; + inc_nlink(inode); /* Some things misbehave if size == 0 on a directory */ inode->i_size = 2 * BOGO_DIRENT_SIZE; inode->i_op = &shmem_dir_inode_operations; @@ -1715,7 +1715,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) return error; - dir->i_nlink++; + inc_nlink(dir); return 0; } @@ -1750,7 +1750,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - inode->i_nlink++; + inc_nlink(inode); atomic_inc(&inode->i_count); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); @@ -1807,7 +1807,7 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct drop_nlink(old_dir); } else if (they_are_dirs) { drop_nlink(old_dir); - new_dir->i_nlink++; + inc_nlink(new_dir); } old_dir->i_size -= BOGO_DIRENT_SIZE; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 700c6e061a04..9a0b41a97f90 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -494,7 +494,7 @@ rpc_get_inode(struct super_block *sb, int mode) case S_IFDIR: inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; - inode->i_nlink++; + inc_nlink(inode); default: break; } @@ -571,7 +571,7 @@ rpc_populate(struct dentry *parent, if (private) rpc_inode_setowner(inode, private); if (S_ISDIR(mode)) - dir->i_nlink++; + inc_nlink(dir); d_add(dentry, inode); } mutex_unlock(&dir->i_mutex); @@ -593,7 +593,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) goto out_err; inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); - dir->i_nlink++; + inc_nlink(dir); inode_dir_notify(dir, DN_CREATE); return 0; out_err: diff --git a/security/inode.c b/security/inode.c index 49ee51529396..9b16e14f3a80 100644 --- a/security/inode.c +++ b/security/inode.c @@ -78,7 +78,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); break; } } @@ -111,7 +111,7 @@ static int mkdir(struct inode *dir, struct dentry *dentry, int mode) mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; res = mknod(dir, dentry, mode, 0); if (!res) - dir->i_nlink++; + inc_nlink(dir); return res; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index bab7b386cb8d..cd244419c980 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1253,10 +1253,10 @@ static int sel_make_dir(struct inode *dir, struct dentry *dentry) inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; + inc_nlink(inode); d_add(dentry, inode); /* bump link count on parent directory, too */ - dir->i_nlink++; + inc_nlink(dir); out: return ret; } -- cgit v1.2.3-71-gd317 From ce71ec36840368b877fb63bd14c8e67ab62d08b1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 30 Sep 2006 23:29:06 -0700 Subject: [PATCH] r/o bind mounts: monitor zeroing of i_nlink Some filesystems, instead of simply decrementing i_nlink, simply zero it during an unlink operation. We need to catch these in addition to the decrement operations. Signed-off-by: Dave Hansen Acked-by: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 4 ++-- fs/cifs/inode.c | 2 +- fs/ext3/namei.c | 2 +- fs/fuse/dir.c | 4 ++-- fs/hfs/dir.c | 2 +- fs/hfsplus/dir.c | 4 ++-- fs/hpfs/namei.c | 4 ++-- fs/jfs/namei.c | 2 +- fs/msdos/namei.c | 4 ++-- fs/nfs/dir.c | 2 +- fs/qnx4/namei.c | 2 +- fs/reiserfs/namei.c | 4 ++-- fs/udf/namei.c | 2 +- fs/vfat/namei.c | 4 ++-- include/linux/fs.h | 5 +++++ 15 files changed, 26 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e21bb4668201..c1493524da4d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -638,7 +638,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dput(ino->dentry); dentry->d_inode->i_size = 0; - dentry->d_inode->i_nlink = 0; + clear_nlink(dentry->d_inode); dir->i_mtime = CURRENT_TIME; @@ -673,7 +673,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) } dput(ino->dentry); dentry->d_inode->i_size = 0; - dentry->d_inode->i_nlink = 0; + clear_nlink(dentry->d_inode); if (dir->i_nlink) drop_nlink(dir); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 76b7fb34101a..6b90ef98e4cf 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -818,7 +818,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) if (!rc) { drop_nlink(inode); i_size_write(direntry->d_inode,0); - direntry->d_inode->i_nlink = 0; + clear_nlink(direntry->d_inode); } cifsInode = CIFS_I(direntry->d_inode); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index b45c88bd5f73..906731a20f1a 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2045,7 +2045,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) "empty directory has nlink!=2 (%d)", inode->i_nlink); inode->i_version++; - inode->i_nlink = 0; + clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is * zero will ensure that the right thing happens during any * recovery. */ diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f85b2a282f13..8605155db171 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -508,7 +508,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) /* Set nlink to zero so the inode can be cleared, if the inode does have more links this will be discovered at the next lookup/getattr */ - inode->i_nlink = 0; + clear_nlink(inode); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); @@ -534,7 +534,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) err = req->out.h.error; fuse_put_request(fc, req); if (!err) { - entry->d_inode->i_nlink = 0; + clear_nlink(entry->d_inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); } else if (err == -EINTR) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index cfef6ad529a7..37d681b4f216 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -273,7 +273,7 @@ static int hfs_rmdir(struct inode *dir, struct dentry *dentry) res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); if (res) return res; - inode->i_nlink = 0; + clear_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; hfs_delete_inode(inode); mark_inode_dirty(inode); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 99b4ed1b87d2..7e309751645f 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -348,7 +348,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) } else inode->i_flags |= S_DEAD; } else - inode->i_nlink = 0; + clear_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); @@ -387,7 +387,7 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); if (res) return res; - inode->i_nlink = 0; + clear_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; hfsplus_delete_inode(inode); mark_inode_dirty(inode); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 25dd6f81eca7..2507e7393f3c 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -495,7 +495,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) break; default: drop_nlink(dir); - inode->i_nlink = 0; + clear_nlink(inode); err = 0; } goto out; @@ -590,7 +590,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, int r; if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 1)) != 2) { if ((nde = map_dirent(new_dir, hpfs_i(new_dir)->i_dno, (char *)new_name, new_len, NULL, &qbh1))) { - new_inode->i_nlink = 0; + clear_nlink(new_inode); copy_de(nde, &de); memcpy(nde->name, new_name, new_len); hpfs_mark_4buffers_dirty(&qbh1); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 8cef88170aa4..b8d16a6aa88f 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -414,7 +414,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) JFS_IP(ip)->acl.flag = 0; /* mark the target directory as deleted */ - ip->i_nlink = 0; + clear_nlink(ip); mark_inode_dirty(ip); rc = txCommit(tid, 2, &iplist[0], 0); diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index fa868c755907..b0f01b3b0536 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -345,7 +345,7 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) goto out; drop_nlink(dir); - inode->i_nlink = 0; + clear_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; fat_detach(inode); out: @@ -430,7 +430,7 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - inode->i_nlink = 0; + clear_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; fat_detach(inode); out: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 26eecb86f9b0..481f8892a919 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1286,7 +1286,7 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) - dentry->d_inode->i_nlink = 0; + clear_nlink(dentry->d_inode); nfs_end_data_update(dir); unlock_kernel(); diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index ad5afa4ea8f3..733cdf01d645 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -186,7 +186,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry) memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; mark_buffer_dirty(bh); - inode->i_nlink = 0; + clear_nlink(inode); mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; inode_dec_link_count(dir); diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index cf92e89515f2..16e9cff8f15d 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -913,7 +913,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) reiserfs_warning(inode->i_sb, "%s: empty directory has nlink " "!= 2 (%d)", __FUNCTION__, inode->i_nlink); - inode->i_nlink = 0; + clear_nlink(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; reiserfs_update_sd(&th, inode); @@ -1473,7 +1473,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_dentry_inode) { // adjust link number of the victim if (S_ISDIR(new_dentry_inode->i_mode)) { - new_dentry_inode->i_nlink = 0; + clear_nlink(new_dentry_inode); } else { drop_nlink(new_dentry_inode); } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index e40c95e65117..73163325e5ec 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -876,7 +876,7 @@ static int udf_rmdir(struct inode * dir, struct dentry * dentry) udf_warning(inode->i_sb, "udf_rmdir", "empty directory has nlink != 2 (%d)", inode->i_nlink); - inode->i_nlink = 0; + clear_nlink(inode); inode->i_size = 0; inode_dec_link_count(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 5846ba2d5d9f..edb711ff7b05 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -784,7 +784,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) goto out; drop_nlink(dir); - inode->i_nlink = 0; + clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); out: @@ -808,7 +808,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - inode->i_nlink = 0; + clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a5267da565f..3493d2828f7d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1235,6 +1235,11 @@ static inline void drop_nlink(struct inode *inode) inode->i_nlink--; } +static inline void clear_nlink(struct inode *inode) +{ + inode->i_nlink = 0; +} + static inline void inode_dec_link_count(struct inode *inode) { drop_nlink(inode); -- cgit v1.2.3-71-gd317 From 74588d8ba34ff1bda027cfa737972af01ab00c8b Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Sat, 30 Sep 2006 23:29:12 -0700 Subject: [PATCH] Generic ioremap_page_range: implementation This patch adds a generic implementation of ioremap_page_range() in lib/ioremap.c based on the i386 implementation. It differs from the i386 version in the following ways: * The PTE flags are passed as a pgprot_t argument and must be determined up front by the arch-specific code. No additional PTE flags are added. * Uses set_pte_at() instead of set_pte() [bunk@stusta.de: warning fix] ]dhowells@redhat.com: nommu build fix] Signed-off-by: Haavard Skinnemoen Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Mikael Starvik Cc: Andi Kleen Cc: Cc: Ralf Baechle Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: Paul Mundt Signed-off-by: Adrian Bunk Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 4 +++ lib/Makefile | 1 + lib/ioremap.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 lib/ioremap.c (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 420e2fdf26f6..aa3f5af670b5 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -19,8 +19,12 @@ #define _LINUX_IO_H #include +#include void __iowrite32_copy(void __iomem *to, const void *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); +int ioremap_page_range(unsigned long addr, unsigned long end, + unsigned long phys_addr, pgprot_t prot); + #endif /* _LINUX_IO_H */ diff --git a/lib/Makefile b/lib/Makefile index 402762fead70..ddf3e676e1f4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -7,6 +7,7 @@ lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ sha1.o +lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o kref.o kobject_uevent.o klist.o diff --git a/lib/ioremap.c b/lib/ioremap.c new file mode 100644 index 000000000000..29c810ec9813 --- /dev/null +++ b/lib/ioremap.c @@ -0,0 +1,94 @@ +/* + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + * + * (C) Copyright 1995 1996 Linus Torvalds + */ +#include +#include +#include + +#include +#include + +static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pte_t *pte; + unsigned long pfn; + + pfn = phys_addr >> PAGE_SHIFT; + pte = pte_alloc_kernel(pmd, addr); + if (!pte) + return -ENOMEM; + do { + BUG_ON(!pte_none(*pte)); + set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); + return 0; +} + +static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pmd_t *pmd; + unsigned long next; + + phys_addr -= addr; + pmd = pmd_alloc(&init_mm, pud, addr); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); + if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (pmd++, addr = next, addr != end); + return 0; +} + +static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pud_t *pud; + unsigned long next; + + phys_addr -= addr; + pud = pud_alloc(&init_mm, pgd, addr); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); + if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (pud++, addr = next, addr != end); + return 0; +} + +int ioremap_page_range(unsigned long addr, + unsigned long end, unsigned long phys_addr, pgprot_t prot) +{ + pgd_t *pgd; + unsigned long start; + unsigned long next; + int err; + + BUG_ON(addr >= end); + + flush_cache_all(); + + start = addr; + phys_addr -= addr; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + if (err) + break; + } while (pgd++, addr = next, addr != end); + + flush_tlb_all(); + + return err; +} -- cgit v1.2.3-71-gd317 From d6cbd281d189977b38eac7eb2a4678de19b6b483 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Sep 2006 23:29:26 -0700 Subject: [PATCH] Some cleanup in the pipe code Split the big and hard to read do_pipe function into smaller pieces. This creates new create_write_pipe/free_write_pipe/create_read_pipe functions. These functions are made global so that they can be used by other parts of the kernel. The resulting code is more generic and easier to read and has cleaner error handling and less gotos. [akpm@osdl.org: cleanup] Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 155 ++++++++++++++++++++++++++++++++--------------------- include/linux/fs.h | 3 ++ 2 files changed, 96 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index 2e60e1c8815e..b1626f269a34 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -874,87 +874,118 @@ fail_inode: return NULL; } -int do_pipe(int *fd) +struct file *create_write_pipe(void) { - struct qstr this; - char name[32]; + int err; + struct inode *inode; + struct file *f; struct dentry *dentry; - struct inode * inode; - struct file *f1, *f2; - int error; - int i, j; - - error = -ENFILE; - f1 = get_empty_filp(); - if (!f1) - goto no_files; - - f2 = get_empty_filp(); - if (!f2) - goto close_f1; + char name[32]; + struct qstr this; + f = get_empty_filp(); + if (!f) + return ERR_PTR(-ENFILE); + err = -ENFILE; inode = get_pipe_inode(); if (!inode) - goto close_f12; + goto err_file; - error = get_unused_fd(); - if (error < 0) - goto close_f12_inode; - i = error; - - error = get_unused_fd(); - if (error < 0) - goto close_f12_inode_i; - j = error; - - error = -ENOMEM; sprintf(name, "[%lu]", inode->i_ino); this.name = name; this.len = strlen(name); this.hash = inode->i_ino; /* will go */ + err = -ENOMEM; dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); if (!dentry) - goto close_f12_inode_i_j; + goto err_inode; dentry->d_op = &pipefs_dentry_operations; d_add(dentry, inode); - f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); - f1->f_dentry = f2->f_dentry = dget(dentry); - f1->f_mapping = f2->f_mapping = inode->i_mapping; - - /* read file */ - f1->f_pos = f2->f_pos = 0; - f1->f_flags = O_RDONLY; - f1->f_op = &read_pipe_fops; - f1->f_mode = FMODE_READ; - f1->f_version = 0; - - /* write file */ - f2->f_flags = O_WRONLY; - f2->f_op = &write_pipe_fops; - f2->f_mode = FMODE_WRITE; - f2->f_version = 0; - - fd_install(i, f1); - fd_install(j, f2); - fd[0] = i; - fd[1] = j; + f->f_vfsmnt = mntget(pipe_mnt); + f->f_dentry = dentry; + f->f_mapping = inode->i_mapping; - return 0; + f->f_flags = O_WRONLY; + f->f_op = &write_pipe_fops; + f->f_mode = FMODE_WRITE; + f->f_version = 0; -close_f12_inode_i_j: - put_unused_fd(j); -close_f12_inode_i: - put_unused_fd(i); -close_f12_inode: + return f; + + err_inode: free_pipe_info(inode); iput(inode); -close_f12: - put_filp(f2); -close_f1: - put_filp(f1); -no_files: - return error; + err_file: + put_filp(f); + return ERR_PTR(err); +} + +void free_write_pipe(struct file *f) +{ + mntput(f->f_vfsmnt); + dput(f->f_dentry); + put_filp(f); +} + +struct file *create_read_pipe(struct file *wrf) +{ + struct file *f = get_empty_filp(); + if (!f) + return ERR_PTR(-ENFILE); + + /* Grab pipe from the writer */ + f->f_vfsmnt = mntget(wrf->f_vfsmnt); + f->f_dentry = dget(wrf->f_dentry); + f->f_mapping = wrf->f_dentry->d_inode->i_mapping; + + f->f_pos = 0; + f->f_flags = O_RDONLY; + f->f_op = &read_pipe_fops; + f->f_mode = FMODE_READ; + f->f_version = 0; + + return f; +} + +int do_pipe(int *fd) +{ + struct file *fw, *fr; + int error; + int fdw, fdr; + + fw = create_write_pipe(); + if (IS_ERR(fw)) + return PTR_ERR(fw); + fr = create_read_pipe(fw); + error = PTR_ERR(fr); + if (IS_ERR(fr)) + goto err_write_pipe; + + error = get_unused_fd(); + if (error < 0) + goto err_read_pipe; + fdr = error; + + error = get_unused_fd(); + if (error < 0) + goto err_fdr; + fdw = error; + + fd_install(fdr, fr); + fd_install(fdw, fw); + fd[0] = fdr; + fd[1] = fdw; + + return 0; + + err_fdr: + put_unused_fd(fdr); + err_read_pipe: + put_filp(fr); + err_write_pipe: + free_write_pipe(fw); + return error; } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 3493d2828f7d..2e29a2edaeec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,6 +1641,9 @@ static inline void allow_write_access(struct file *file) atomic_inc(&file->f_dentry->d_inode->i_writecount); } extern int do_pipe(int *); +extern struct file *create_read_pipe(struct file *f); +extern struct file *create_write_pipe(void); +extern void free_write_pipe(struct file *); extern int open_namei(int dfd, const char *, int, int, struct nameidata *); extern int may_open(struct nameidata *, int, int); -- cgit v1.2.3-71-gd317 From e239ca540594cff00adcce163dc332b27015d8e5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Sep 2006 23:29:27 -0700 Subject: [PATCH] Create call_usermodehelper_pipe() A new member in the ever growing family of call_usermode* functions is born. The new call_usermodehelper_pipe() function allows to pipe data to the stdin of the called user mode progam and behaves otherwise like the normal call_usermodehelp() (except that it always waits for the child to finish) Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 4 ++++ kernel/kmod.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0db22a1ab474..10f505c8431d 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -47,4 +47,8 @@ call_usermodehelper(char *path, char **argv, char **envp, int wait) extern void usermodehelper_init(void); +struct file; +extern int call_usermodehelper_pipe(char *path, char *argv[], char *envp[], + struct file **filp); + #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 842f8015d7fd..5c63c53014a9 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -122,6 +122,7 @@ struct subprocess_info { struct key *ring; int wait; int retval; + struct file *stdin; }; /* @@ -145,12 +146,26 @@ static int ____call_usermodehelper(void *data) key_put(old_session); + /* Install input pipe when needed */ + if (sub_info->stdin) { + struct files_struct *f = current->files; + struct fdtable *fdt; + /* no races because files should be private here */ + sys_close(0); + fd_install(0, sub_info->stdin); + spin_lock(&f->file_lock); + fdt = files_fdtable(f); + FD_SET(0, fdt->open_fds); + FD_CLR(0, fdt->close_on_exec); + spin_unlock(&f->file_lock); + } + /* We can run anywhere, unlike our parent keventd(). */ set_cpus_allowed(current, CPU_MASK_ALL); retval = -EPERM; if (current->fs->root) - retval = execve(sub_info->path, sub_info->argv,sub_info->envp); + retval = execve(sub_info->path, sub_info->argv, sub_info->envp); /* Exec failed? */ sub_info->retval = retval; @@ -268,6 +283,44 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, } EXPORT_SYMBOL(call_usermodehelper_keys); +int call_usermodehelper_pipe(char *path, char **argv, char **envp, + struct file **filp) +{ + DECLARE_COMPLETION(done); + struct subprocess_info sub_info = { + .complete = &done, + .path = path, + .argv = argv, + .envp = envp, + .retval = 0, + }; + struct file *f; + DECLARE_WORK(work, __call_usermodehelper, &sub_info); + + if (!khelper_wq) + return -EBUSY; + + if (path[0] == '\0') + return 0; + + f = create_write_pipe(); + if (!f) + return -ENOMEM; + *filp = f; + + f = create_read_pipe(f); + if (!f) { + free_write_pipe(*filp); + return -ENOMEM; + } + sub_info.stdin = f; + + queue_work(khelper_wq, &work); + wait_for_completion(&done); + return sub_info.retval; +} +EXPORT_SYMBOL(call_usermodehelper_pipe); + void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); -- cgit v1.2.3-71-gd317 From 5a73fdc5ea836d2edc5e02890b51185fe304d7d3 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 30 Sep 2006 23:29:38 -0700 Subject: [PATCH] Some config.h removals During tracking down a PAE compile failure, I found that config.h was being included in a bunch of places in i386 code. It is no longer necessary, so drop it. Signed-off-by: Zachary Amsden Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/boot/video.S | 2 -- arch/i386/kernel/nmi.c | 1 - arch/i386/lib/delay.c | 1 - include/linux/unwind.h | 2 -- 4 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 8c2a6faeeae5..2c5b5cc55f79 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -11,8 +11,6 @@ * */ -#include /* for CONFIG_VIDEO_* */ - /* Enable autodetection of SVGA adapters and modes. */ #undef CONFIG_VIDEO_SVGA diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 0fc4997fb143..3e8e3adb0489 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -13,7 +13,6 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ -#include #include #include #include diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index 3c0714c4b669..f6edb11364df 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c @@ -11,7 +11,6 @@ */ #include -#include #include #include diff --git a/include/linux/unwind.h b/include/linux/unwind.h index ce48e2cd37a2..73e1751d03dd 100644 --- a/include/linux/unwind.h +++ b/include/linux/unwind.h @@ -12,8 +12,6 @@ * is not much point in implementing the full Dwarf2 unwind API. */ -#include - struct module; #ifdef CONFIG_STACK_UNWIND -- cgit v1.2.3-71-gd317