diff options
83 files changed, 1321 insertions, 1005 deletions
diff --git a/Documentation/input/iforce-protocol.txt b/Documentation/input/iforce-protocol.txt index 95df4ca70e71..8777d2d321e3 100644 --- a/Documentation/input/iforce-protocol.txt +++ b/Documentation/input/iforce-protocol.txt @@ -1,254 +1,254 @@ -** Introduction
-This document describes what I managed to discover about the protocol used to
-specify force effects to I-Force 2.0 devices. None of this information comes
-from Immerse. That's why you should not trust what is written in this
-document. This document is intended to help understanding the protocol.
-This is not a reference. Comments and corrections are welcome. To contact me,
-send an email to: deneux@ifrance.com
-
-** WARNING **
-I may not be held responsible for any dammage or harm caused if you try to
-send data to your I-Force device based on what you read in this document.
-
-** Preliminary Notes:
-All values are hexadecimal with big-endian encoding (msb on the left). Beware,
-values inside packets are encoded using little-endian. Bytes whose roles are
-unknown are marked ??? Information that needs deeper inspection is marked (?)
-
-** General form of a packet **
-This is how packets look when the device uses the rs232 to communicate.
-2B OP LEN DATA CS
-CS is the checksum. It is equal to the exclusive or of all bytes.
-
-When using USB:
-OP DATA
-The 2B, LEN and CS fields have disappeared, probably because USB handles frames and
-data corruption is handled or unsignificant.
-
-First, I describe effects that are sent by the device to the computer
-
-** Device input state
-This packet is used to indicate the state of each button and the value of each
-axis
-OP= 01 for a joystick, 03 for a wheel
-LEN= Varies from device to device
-00 X-Axis lsb
-01 X-Axis msb
-02 Y-Axis lsb, or gas pedal for a wheel
-03 Y-Axis msb, or brake pedal for a wheel
-04 Throttle
-05 Buttons
-06 Lower 4 bits: Buttons
- Upper 4 bits: Hat
-07 Rudder
-
-** Device effects states
-OP= 02
-LEN= Varies
-00 ? Bit 1 (Value 2) is the value of the deadman switch
-01 Bit 8 is set if the effect is playing. Bits 0 to 7 are the effect id.
-02 ??
-03 Address of parameter block changed (lsb)
-04 Address of parameter block changed (msb)
-05 Address of second parameter block changed (lsb)
-... depending on the number of parameter blocks updated
-
-** Force effect **
-OP= 01
-LEN= 0e
-00 Channel (when playing several effects at the same time, each must be assigned a channel)
-01 Wave form
- Val 00 Constant
- Val 20 Square
- Val 21 Triangle
- Val 22 Sine
- Val 23 Sawtooth up
- Val 24 Sawtooth down
- Val 40 Spring (Force = f(pos))
- Val 41 Friction (Force = f(velocity)) and Inertia (Force = f(acceleration))
-
-
-02 Axes affected and trigger
- Bits 4-7: Val 2 = effect along one axis. Byte 05 indicates direction
- Val 4 = X axis only. Byte 05 must contain 5a
- Val 8 = Y axis only. Byte 05 must contain b4
- Val c = X and Y axes. Bytes 05 must contain 60
- Bits 0-3: Val 0 = No trigger
- Val x+1 = Button x triggers the effect
- When the whole byte is 0, cancel the previously set trigger
-
-03-04 Duration of effect (little endian encoding, in ms)
-
-05 Direction of effect, if applicable. Else, see 02 for value to assign.
-
-06-07 Minimum time between triggering.
-
-08-09 Address of periodicity or magnitude parameters
-0a-0b Address of attack and fade parameters, or ffff if none.
-*or*
-08-09 Address of interactive parameters for X-axis, or ffff if not applicable
-0a-0b Address of interactive parameters for Y-axis, or ffff if not applicable
-
-0c-0d Delay before execution of effect (little endian encoding, in ms)
-
-
-** Time based parameters **
-
-*** Attack and fade ***
-OP= 02
-LEN= 08
-00-01 Address where to store the parameteres
-02-03 Duration of attack (little endian encoding, in ms)
-04 Level at end of attack. Signed byte.
-05-06 Duration of fade.
-07 Level at end of fade.
-
-*** Magnitude ***
-OP= 03
-LEN= 03
-00-01 Address
-02 Level. Signed byte.
-
-*** Periodicity ***
-OP= 04
-LEN= 07
-00-01 Address
-02 Magnitude. Signed byte.
-03 Offset. Signed byte.
-04 Phase. Val 00 = 0 deg, Val 40 = 90 degs.
-05-06 Period (little endian encoding, in ms)
-
-** Interactive parameters **
-OP= 05
-LEN= 0a
-00-01 Address
-02 Positive Coeff
-03 Negative Coeff
-04+05 Offset (center)
-06+07 Dead band (Val 01F4 = 5000 (decimal))
-08 Positive saturation (Val 0a = 1000 (decimal) Val 64 = 10000 (decimal))
-09 Negative saturation
-
-The encoding is a bit funny here: For coeffs, these are signed values. The
-maximum value is 64 (100 decimal), the min is 9c.
-For the offset, the minimum value is FE0C, the maximum value is 01F4.
-For the deadband, the minimum value is 0, the max is 03E8.
-
-** Controls **
-OP= 41
-LEN= 03
-00 Channel
-01 Start/Stop
- Val 00: Stop
- Val 01: Start and play once.
- Val 41: Start and play n times (See byte 02 below)
-02 Number of iterations n.
-
-** Init **
-
-*** Querying features ***
-OP= ff
-Query command. Length varies according to the query type.
-The general format of this packet is:
-ff 01 QUERY [INDEX] CHECKSUM
-reponses are of the same form:
-FF LEN QUERY VALUE_QUERIED CHECKSUM2
-where LEN = 1 + length(VALUE_QUERIED)
-
-**** Query ram size ****
-QUERY = 42 ('B'uffer size)
-The device should reply with the same packet plus two additionnal bytes
-containing the size of the memory:
-ff 03 42 03 e8 CS would mean that the device has 1000 bytes of ram available.
-
-**** Query number of effects ****
-QUERY = 4e ('N'umber of effects)
-The device should respond by sending the number of effects that can be played
-at the same time (one byte)
-ff 02 4e 14 CS would stand for 20 effects.
-
-**** Vendor's id ****
-QUERY = 4d ('M'anufacturer)
-Query the vendors'id (2 bytes)
-
-**** Product id *****
-QUERY = 50 ('P'roduct)
-Query the product id (2 bytes)
-
-**** Open device ****
-QUERY = 4f ('O'pen)
-No data returned.
-
-**** Close device *****
-QUERY = 43 ('C')lose
-No data returned.
-
-**** Query effect ****
-QUERY = 45 ('E')
-Send effect type.
-Returns nonzero if supported (2 bytes)
-
-**** Firmware Version ****
-QUERY = 56 ('V'ersion)
-Sends back 3 bytes - major, minor, subminor
-
-*** Initialisation of the device ***
-
-**** Set Control ****
-!!! Device dependent, can be different on different models !!!
-OP= 40 <idx> <val> [<val>]
-LEN= 2 or 3
-00 Idx
- Idx 00 Set dead zone (0..2048)
- Idx 01 Ignore Deadman sensor (0..1)
- Idx 02 Enable comm watchdog (0..1)
- Idx 03 Set the strength of the spring (0..100)
- Idx 04 Enable or disable the spring (0/1)
- Idx 05 Set axis saturation threshold (0..2048)
-
-**** Set Effect State ****
-OP= 42 <val>
-LEN= 1
-00 State
- Bit 3 Pause force feedback
- Bit 2 Enable force feedback
- Bit 0 Stop all effects
-
-**** Set overall gain ****
-OP= 43 <val>
-LEN= 1
-00 Gain
- Val 00 = 0%
- Val 40 = 50%
- Val 80 = 100%
-
-** Parameter memory **
-
-Each device has a certain amount of memory to store parameters of effects.
-The amount of RAM may vary, I encountered values from 200 to 1000 bytes. Below
-is the amount of memory apparently needed for every set of parameters:
- - period : 0c
- - magnitude : 02
- - attack and fade : 0e
- - interactive : 08
-
-** Appendix: How to study the protocol ? **
-
-1. Generate effects using the force editor provided with the DirectX SDK, or use Immersion Studio (freely available at their web site in the developer section: www.immersion.com)
-2. Start a soft spying RS232 or USB (depending on where you connected your joystick/wheel). I used ComPortSpy from fCoder (alpha version!)
-3. Play the effect, and watch what happens on the spy screen.
-
-A few words about ComPortSpy:
-At first glance, this soft seems, hum, well... buggy. In fact, data appear with a few seconds latency. Personnaly, I restart it every time I play an effect.
-Remember it's free (as in free beer) and alpha!
-
-** URLS **
-Check www.immerse.com for Immersion Studio, and www.fcoder.com for ComPortSpy.
-
-** Author of this document **
-Johann Deneux <deneux@ifrance.com>
-Home page at http://www.esil.univ-mrs.fr/~jdeneux/projects/ff/
-
-Additions by Vojtech Pavlik.
-
-I-Force is trademark of Immersion Corp.
+** Introduction +This document describes what I managed to discover about the protocol used to +specify force effects to I-Force 2.0 devices. None of this information comes +from Immerse. That's why you should not trust what is written in this +document. This document is intended to help understanding the protocol. +This is not a reference. Comments and corrections are welcome. To contact me, +send an email to: deneux@ifrance.com + +** WARNING ** +I may not be held responsible for any dammage or harm caused if you try to +send data to your I-Force device based on what you read in this document. + +** Preliminary Notes: +All values are hexadecimal with big-endian encoding (msb on the left). Beware, +values inside packets are encoded using little-endian. Bytes whose roles are +unknown are marked ??? Information that needs deeper inspection is marked (?) + +** General form of a packet ** +This is how packets look when the device uses the rs232 to communicate. +2B OP LEN DATA CS +CS is the checksum. It is equal to the exclusive or of all bytes. + +When using USB: +OP DATA +The 2B, LEN and CS fields have disappeared, probably because USB handles frames and +data corruption is handled or unsignificant. + +First, I describe effects that are sent by the device to the computer + +** Device input state +This packet is used to indicate the state of each button and the value of each +axis +OP= 01 for a joystick, 03 for a wheel +LEN= Varies from device to device +00 X-Axis lsb +01 X-Axis msb +02 Y-Axis lsb, or gas pedal for a wheel +03 Y-Axis msb, or brake pedal for a wheel +04 Throttle +05 Buttons +06 Lower 4 bits: Buttons + Upper 4 bits: Hat +07 Rudder + +** Device effects states +OP= 02 +LEN= Varies +00 ? Bit 1 (Value 2) is the value of the deadman switch +01 Bit 8 is set if the effect is playing. Bits 0 to 7 are the effect id. +02 ?? +03 Address of parameter block changed (lsb) +04 Address of parameter block changed (msb) +05 Address of second parameter block changed (lsb) +... depending on the number of parameter blocks updated + +** Force effect ** +OP= 01 +LEN= 0e +00 Channel (when playing several effects at the same time, each must be assigned a channel) +01 Wave form + Val 00 Constant + Val 20 Square + Val 21 Triangle + Val 22 Sine + Val 23 Sawtooth up + Val 24 Sawtooth down + Val 40 Spring (Force = f(pos)) + Val 41 Friction (Force = f(velocity)) and Inertia (Force = f(acceleration)) + + +02 Axes affected and trigger + Bits 4-7: Val 2 = effect along one axis. Byte 05 indicates direction + Val 4 = X axis only. Byte 05 must contain 5a + Val 8 = Y axis only. Byte 05 must contain b4 + Val c = X and Y axes. Bytes 05 must contain 60 + Bits 0-3: Val 0 = No trigger + Val x+1 = Button x triggers the effect + When the whole byte is 0, cancel the previously set trigger + +03-04 Duration of effect (little endian encoding, in ms) + +05 Direction of effect, if applicable. Else, see 02 for value to assign. + +06-07 Minimum time between triggering. + +08-09 Address of periodicity or magnitude parameters +0a-0b Address of attack and fade parameters, or ffff if none. +*or* +08-09 Address of interactive parameters for X-axis, or ffff if not applicable +0a-0b Address of interactive parameters for Y-axis, or ffff if not applicable + +0c-0d Delay before execution of effect (little endian encoding, in ms) + + +** Time based parameters ** + +*** Attack and fade *** +OP= 02 +LEN= 08 +00-01 Address where to store the parameteres +02-03 Duration of attack (little endian encoding, in ms) +04 Level at end of attack. Signed byte. +05-06 Duration of fade. +07 Level at end of fade. + +*** Magnitude *** +OP= 03 +LEN= 03 +00-01 Address +02 Level. Signed byte. + +*** Periodicity *** +OP= 04 +LEN= 07 +00-01 Address +02 Magnitude. Signed byte. +03 Offset. Signed byte. +04 Phase. Val 00 = 0 deg, Val 40 = 90 degs. +05-06 Period (little endian encoding, in ms) + +** Interactive parameters ** +OP= 05 +LEN= 0a +00-01 Address +02 Positive Coeff +03 Negative Coeff +04+05 Offset (center) +06+07 Dead band (Val 01F4 = 5000 (decimal)) +08 Positive saturation (Val 0a = 1000 (decimal) Val 64 = 10000 (decimal)) +09 Negative saturation + +The encoding is a bit funny here: For coeffs, these are signed values. The +maximum value is 64 (100 decimal), the min is 9c. +For the offset, the minimum value is FE0C, the maximum value is 01F4. +For the deadband, the minimum value is 0, the max is 03E8. + +** Controls ** +OP= 41 +LEN= 03 +00 Channel +01 Start/Stop + Val 00: Stop + Val 01: Start and play once. + Val 41: Start and play n times (See byte 02 below) +02 Number of iterations n. + +** Init ** + +*** Querying features *** +OP= ff +Query command. Length varies according to the query type. +The general format of this packet is: +ff 01 QUERY [INDEX] CHECKSUM +reponses are of the same form: +FF LEN QUERY VALUE_QUERIED CHECKSUM2 +where LEN = 1 + length(VALUE_QUERIED) + +**** Query ram size **** +QUERY = 42 ('B'uffer size) +The device should reply with the same packet plus two additionnal bytes +containing the size of the memory: +ff 03 42 03 e8 CS would mean that the device has 1000 bytes of ram available. + +**** Query number of effects **** +QUERY = 4e ('N'umber of effects) +The device should respond by sending the number of effects that can be played +at the same time (one byte) +ff 02 4e 14 CS would stand for 20 effects. + +**** Vendor's id **** +QUERY = 4d ('M'anufacturer) +Query the vendors'id (2 bytes) + +**** Product id ***** +QUERY = 50 ('P'roduct) +Query the product id (2 bytes) + +**** Open device **** +QUERY = 4f ('O'pen) +No data returned. + +**** Close device ***** +QUERY = 43 ('C')lose +No data returned. + +**** Query effect **** +QUERY = 45 ('E') +Send effect type. +Returns nonzero if supported (2 bytes) + +**** Firmware Version **** +QUERY = 56 ('V'ersion) +Sends back 3 bytes - major, minor, subminor + +*** Initialisation of the device *** + +**** Set Control **** +!!! Device dependent, can be different on different models !!! +OP= 40 <idx> <val> [<val>] +LEN= 2 or 3 +00 Idx + Idx 00 Set dead zone (0..2048) + Idx 01 Ignore Deadman sensor (0..1) + Idx 02 Enable comm watchdog (0..1) + Idx 03 Set the strength of the spring (0..100) + Idx 04 Enable or disable the spring (0/1) + Idx 05 Set axis saturation threshold (0..2048) + +**** Set Effect State **** +OP= 42 <val> +LEN= 1 +00 State + Bit 3 Pause force feedback + Bit 2 Enable force feedback + Bit 0 Stop all effects + +**** Set overall gain **** +OP= 43 <val> +LEN= 1 +00 Gain + Val 00 = 0% + Val 40 = 50% + Val 80 = 100% + +** Parameter memory ** + +Each device has a certain amount of memory to store parameters of effects. +The amount of RAM may vary, I encountered values from 200 to 1000 bytes. Below +is the amount of memory apparently needed for every set of parameters: + - period : 0c + - magnitude : 02 + - attack and fade : 0e + - interactive : 08 + +** Appendix: How to study the protocol ? ** + +1. Generate effects using the force editor provided with the DirectX SDK, or use Immersion Studio (freely available at their web site in the developer section: www.immersion.com) +2. Start a soft spying RS232 or USB (depending on where you connected your joystick/wheel). I used ComPortSpy from fCoder (alpha version!) +3. Play the effect, and watch what happens on the spy screen. + +A few words about ComPortSpy: +At first glance, this soft seems, hum, well... buggy. In fact, data appear with a few seconds latency. Personnaly, I restart it every time I play an effect. +Remember it's free (as in free beer) and alpha! + +** URLS ** +Check www.immerse.com for Immersion Studio, and www.fcoder.com for ComPortSpy. + +** Author of this document ** +Johann Deneux <deneux@ifrance.com> +Home page at http://www.esil.univ-mrs.fr/~jdeneux/projects/ff/ + +Additions by Vojtech Pavlik. + +I-Force is trademark of Immersion Corp. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 586b6f85d4e0..4d175c751246 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1,5 +1,5 @@ - Kernel Parameters - ~~~~~~~~~~~~~~~~~ + Kernel Parameters + ~~~~~~~~~~~~~~~~~ The following is a consolidated list of the kernel parameters as implemented (mostly) by the __setup() macro and sorted into English Dictionary order @@ -1462,7 +1462,7 @@ and is between 256 and 4096 characters. It is defined in the file reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode Format: <reboot_mode>[,<reboot_mode2>[,...]] - See arch/*/kernel/reboot.c or arch/*/kernel/process.c + See arch/*/kernel/reboot.c or arch/*/kernel/process.c reserve= [KNL,BUGS] Force the kernel to ignore some iomem area @@ -1550,12 +1550,12 @@ and is between 256 and 4096 characters. It is defined in the file selinux_compat_net = [SELINUX] Set initial selinux_compat_net flag value. - Format: { "0" | "1" } - 0 -- use new secmark-based packet controls - 1 -- use legacy packet controls - Default value is 0 (preferred). - Value can be changed at runtime via - /selinux/compat_net. + Format: { "0" | "1" } + 0 -- use new secmark-based packet controls + 1 -- use legacy packet controls + Default value is 0 (preferred). + Value can be changed at runtime via + /selinux/compat_net. serialnumber [BUGS=X86-32] @@ -1954,7 +1954,7 @@ and is between 256 and 4096 characters. It is defined in the file norandmaps Don't use address space randomization Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space - unwind_debug=N N > 0 will enable dwarf2 unwinder debugging + unwind_debug=N N > 0 will enable dwarf2 unwinder debugging This is useful to get more information why you got a "dwarf2 unwinder stuck" diff --git a/MAINTAINERS b/MAINTAINERS index 9c54a5ef0ba7..9a91d9e3f1f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2622,8 +2622,8 @@ P: Harald Welte P: Jozsef Kadlecsik P: Patrick McHardy M: kaber@trash.net -L: netfilter-devel@lists.netfilter.org -L: netfilter@lists.netfilter.org (subscribers-only) +L: netfilter-devel@vger.kernel.org +L: netfilter@vger.kernel.org L: coreteam@netfilter.org W: http://www.netfilter.org/ W: http://www.iptables.org/ @@ -2676,7 +2676,7 @@ M: jmorris@namei.org P: Hideaki YOSHIFUJI M: yoshfuji@linux-ipv6.org P: Patrick McHardy -M: kaber@coreworks.de +M: kaber@trash.net L: netdev@vger.kernel.org T: git kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6.git S: Maintained @@ -1,8 +1,8 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 23 -EXTRAVERSION =-rc6 -NAME = Pink Farting Weasel +EXTRAVERSION =-rc7 +NAME = Arr Matey! A Hairy Bilge Rat! # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff --git a/arch/i386/boot/header.S b/arch/i386/boot/header.S index 7f4a2c53bd76..f3140e596d40 100644 --- a/arch/i386/boot/header.S +++ b/arch/i386/boot/header.S @@ -275,7 +275,7 @@ die: hlt jmp die - .size die, .-due + .size die, .-die .section ".initdata", "a" setup_corrupt: diff --git a/arch/i386/boot/video.c b/arch/i386/boot/video.c index 693f20d3102e..e4ba897bf9a3 100644 --- a/arch/i386/boot/video.c +++ b/arch/i386/boot/video.c @@ -147,7 +147,7 @@ int mode_defined(u16 mode) } /* Set mode (without recalc) */ -static int raw_set_mode(u16 mode) +static int raw_set_mode(u16 mode, u16 *real_mode) { int nmode, i; struct card_info *card; @@ -165,8 +165,10 @@ static int raw_set_mode(u16 mode) if ((mode == nmode && visible) || mode == mi->mode || - mode == (mi->y << 8)+mi->x) + mode == (mi->y << 8)+mi->x) { + *real_mode = mi->mode; return card->set_mode(mi); + } if (visible) nmode++; @@ -178,7 +180,7 @@ static int raw_set_mode(u16 mode) if (mode >= card->xmode_first && mode < card->xmode_first+card->xmode_n) { struct mode_info mix; - mix.mode = mode; + *real_mode = mix.mode = mode; mix.x = mix.y = 0; return card->set_mode(&mix); } @@ -223,6 +225,7 @@ static void vga_recalc_vertical(void) static int set_mode(u16 mode) { int rv; + u16 real_mode; /* Very special mode numbers... */ if (mode == VIDEO_CURRENT_MODE) @@ -232,13 +235,16 @@ static int set_mode(u16 mode) else if (mode == EXTENDED_VGA) mode = VIDEO_8POINT; - rv = raw_set_mode(mode); + rv = raw_set_mode(mode, &real_mode); if (rv) return rv; if (mode & VIDEO_RECALC) vga_recalc_vertical(); + /* Save the canonical mode number for the kernel, not + an alias, size specification or menu position */ + boot_params.hdr.vid_mode = real_mode; return 0; } diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index ed0a0f2c1597..f22ba8534d26 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -151,51 +151,30 @@ bogus_real_magic: #define VIDEO_FIRST_V7 0x0900 # Setting of user mode (AX=mode ID) => CF=success + +# For now, we only handle VESA modes (0x0200..0x03ff). To handle other +# modes, we should probably compile in the video code from the boot +# directory. mode_set: movw %ax, %bx -#if 0 - cmpb $0xff, %ah - jz setalias - - testb $VIDEO_RECALC>>8, %ah - jnz _setrec - - cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah - jnc setres - - cmpb $VIDEO_FIRST_SPECIAL>>8, %ah - jz setspc - - cmpb $VIDEO_FIRST_V7>>8, %ah - jz setv7 -#endif - - cmpb $VIDEO_FIRST_VESA>>8, %ah - jnc check_vesa -#if 0 - orb %ah, %ah - jz setmenu -#endif - - decb %ah -# jz setbios Add bios modes later + subb $VIDEO_FIRST_VESA>>8, %bh + cmpb $2, %bh + jb check_vesa -setbad: clc +setbad: + clc ret check_vesa: - subb $VIDEO_FIRST_VESA>>8, %bh orw $0x4000, %bx # Use linear frame buffer movw $0x4f02, %ax # VESA BIOS mode set call int $0x10 cmpw $0x004f, %ax # AL=4f if implemented - jnz _setbad # AH=0 if OK + jnz setbad # AH=0 if OK stc ret -_setbad: jmp setbad - .code32 ALIGN diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index f0c37511d8da..f01bfcd4bdee 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c @@ -623,8 +623,8 @@ static unsigned long xen_read_cr2_direct(void) static void xen_write_cr4(unsigned long cr4) { - /* never allow TSC to be disabled */ - native_write_cr4(cr4 & ~X86_CR4_TSD); + /* Just ignore cr4 changes; Xen doesn't allow us to do + anything anyway. */ } static unsigned long xen_read_cr3(void) diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index b8a5e75ba0ab..3e634f2f5443 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -55,7 +55,7 @@ EXPORT_SYMBOL(dec_kn_slot_size); int dec_tc_bus; -spinlock_t ioasic_ssr_lock; +DEFINE_SPINLOCK(ioasic_ssr_lock); volatile u32 *ioasic_base; diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c index ac04f0adc408..6648fde20b96 100644 --- a/arch/mips/kernel/cpu-bugs64.c +++ b/arch/mips/kernel/cpu-bugs64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003, 2004 Maciej W. Rozycki + * Copyright (C) 2003, 2004, 2007 Maciej W. Rozycki * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -29,7 +29,7 @@ static inline void align_mod(const int align, const int mod) ".endr\n\t" ".set pop" : - : "rn" (align), "rn" (mod)); + : GCC_IMM_ASM (align), GCC_IMM_ASM (mod)); } static inline void mult_sh_align_mod(long *v1, long *v2, long *w, diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 727a6699f2f4..c627cf86d1e3 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -239,7 +239,7 @@ static void snapshot_tb_and_purr(void *data) struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); local_irq_save(flags); - p->tb = mftb(); + p->tb = get_tb_or_rtc(); p->purr = mfspr(SPRN_PURR); wmb(); p->initialized = 1; @@ -317,7 +317,7 @@ static void snapshot_purr(void) */ void snapshot_timebase(void) { - __get_cpu_var(last_jiffy) = get_tb(); + __get_cpu_var(last_jiffy) = get_tb_or_rtc(); snapshot_purr(); } @@ -684,6 +684,8 @@ void timer_interrupt(struct pt_regs * regs) write_seqlock(&xtime_lock); tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy; + if (__USE_RTC() && tb_next_jiffy >= 1000000000) + tb_next_jiffy -= 1000000000; if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) { tb_last_jiffy = tb_next_jiffy; do_timer(1); @@ -977,7 +979,7 @@ void __init time_init(void) tb_to_ns_scale = scale; tb_to_ns_shift = shift; /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ - boot_tb = get_tb(); + boot_tb = get_tb_or_rtc(); tm = get_boot_time(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index cef01e4e8989..213fa31ac537 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -98,6 +98,18 @@ static struct vdso_patch_def vdso_patches[] = { CPU_FTR_USE_TB, 0, "__kernel_gettimeofday", NULL }, + { + CPU_FTR_USE_TB, 0, + "__kernel_clock_gettime", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_clock_getres", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_get_tbfreq", NULL + }, }; /* diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index c784edd40ea7..5bebe7fbe056 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -579,7 +579,7 @@ static struct spu *find_victim(struct spu_context *ctx) list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; - if (tmp->prio > ctx->prio && + if (tmp && tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } @@ -611,9 +611,9 @@ static struct spu *find_victim(struct spu_context *ctx) mutex_lock(&cbe_spu_info[node].list_mutex); cbe_spu_info[node].nr_active--; + spu_unbind_context(spu, victim); mutex_unlock(&cbe_spu_info[node].list_mutex); - spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 8d7f7c1cb9c6..6c2be26f1d7d 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -117,7 +117,7 @@ extern void sigio_handler(int sig, union uml_pt_regs *regs); extern void copy_sc(union uml_pt_regs *regs, void *from); -unsigned long to_irq_stack(int sig, unsigned long *mask_out); +extern unsigned long to_irq_stack(unsigned long *mask_out); unsigned long from_irq_stack(int nested); #endif diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 9870febdbead..cf0dd9cf8c43 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -518,13 +518,13 @@ int init_aio_irq(int irq, char *name, irq_handler_t handler) static unsigned long pending_mask; -unsigned long to_irq_stack(int sig, unsigned long *mask_out) +unsigned long to_irq_stack(unsigned long *mask_out) { struct thread_info *ti; unsigned long mask, old; int nested; - mask = xchg(&pending_mask, 1 << sig); + mask = xchg(&pending_mask, *mask_out); if(mask != 0){ /* If any interrupts come in at this point, we want to * make sure that their bits aren't lost by our @@ -534,7 +534,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out) * and pending_mask contains a bit for each interrupt * that came in. */ - old = 1 << sig; + old = *mask_out; do { old |= mask; mask = xchg(&pending_mask, old); @@ -550,6 +550,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out) task = cpu_tasks[ti->cpu].task; tti = task_thread_info(task); + *ti = *tti; ti->real_thread = tti; task->stack = ti; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 6f92f732d253..c3ecc2a84e0c 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -320,7 +320,8 @@ int os_file_size(char *file, unsigned long long *size_out) } if(S_ISBLK(buf.ust_mode)){ - int fd, blocks; + int fd; + long blocks; fd = os_open_file(file, of_read(OPENFLAGS()), 0); if(fd < 0){ diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 18e5c8b67eb8..b98f7ea2d2f6 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -119,7 +119,7 @@ void (*handlers[_NSIG])(int sig, struct sigcontext *sc); void handle_signal(int sig, struct sigcontext *sc) { - unsigned long pending = 0; + unsigned long pending = 1UL << sig; do { int nested, bail; @@ -134,7 +134,7 @@ void handle_signal(int sig, struct sigcontext *sc) * have to return, and the upper handler will deal * with this interrupt. */ - bail = to_irq_stack(sig, &pending); + bail = to_irq_stack(&pending); if(bail) return; diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S index 13f1480cbec9..a06f2bcabef9 100644 --- a/arch/x86_64/kernel/acpi/wakeup.S +++ b/arch/x86_64/kernel/acpi/wakeup.S @@ -81,7 +81,7 @@ wakeup_code: testl $2, realmode_flags - wakeup_code jz 1f mov video_mode - wakeup_code, %ax - call mode_seta + call mode_set 1: movw $0xb800, %ax @@ -291,52 +291,31 @@ no_longmode: #define VIDEO_FIRST_V7 0x0900 # Setting of user mode (AX=mode ID) => CF=success + +# For now, we only handle VESA modes (0x0200..0x03ff). To handle other +# modes, we should probably compile in the video code from the boot +# directory. .code16 -mode_seta: +mode_set: movw %ax, %bx -#if 0 - cmpb $0xff, %ah - jz setalias - - testb $VIDEO_RECALC>>8, %ah - jnz _setrec - - cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah - jnc setres - - cmpb $VIDEO_FIRST_SPECIAL>>8, %ah - jz setspc - - cmpb $VIDEO_FIRST_V7>>8, %ah - jz setv7 -#endif - - cmpb $VIDEO_FIRST_VESA>>8, %ah - jnc check_vesaa -#if 0 - orb %ah, %ah - jz setmenu -#endif - - decb %ah -# jz setbios Add bios modes later + subb $VIDEO_FIRST_VESA>>8, %bh + cmpb $2, %bh + jb check_vesa -setbada: clc +setbad: + clc ret -check_vesaa: - subb $VIDEO_FIRST_VESA>>8, %bh +check_vesa: orw $0x4000, %bx # Use linear frame buffer movw $0x4f02, %ax # VESA BIOS mode set call int $0x10 cmpw $0x004f, %ax # AL=4f if implemented - jnz _setbada # AH=0 if OK + jnz setbad # AH=0 if OK stc ret -_setbada: jmp setbada - wakeup_stack_begin: # Stack grows down .org 0xff0 diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 327c9f2fa626..54816adb8e93 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -374,6 +374,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (unlikely(in_atomic() || !mm)) goto bad_area_nosemaphore; + /* + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet. + */ + if (user_mode_vm(regs)) + error_code |= PF_USER; + again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 06f212ff2b4f..c16820325d7b 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -418,10 +418,12 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* ATI */ { PCI_VDEVICE(ATI, 0x4380), board_ahci_sb600 }, /* ATI SB600 */ - { PCI_VDEVICE(ATI, 0x4390), board_ahci_sb600 }, /* ATI SB700 IDE */ - { PCI_VDEVICE(ATI, 0x4391), board_ahci_sb600 }, /* ATI SB700 AHCI */ - { PCI_VDEVICE(ATI, 0x4392), board_ahci_sb600 }, /* ATI SB700 nraid5 */ - { PCI_VDEVICE(ATI, 0x4393), board_ahci_sb600 }, /* ATI SB700 raid5 */ + { PCI_VDEVICE(ATI, 0x4390), board_ahci_sb600 }, /* ATI SB700/800 */ + { PCI_VDEVICE(ATI, 0x4391), board_ahci_sb600 }, /* ATI SB700/800 */ + { PCI_VDEVICE(ATI, 0x4392), board_ahci_sb600 }, /* ATI SB700/800 */ + { PCI_VDEVICE(ATI, 0x4393), board_ahci_sb600 }, /* ATI SB700/800 */ + { PCI_VDEVICE(ATI, 0x4394), board_ahci_sb600 }, /* ATI SB700/800 */ + { PCI_VDEVICE(ATI, 0x4395), board_ahci_sb600 }, /* ATI SB700/800 */ /* VIA */ { PCI_VDEVICE(VIA, 0x3349), board_ahci_vt8251 }, /* VIA VT8251 */ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c43de9a710db..772be09b4689 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3778,6 +3778,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Maxtor 6L250S0", "BANC1G10", ATA_HORKAGE_NONCQ }, { "Maxtor 6B200M0", "BANC1BM0", ATA_HORKAGE_NONCQ }, { "Maxtor 6B200M0", "BANC1B10", ATA_HORKAGE_NONCQ }, + { "Maxtor 7B250S0", "BANC1B70", ATA_HORKAGE_NONCQ, }, + { "Maxtor 7B300S0", "BANC1B70", ATA_HORKAGE_NONCQ }, + { "Maxtor 7V300F0", "VA111630", ATA_HORKAGE_NONCQ }, { "HITACHI HDS7250SASUN500G 0621KTAWSD", "K2AOAJ0AHITACHI", ATA_HORKAGE_NONCQ }, /* NCQ hard hangs device under heavier load, needs hard power cycle */ @@ -3794,6 +3797,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ, }, { "FUJITSU MHV2080BH", "00840028", ATA_HORKAGE_NONCQ, }, { "ST9160821AS", "3.CLF", ATA_HORKAGE_NONCQ, }, + { "ST3160812AS", "3.AD", ATA_HORKAGE_NONCQ, }, { "SAMSUNG HD401LJ", "ZZ100-15", ATA_HORKAGE_NONCQ, }, /* devices which puke on READ_NATIVE_MAX */ diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 1cce2198baaf..8023167bbbeb 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -297,7 +297,7 @@ void ata_bmdma_start (struct ata_queued_cmd *qc) dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD); iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD); - /* Strictly, one may wish to issue a readb() here, to + /* Strictly, one may wish to issue an ioread8() here, to * flush the mmio write. However, control also passes * to the hardware at this point, and it will interrupt * us when we are to resume control. So, in effect, @@ -307,6 +307,9 @@ void ata_bmdma_start (struct ata_queued_cmd *qc) * is expected, so I think it is best to not add a readb() * without first all the MMIO ATA cards/mobos. * Or maybe I'm just being paranoid. + * + * FIXME: The posting of this write means I/O starts are + * unneccessarily delayed for MMIO */ } diff --git a/drivers/base/core.c b/drivers/base/core.c index e6738bcbe5a9..6de33d7a29ba 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -679,14 +679,26 @@ static int device_add_class_symlinks(struct device *dev) goto out_subsys; } if (dev->parent) { - error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, - "device"); - if (error) - goto out_busid; #ifdef CONFIG_SYSFS_DEPRECATED { - char * class_name = make_class_name(dev->class->name, - &dev->kobj); + struct device *parent = dev->parent; + char *class_name; + + /* + * In old sysfs stacked class devices had 'device' + * link pointing to real device instead of parent + */ + while (parent->class && !parent->bus && parent->parent) + parent = parent->parent; + + error = sysfs_create_link(&dev->kobj, + &parent->kobj, + "device"); + if (error) + goto out_busid; + + class_name = make_class_name(dev->class->name, + &dev->kobj); if (class_name) error = sysfs_create_link(&dev->parent->kobj, &dev->kobj, class_name); @@ -694,6 +706,11 @@ static int device_add_class_symlinks(struct device *dev) if (error) goto out_device; } +#else + error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, + "device"); + if (error) + goto out_busid; #endif } return 0; diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 504a95d888b2..84d6aa500e26 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -31,6 +31,7 @@ #include <linux/genhd.h> #include <linux/hdreg.h> #include <linux/blkpg.h> +#include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/mm.h> diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 35ab1a9f8e8b..8955e7ff759a 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -176,7 +176,7 @@ struct agp_bridge_data { #define I830_GMCH_MEM_MASK 0x1 #define I830_GMCH_MEM_64M 0x1 #define I830_GMCH_MEM_128M 0 -#define I830_GMCH_GMS_MASK 0xF0 +#define I830_GMCH_GMS_MASK 0x70 #define I830_GMCH_GMS_DISABLED 0x00 #define I830_GMCH_GMS_LOCAL 0x10 #define I830_GMCH_GMS_STOLEN_512 0x20 @@ -190,6 +190,7 @@ struct agp_bridge_data { #define INTEL_I830_ERRSTS 0x92 /* Intel 855GM/852GM registers */ +#define I855_GMCH_GMS_MASK 0xF0 #define I855_GMCH_GMS_STOLEN_0M 0x0 #define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) #define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 7c69bf259caa..a5d0e95a227a 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -511,7 +511,7 @@ static void intel_i830_init_gtt_entries(void) */ if (IS_G33) size = 0; - switch (gmch_ctrl & I830_GMCH_GMS_MASK) { + switch (gmch_ctrl & I855_GMCH_GMS_MASK) { case I855_GMCH_GMS_STOLEN_1M: gtt_entries = MB(1) - KB(size); break; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 9b07f7851061..dd441ff4af56 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2215,7 +2215,8 @@ static int ipmi_pci_resume(struct pci_dev *pdev) static struct pci_device_id ipmi_pci_devices[] = { { PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) }, - { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) } + { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ipmi_pci_devices); diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index c08a4152ee8f..049a46cc9f87 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -67,7 +67,7 @@ /* * Page types allocated by the device. */ -enum { +enum mspec_page_type { MSPEC_FETCHOP = 1, MSPEC_CACHED, MSPEC_UNCACHED @@ -83,15 +83,25 @@ static int is_sn2; * One of these structures is allocated when an mspec region is mmaped. The * structure is pointed to by the vma->vm_private_data field in the vma struct. * This structure is used to record the addresses of the mspec pages. + * This structure is shared by all vma's that are split off from the + * original vma when split_vma()'s are done. + * + * The refcnt is incremented atomically because mm->mmap_sem does not + * protect in fork case where multiple tasks share the vma_data. */ struct vma_data { atomic_t refcnt; /* Number of vmas sharing the data. */ - spinlock_t lock; /* Serialize access to the vma. */ + spinlock_t lock; /* Serialize access to this structure. */ int count; /* Number of pages allocated. */ - int type; /* Type of pages allocated. */ + enum mspec_page_type type; /* Type of pages allocated. */ + int flags; /* See VMD_xxx below. */ + unsigned long vm_start; /* Original (unsplit) base. */ + unsigned long vm_end; /* Original (unsplit) end. */ unsigned long maddr[0]; /* Array of MSPEC addresses. */ }; +#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */ + /* used on shub2 to clear FOP cache in the HUB */ static unsigned long scratch_page[MAX_NUMNODES]; #define SH2_AMO_CACHE_ENTRIES 4 @@ -129,8 +139,8 @@ mspec_zero_block(unsigned long addr, int len) * mspec_open * * Called when a device mapping is created by a means other than mmap - * (via fork, etc.). Increments the reference count on the underlying - * mspec data so it is not freed prematurely. + * (via fork, munmap, etc.). Increments the reference count on the + * underlying mspec data so it is not freed prematurely. */ static void mspec_open(struct vm_area_struct *vma) @@ -151,34 +161,44 @@ static void mspec_close(struct vm_area_struct *vma) { struct vma_data *vdata; - int i, pages, result, vdata_size; + int index, last_index, result; + unsigned long my_page; vdata = vma->vm_private_data; - if (!atomic_dec_and_test(&vdata->refcnt)) - return; - pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - vdata_size = sizeof(struct vma_data) + pages * sizeof(long); - for (i = 0; i < pages; i++) { - if (vdata->maddr[i] == 0) + BUG_ON(vma->vm_start < vdata->vm_start || vma->vm_end > vdata->vm_end); + + spin_lock(&vdata->lock); + index = (vma->vm_start - vdata->vm_start) >> PAGE_SHIFT; + last_index = (vma->vm_end - vdata->vm_start) >> PAGE_SHIFT; + for (; index < last_index; index++) { + if (vdata->maddr[index] == 0) continue; /* * Clear the page before sticking it back * into the pool. */ - result = mspec_zero_block(vdata->maddr[i], PAGE_SIZE); + my_page = vdata->maddr[index]; + vdata->maddr[index] = 0; + spin_unlock(&vdata->lock); + result = mspec_zero_block(my_page, PAGE_SIZE); if (!result) - uncached_free_page(vdata->maddr[i]); + uncached_free_page(my_page); else printk(KERN_WARNING "mspec_close(): " "failed to zero page %i\n", result); + spin_lock(&vdata->lock); } + spin_unlock(&vdata->lock); - if (vdata_size <= PAGE_SIZE) - kfree(vdata); - else + if (!atomic_dec_and_test(&vdata->refcnt)) + return; + + if (vdata->flags & VMD_VMALLOCED) vfree(vdata); + else + kfree(vdata); } @@ -195,7 +215,8 @@ mspec_nopfn(struct vm_area_struct *vma, unsigned long address) int index; struct vma_data *vdata = vma->vm_private_data; - index = (address - vma->vm_start) >> PAGE_SHIFT; + BUG_ON(address < vdata->vm_start || address >= vdata->vm_end); + index = (address - vdata->vm_start) >> PAGE_SHIFT; maddr = (volatile unsigned long) vdata->maddr[index]; if (maddr == 0) { maddr = uncached_alloc_page(numa_node_id()); @@ -237,10 +258,11 @@ static struct vm_operations_struct mspec_vm_ops = { * underlying pages. */ static int -mspec_mmap(struct file *file, struct vm_area_struct *vma, int type) +mspec_mmap(struct file *file, struct vm_area_struct *vma, + enum mspec_page_type type) { struct vma_data *vdata; - int pages, vdata_size; + int pages, vdata_size, flags = 0; if (vma->vm_pgoff != 0) return -EINVAL; @@ -255,12 +277,17 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, int type) vdata_size = sizeof(struct vma_data) + pages * sizeof(long); if (vdata_size <= PAGE_SIZE) vdata = kmalloc(vdata_size, GFP_KERNEL); - else + else { vdata = vmalloc(vdata_size); + flags = VMD_VMALLOCED; + } if (!vdata) return -ENOMEM; memset(vdata, 0, vdata_size); + vdata->vm_start = vma->vm_start; + vdata->vm_end = vma->vm_end; + vdata->flags = flags; vdata->type = type; spin_lock_init(&vdata->lock); vdata->refcnt = ATOMIC_INIT(1); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index eba1adbc1b6a..4754769eda97 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -487,6 +487,7 @@ static inline int idedisk_supports_lba48(const struct hd_driveid *id) */ static const struct drive_list_entry hpa_list[] = { { "ST340823A", NULL }, + { "ST320413A", NULL }, { NULL, NULL } }; diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 4b13cd9a027d..f19eb6daeefd 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1802,9 +1802,7 @@ pmac_ide_dma_check(ide_drive_t *drive) { struct hd_driveid *id = drive->id; ide_hwif_t *hwif = HWIF(drive); - pmac_ide_hwif_t* pmif = (pmac_ide_hwif_t *)hwif->hwif_data; int enable = 1; - int map; drive->using_dma = 0; if (drive->media == ide_floppy) diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index ee45259573c8..98fd985a32ff 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -1273,7 +1273,7 @@ static void __exit ieee1394_cleanup(void) unregister_chrdev_region(IEEE1394_CORE_DEV, 256); } -fs_initcall(ieee1394_init); /* same as ohci1394 */ +module_init(ieee1394_init); module_exit(ieee1394_cleanup); /* Exported symbols */ diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index 5667c8102efc..372c5c16eb31 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -3537,7 +3537,5 @@ static int __init ohci1394_init(void) return pci_register_driver(&ohci1394_pci_driver); } -/* Register before most other device drivers. - * Useful for remote debugging via physical DMA, e.g. using firescope. */ -fs_initcall(ohci1394_init); +module_init(ohci1394_init); module_exit(ohci1394_cleanup); diff --git a/drivers/media/video/usbvision/usbvision-cards.c b/drivers/media/video/usbvision/usbvision-cards.c index 380564cd3317..f09eb102731b 100644 --- a/drivers/media/video/usbvision/usbvision-cards.c +++ b/drivers/media/video/usbvision/usbvision-cards.c @@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] = { { USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL }, { USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM }, { USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV }, + { }, /* terminate list */ }; MODULE_DEVICE_TABLE (usb, usbvision_table); diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index cff969d05d4a..6f32a35eb106 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -816,7 +816,8 @@ static void __devexit cafe_nand_remove(struct pci_dev *pdev) } static struct pci_device_id cafe_nand_tbl[] = { - { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 } + { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 }, + { 0, } }; MODULE_DEVICE_TABLE(pci, cafe_nand_tbl); diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 854d80c330ec..66eed22cbd21 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -54,8 +54,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.6.4" -#define DRV_MODULE_RELDATE "August 3, 2007" +#define DRV_MODULE_VERSION "1.6.5" +#define DRV_MODULE_RELDATE "September 20, 2007" #define RUN_AT(x) (jiffies + (x)) @@ -6727,7 +6727,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) } else if (CHIP_NUM(bp) == CHIP_NUM_5706 || CHIP_NUM(bp) == CHIP_NUM_5708) bp->phy_flags |= PHY_CRC_FIX_FLAG; - else if (CHIP_ID(bp) == CHIP_ID_5709_A0) + else if (CHIP_ID(bp) == CHIP_ID_5709_A0 || + CHIP_ID(bp) == CHIP_ID_5709_A1) bp->phy_flags |= PHY_DIS_EARLY_DAC_FLAG; if ((CHIP_ID(bp) == CHIP_ID_5708_A0) || diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 1c42266bf889..556962f9612d 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -3094,9 +3094,12 @@ static void myri10ge_remove(struct pci_dev *pdev) } #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E 0x0008 +#define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9 0x0009 static struct pci_device_id myri10ge_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)}, + {PCI_DEVICE + (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)}, {0}, }; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 0cc4369cacba..cb230f44d6fc 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -409,6 +409,7 @@ int phy_mii_ioctl(struct phy_device *phydev, return 0; } +EXPORT_SYMBOL(phy_mii_ioctl); /** * phy_start_aneg - start auto-negotiation for this PHY device diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 0d7f570b9a54..9b30cd600a64 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -879,8 +879,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) dev->hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, data_len); - if (dev_queue_xmit(skb) < 0) - goto abort; + dev_queue_xmit(skb); return 1; diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 266e8b38fe10..abe91cb595f4 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -491,44 +491,46 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) u16 hdrflags; u16 tunnel_id, session_id; int length; - struct udphdr *uh; + int offset; tunnel = pppol2tp_sock_to_tunnel(sock); if (tunnel == NULL) goto error; + /* UDP always verifies the packet length. */ + __skb_pull(skb, sizeof(struct udphdr)); + /* Short packet? */ - if (skb->len < sizeof(struct udphdr)) { + if (!pskb_may_pull(skb, 12)) { PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); goto error; } /* Point to L2TP header */ - ptr = skb->data + sizeof(struct udphdr); + ptr = skb->data; /* Get L2TP header flags */ hdrflags = ntohs(*(__be16*)ptr); /* Trace packet contents, if enabled */ if (tunnel->debug & PPPOL2TP_MSG_DATA) { + length = min(16u, skb->len); + if (!pskb_may_pull(skb, length)) + goto error; + printk(KERN_DEBUG "%s: recv: ", tunnel->name); - for (length = 0; length < 16; length++) - printk(" %02X", ptr[length]); + offset = 0; + do { + printk(" %02X", ptr[offset]); + } while (++offset < length); + printk("\n"); } /* Get length of L2TP packet */ - uh = (struct udphdr *) skb_transport_header(skb); - length = ntohs(uh->len) - sizeof(struct udphdr); - - /* Too short? */ - if (length < 12) { - PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, - "%s: recv short L2TP packet (len=%d)\n", tunnel->name, length); - goto error; - } + length = skb->len; /* If type is control packet, it is handled by userspace. */ if (hdrflags & L2TP_HDRFLAG_T) { @@ -606,7 +608,6 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) "%s: recv data has no seq numbers when required. " "Discarding\n", session->name); session->stats.rx_seq_discards++; - session->stats.rx_errors++; goto discard; } @@ -625,7 +626,6 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) "%s: recv data has no seq numbers when required. " "Discarding\n", session->name); session->stats.rx_seq_discards++; - session->stats.rx_errors++; goto discard; } @@ -634,10 +634,14 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) } /* If offset bit set, skip it. */ - if (hdrflags & L2TP_HDRFLAG_O) - ptr += 2 + ntohs(*(__be16 *) ptr); + if (hdrflags & L2TP_HDRFLAG_O) { + offset = ntohs(*(__be16 *)ptr); + skb->transport_header += 2 + offset; + if (!pskb_may_pull(skb, skb_transport_offset(skb) + 2)) + goto discard; + } - skb_pull(skb, ptr - skb->data); + __skb_pull(skb, skb_transport_offset(skb)); /* Skip PPP header, if present. In testing, Microsoft L2TP clients * don't send the PPP header (PPP header compression enabled), but @@ -673,7 +677,6 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) */ if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) { session->stats.rx_seq_discards++; - session->stats.rx_errors++; PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, "%s: oos pkt %hu len %d discarded, " "waiting for %hu, reorder_q_len=%d\n", @@ -698,6 +701,7 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) return 0; discard: + session->stats.rx_errors++; kfree_skb(skb); sock_put(session->sock); @@ -958,7 +962,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) int data_len = skb->len; struct inet_sock *inet; __wsum csum = 0; - struct sk_buff *skb2 = NULL; struct udphdr *uh; unsigned int len; @@ -989,41 +992,30 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) */ headroom = NET_SKB_PAD + sizeof(struct iphdr) + sizeof(struct udphdr) + hdr_len + sizeof(ppph); - if (skb_headroom(skb) < headroom) { - skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) - goto abort; - } else - skb2 = skb; - - /* Check that the socket has room */ - if (atomic_read(&sk_tun->sk_wmem_alloc) < sk_tun->sk_sndbuf) - skb_set_owner_w(skb2, sk_tun); - else - goto discard; + if (skb_cow_head(skb, headroom)) + goto abort; /* Setup PPP header */ - skb_push(skb2, sizeof(ppph)); - skb2->data[0] = ppph[0]; - skb2->data[1] = ppph[1]; + __skb_push(skb, sizeof(ppph)); + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; /* Setup L2TP header */ - skb_push(skb2, hdr_len); - pppol2tp_build_l2tp_header(session, skb2->data); + pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len)); /* Setup UDP header */ inet = inet_sk(sk_tun); - skb_push(skb2, sizeof(struct udphdr)); - skb_reset_transport_header(skb2); - uh = (struct udphdr *) skb2->data; + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); uh->source = inet->sport; uh->dest = inet->dport; uh->len = htons(sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len); uh->check = 0; - /* Calculate UDP checksum if configured to do so */ + /* *BROKEN* Calculate UDP checksum if configured to do so */ if (sk_tun->sk_no_check != UDP_CSUM_NOXMIT) - csum = udp_csum_outgoing(sk_tun, skb2); + csum = udp_csum_outgoing(sk_tun, skb); /* Debug */ if (session->send_seq) @@ -1036,7 +1028,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (session->debug & PPPOL2TP_MSG_DATA) { int i; - unsigned char *datap = skb2->data; + unsigned char *datap = skb->data; printk(KERN_DEBUG "%s: xmit:", session->name); for (i = 0; i < data_len; i++) { @@ -1049,18 +1041,18 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) printk("\n"); } - memset(&(IPCB(skb2)->opt), 0, sizeof(IPCB(skb2)->opt)); - IPCB(skb2)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - nf_reset(skb2); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + nf_reset(skb); /* Get routing info from the tunnel socket */ - dst_release(skb2->dst); - skb2->dst = sk_dst_get(sk_tun); + dst_release(skb->dst); + skb->dst = sk_dst_get(sk_tun); /* Queue the packet to IP for output */ - len = skb2->len; - rc = ip_queue_xmit(skb2, 1); + len = skb->len; + rc = ip_queue_xmit(skb, 1); /* Update stats */ if (rc >= 0) { @@ -1073,17 +1065,12 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) session->stats.tx_errors++; } - /* Free the original skb */ - kfree_skb(skb); - return 1; -discard: - /* Free the new skb. Caller will free original skb. */ - if (skb2 != skb) - kfree_skb(skb2); abort: - return 0; + /* Free the original skb */ + kfree_skb(skb); + return 1; } /***************************************************************************** @@ -1326,12 +1313,14 @@ static struct sock *pppol2tp_prepare_tunnel_socket(int fd, u16 tunnel_id, goto err; } + sk = sock->sk; + /* Quick sanity checks */ - err = -ESOCKTNOSUPPORT; - if (sock->type != SOCK_DGRAM) { + err = -EPROTONOSUPPORT; + if (sk->sk_protocol != IPPROTO_UDP) { PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, - "tunl %hu: fd %d wrong type, got %d, expected %d\n", - tunnel_id, fd, sock->type, SOCK_DGRAM); + "tunl %hu: fd %d wrong protocol, got %d, expected %d\n", + tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); goto err; } err = -EAFNOSUPPORT; @@ -1343,7 +1332,6 @@ static struct sock *pppol2tp_prepare_tunnel_socket(int fd, u16 tunnel_id, } err = -ENOTCONN; - sk = sock->sk; /* Check if this socket has already been prepped */ tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 5d812de65d90..eaffe551d1d8 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -51,7 +51,7 @@ #include "sky2.h" #define DRV_NAME "sky2" -#define DRV_VERSION "1.17" +#define DRV_VERSION "1.18" #define PFX DRV_NAME " " /* @@ -118,12 +118,15 @@ static const struct pci_device_id sky2_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4351) }, /* 88E8036 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4352) }, /* 88E8038 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4353) }, /* 88E8039 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4354) }, /* 88E8040 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4356) }, /* 88EC033 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x435A) }, /* 88E8048 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4360) }, /* 88E8052 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4361) }, /* 88E8050 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4362) }, /* 88E8053 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4363) }, /* 88E8055 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4364) }, /* 88E8056 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4365) }, /* 88E8070 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4366) }, /* 88EC036 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4367) }, /* 88EC032 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4368) }, /* 88EC034 */ @@ -147,6 +150,7 @@ static const char *yukon2_name[] = { "Extreme", /* 0xb5 */ "EC", /* 0xb6 */ "FE", /* 0xb7 */ + "FE+", /* 0xb8 */ }; static void sky2_set_multicast(struct net_device *dev); @@ -217,8 +221,7 @@ static void sky2_power_on(struct sky2_hw *hw) else sky2_write8(hw, B2_Y2_CLK_GATE, 0); - if (hw->chip_id == CHIP_ID_YUKON_EC_U || - hw->chip_id == CHIP_ID_YUKON_EX) { + if (hw->flags & SKY2_HW_ADV_POWER_CTL) { u32 reg; sky2_pci_write32(hw, PCI_DEV_REG3, 0); @@ -311,10 +314,8 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) struct sky2_port *sky2 = netdev_priv(hw->dev[port]); u16 ctrl, ct1000, adv, pg, ledctrl, ledover, reg; - if (sky2->autoneg == AUTONEG_ENABLE - && !(hw->chip_id == CHIP_ID_YUKON_XL - || hw->chip_id == CHIP_ID_YUKON_EC_U - || hw->chip_id == CHIP_ID_YUKON_EX)) { + if (sky2->autoneg == AUTONEG_ENABLE && + !(hw->flags & SKY2_HW_NEWER_PHY)) { u16 ectrl = gm_phy_read(hw, port, PHY_MARV_EXT_CTRL); ectrl &= ~(PHY_M_EC_M_DSC_MSK | PHY_M_EC_S_DSC_MSK | @@ -334,7 +335,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL); if (sky2_is_copper(hw)) { - if (hw->chip_id == CHIP_ID_YUKON_FE) { + if (!(hw->flags & SKY2_HW_GIGABIT)) { /* enable automatic crossover */ ctrl |= PHY_M_PC_MDI_XMODE(PHY_M_PC_ENA_AUTO) >> 1; } else { @@ -346,9 +347,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) /* downshift on PHY 88E1112 and 88E1149 is changed */ if (sky2->autoneg == AUTONEG_ENABLE - && (hw->chip_id == CHIP_ID_YUKON_XL - || hw->chip_id == CHIP_ID_YUKON_EC_U - || hw->chip_id == CHIP_ID_YUKON_EX)) { + && (hw->flags & SKY2_HW_NEWER_PHY)) { /* set downshift counter to 3x and enable downshift */ ctrl &= ~PHY_M_PC_DSC_MSK; ctrl |= PHY_M_PC_DSC(2) | PHY_M_PC_DOWN_S_ENA; @@ -364,7 +363,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl); /* special setup for PHY 88E1112 Fiber */ - if (hw->chip_id == CHIP_ID_YUKON_XL && !sky2_is_copper(hw)) { + if (hw->chip_id == CHIP_ID_YUKON_XL && (hw->flags & SKY2_HW_FIBRE_PHY)) { pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); /* Fiber: select 1000BASE-X only mode MAC Specific Ctrl Reg. */ @@ -455,7 +454,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) gma_write16(hw, port, GM_GP_CTRL, reg); - if (hw->chip_id != CHIP_ID_YUKON_FE) + if (hw->flags & SKY2_HW_GIGABIT) gm_phy_write(hw, port, PHY_MARV_1000T_CTRL, ct1000); gm_phy_write(hw, port, PHY_MARV_AUNE_ADV, adv); @@ -479,6 +478,23 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) gm_phy_write(hw, port, PHY_MARV_FE_LED_PAR, ctrl); break; + case CHIP_ID_YUKON_FE_P: + /* Enable Link Partner Next Page */ + ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL); + ctrl |= PHY_M_PC_ENA_LIP_NP; + + /* disable Energy Detect and enable scrambler */ + ctrl &= ~(PHY_M_PC_ENA_ENE_DT | PHY_M_PC_DIS_SCRAMB); + gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl); + + /* set LED2 -> ACT, LED1 -> LINK, LED0 -> SPEED */ + ctrl = PHY_M_FELP_LED2_CTRL(LED_PAR_CTRL_ACT_BL) | + PHY_M_FELP_LED1_CTRL(LED_PAR_CTRL_LINK) | + PHY_M_FELP_LED0_CTRL(LED_PAR_CTRL_SPEED); + + gm_phy_write(hw, port, PHY_MARV_FE_LED_PAR, ctrl); + break; + case CHIP_ID_YUKON_XL: pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); @@ -548,7 +564,13 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) /* set page register to 0 */ gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 0); + } else if (hw->chip_id == CHIP_ID_YUKON_FE_P && + hw->chip_rev == CHIP_REV_YU_FE2_A0) { + /* apply workaround for integrated resistors calibration */ + gm_phy_write(hw, port, PHY_MARV_PAGE_ADDR, 17); + gm_phy_write(hw, port, PHY_MARV_PAGE_DATA, 0x3f60); } else if (hw->chip_id != CHIP_ID_YUKON_EX) { + /* no effect on Yukon-XL */ gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl); if (sky2->autoneg == AUTONEG_DISABLE || sky2->speed == SPEED_100) { @@ -669,25 +691,25 @@ static void sky2_wol_init(struct sky2_port *sky2) static void sky2_set_tx_stfwd(struct sky2_hw *hw, unsigned port) { - if (hw->chip_id == CHIP_ID_YUKON_EX && hw->chip_rev != CHIP_REV_YU_EX_A0) { + struct net_device *dev = hw->dev[port]; + + if (dev->mtu <= ETH_DATA_LEN) sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), - TX_STFW_ENA | - (hw->dev[port]->mtu > ETH_DATA_LEN) ? TX_JUMBO_ENA : TX_JUMBO_DIS); - } else { - if (hw->dev[port]->mtu > ETH_DATA_LEN) { - /* set Tx GMAC FIFO Almost Empty Threshold */ - sky2_write32(hw, SK_REG(port, TX_GMF_AE_THR), - (ECU_JUMBO_WM << 16) | ECU_AE_THR); + TX_JUMBO_DIS | TX_STFW_ENA); - sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), - TX_JUMBO_ENA | TX_STFW_DIS); + else if (hw->chip_id != CHIP_ID_YUKON_EC_U) + sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), + TX_STFW_ENA | TX_JUMBO_ENA); + else { + /* set Tx GMAC FIFO Almost Empty Threshold */ + sky2_write32(hw, SK_REG(port, TX_GMF_AE_THR), + (ECU_JUMBO_WM << 16) | ECU_AE_THR); - /* Can't do offload because of lack of store/forward */ - hw->dev[port]->features &= ~(NETIF_F_TSO | NETIF_F_SG - | NETIF_F_ALL_CSUM); - } else - sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), - TX_JUMBO_DIS | TX_STFW_ENA); + sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), + TX_JUMBO_ENA | TX_STFW_DIS); + + /* Can't do offload because of lack of store/forward */ + dev->features &= ~(NETIF_F_TSO | NETIF_F_SG | NETIF_F_ALL_CSUM); } } @@ -773,7 +795,8 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) /* Configure Rx MAC FIFO */ sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_CLR); rx_reg = GMF_OPER_ON | GMF_RX_F_FL_ON; - if (hw->chip_id == CHIP_ID_YUKON_EX) + if (hw->chip_id == CHIP_ID_YUKON_EX || + hw->chip_id == CHIP_ID_YUKON_FE_P) rx_reg |= GMF_RX_OVER_ON; sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T), rx_reg); @@ -782,13 +805,18 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) sky2_write16(hw, SK_REG(port, RX_GMF_FL_MSK), GMR_FS_ANY_ERR); /* Set threshold to 0xa (64 bytes) + 1 to workaround pause bug */ - sky2_write16(hw, SK_REG(port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF+1); + reg = RX_GMF_FL_THR_DEF + 1; + /* Another magic mystery workaround from sk98lin */ + if (hw->chip_id == CHIP_ID_YUKON_FE_P && + hw->chip_rev == CHIP_REV_YU_FE2_A0) + reg = 0x178; + sky2_write16(hw, SK_REG(port, RX_GMF_FL_THR), reg); /* Configure Tx MAC FIFO */ sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_CLR); sky2_write16(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_OPER_ON); - if (hw->chip_id == CHIP_ID_YUKON_EC_U || hw->chip_id == CHIP_ID_YUKON_EX) { + if (!(hw->flags & SKY2_HW_RAMBUFFER)) { sky2_write8(hw, SK_REG(port, RX_GMF_LP_THR), 768/8); sky2_write8(hw, SK_REG(port, RX_GMF_UP_THR), 1024/8); @@ -967,19 +995,15 @@ static void sky2_rx_unmap_skb(struct pci_dev *pdev, struct rx_ring_info *re) */ static void rx_set_checksum(struct sky2_port *sky2) { - struct sky2_rx_le *le; + struct sky2_rx_le *le = sky2_next_rx(sky2); - if (sky2->hw->chip_id != CHIP_ID_YUKON_EX) { - le = sky2_next_rx(sky2); - le->addr = cpu_to_le32((ETH_HLEN << 16) | ETH_HLEN); - le->ctrl = 0; - le->opcode = OP_TCPSTART | HW_OWNER; - - sky2_write32(sky2->hw, - Q_ADDR(rxqaddr[sky2->port], Q_CSR), - sky2->rx_csum ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); - } + le->addr = cpu_to_le32((ETH_HLEN << 16) | ETH_HLEN); + le->ctrl = 0; + le->opcode = OP_TCPSTART | HW_OWNER; + sky2_write32(sky2->hw, + Q_ADDR(rxqaddr[sky2->port], Q_CSR), + sky2->rx_csum ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); } /* @@ -1175,7 +1199,8 @@ static int sky2_rx_start(struct sky2_port *sky2) sky2_prefetch_init(hw, rxq, sky2->rx_le_map, RX_LE_SIZE - 1); - rx_set_checksum(sky2); + if (!(hw->flags & SKY2_HW_NEW_LE)) + rx_set_checksum(sky2); /* Space needed for frame data + headers rounded up */ size = roundup(sky2->netdev->mtu + ETH_HLEN + VLAN_HLEN, 8); @@ -1246,7 +1271,7 @@ static int sky2_up(struct net_device *dev) struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; unsigned port = sky2->port; - u32 ramsize, imask; + u32 imask; int cap, err = -ENOMEM; struct net_device *otherdev = hw->dev[sky2->port^1]; @@ -1301,13 +1326,13 @@ static int sky2_up(struct net_device *dev) sky2_mac_init(hw, port); - /* Register is number of 4K blocks on internal RAM buffer. */ - ramsize = sky2_read8(hw, B2_E_0) * 4; - printk(KERN_INFO PFX "%s: ram buffer %dK\n", dev->name, ramsize); - - if (ramsize > 0) { + if (hw->flags & SKY2_HW_RAMBUFFER) { + /* Register is number of 4K blocks on internal RAM buffer. */ + u32 ramsize = sky2_read8(hw, B2_E_0) * 4; u32 rxspace; + printk(KERN_DEBUG PFX "%s: ram buffer %dK\n", dev->name, ramsize); + if (ramsize < 16) rxspace = ramsize / 2; else @@ -1436,13 +1461,15 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* Check for TCP Segmentation Offload */ mss = skb_shinfo(skb)->gso_size; if (mss != 0) { - if (hw->chip_id != CHIP_ID_YUKON_EX) + + if (!(hw->flags & SKY2_HW_NEW_LE)) mss += ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); if (mss != sky2->tx_last_mss) { le = get_tx_le(sky2); le->addr = cpu_to_le32(mss); - if (hw->chip_id == CHIP_ID_YUKON_EX) + + if (hw->flags & SKY2_HW_NEW_LE) le->opcode = OP_MSS | HW_OWNER; else le->opcode = OP_LRGLEN | HW_OWNER; @@ -1468,8 +1495,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* Handle TCP checksum offload */ if (skb->ip_summed == CHECKSUM_PARTIAL) { /* On Yukon EX (some versions) encoding change. */ - if (hw->chip_id == CHIP_ID_YUKON_EX - && hw->chip_rev != CHIP_REV_YU_EX_B0) + if (hw->flags & SKY2_HW_AUTO_TX_SUM) ctrl |= CALSUM; /* auto checksum */ else { const unsigned offset = skb_transport_offset(skb); @@ -1622,9 +1648,6 @@ static int sky2_down(struct net_device *dev) if (netif_msg_ifdown(sky2)) printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); - if (netif_carrier_ok(dev) && --hw->active == 0) - del_timer(&hw->watchdog_timer); - /* Stop more packets from being queued */ netif_stop_queue(dev); @@ -1708,11 +1731,15 @@ static int sky2_down(struct net_device *dev) static u16 sky2_phy_speed(const struct sky2_hw *hw, u16 aux) { - if (!sky2_is_copper(hw)) + if (hw->flags & SKY2_HW_FIBRE_PHY) return SPEED_1000; - if (hw->chip_id == CHIP_ID_YUKON_FE) - return (aux & PHY_M_PS_SPEED_100) ? SPEED_100 : SPEED_10; + if (!(hw->flags & SKY2_HW_GIGABIT)) { + if (aux & PHY_M_PS_SPEED_100) + return SPEED_100; + else + return SPEED_10; + } switch (aux & PHY_M_PS_SPEED_MSK) { case PHY_M_PS_SPEED_1000: @@ -1745,17 +1772,13 @@ static void sky2_link_up(struct sky2_port *sky2) netif_carrier_on(sky2->netdev); - if (hw->active++ == 0) - mod_timer(&hw->watchdog_timer, jiffies + 1); - + mod_timer(&hw->watchdog_timer, jiffies + 1); /* Turn on link LED */ sky2_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_ON | LINKLED_BLINK_OFF | LINKLED_LINKSYNC_OFF); - if (hw->chip_id == CHIP_ID_YUKON_XL - || hw->chip_id == CHIP_ID_YUKON_EC_U - || hw->chip_id == CHIP_ID_YUKON_EX) { + if (hw->flags & SKY2_HW_NEWER_PHY) { u16 pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); u16 led = PHY_M_LEDC_LOS_CTRL(1); /* link active */ @@ -1800,11 +1823,6 @@ static void sky2_link_down(struct sky2_port *sky2) netif_carrier_off(sky2->netdev); - /* Stop watchdog if both ports are not active */ - if (--hw->active == 0) - del_timer(&hw->watchdog_timer); - - /* Turn on link LED */ sky2_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_OFF); @@ -1847,7 +1865,7 @@ static int sky2_autoneg_done(struct sky2_port *sky2, u16 aux) /* Since the pause result bits seem to in different positions on * different chips. look at registers. */ - if (!sky2_is_copper(hw)) { + if (hw->flags & SKY2_HW_FIBRE_PHY) { /* Shift for bits in fiber PHY */ advert &= ~(ADVERTISE_PAUSE_CAP|ADVERTISE_PAUSE_ASYM); lpa &= ~(LPA_PAUSE_CAP|LPA_PAUSE_ASYM); @@ -1958,7 +1976,9 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) return -EINVAL; - if (new_mtu > ETH_DATA_LEN && hw->chip_id == CHIP_ID_YUKON_FE) + if (new_mtu > ETH_DATA_LEN && + (hw->chip_id == CHIP_ID_YUKON_FE || + hw->chip_id == CHIP_ID_YUKON_FE_P)) return -EINVAL; if (!netif_running(dev)) { @@ -1975,7 +1995,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) synchronize_irq(hw->pdev->irq); - if (hw->chip_id == CHIP_ID_YUKON_EC_U || hw->chip_id == CHIP_ID_YUKON_EX) + if (!(hw->flags & SKY2_HW_RAMBUFFER)) sky2_set_tx_stfwd(hw, port); ctl = gma_read16(hw, port, GM_GP_CTRL); @@ -2103,6 +2123,13 @@ static struct sk_buff *sky2_receive(struct net_device *dev, struct sky2_port *sky2 = netdev_priv(dev); struct rx_ring_info *re = sky2->rx_ring + sky2->rx_next; struct sk_buff *skb = NULL; + u16 count = (status & GMR_FS_LEN) >> 16; + +#ifdef SKY2_VLAN_TAG_USED + /* Account for vlan tag */ + if (sky2->vlgrp && (status & GMR_FS_VLAN)) + count -= VLAN_HLEN; +#endif if (unlikely(netif_msg_rx_status(sky2))) printk(KERN_DEBUG PFX "%s: rx slot %u status 0x%x len %d\n", @@ -2117,7 +2144,8 @@ static struct sk_buff *sky2_receive(struct net_device *dev, if (!(status & GMR_FS_RX_OK)) goto resubmit; - if (status >> 16 != length) + /* if length reported by DMA does not match PHY, packet was truncated */ + if (length != count) goto len_mismatch; if (length < copybreak) @@ -2133,6 +2161,10 @@ len_mismatch: /* Truncation of overlength packets causes PHY length to not match MAC length */ ++sky2->net_stats.rx_length_errors; + if (netif_msg_rx_err(sky2) && net_ratelimit()) + pr_info(PFX "%s: rx length mismatch: length %d status %#x\n", + dev->name, length, status); + goto resubmit; error: ++sky2->net_stats.rx_errors; @@ -2202,7 +2234,7 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do) } /* This chip reports checksum status differently */ - if (hw->chip_id == CHIP_ID_YUKON_EX) { + if (hw->flags & SKY2_HW_NEW_LE) { if (sky2->rx_csum && (le->css & (CSS_ISIPV4 | CSS_ISIPV6)) && (le->css & CSS_TCPUDPCSOK)) @@ -2243,8 +2275,14 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do) if (!sky2->rx_csum) break; - if (hw->chip_id == CHIP_ID_YUKON_EX) + /* If this happens then driver assuming wrong format */ + if (unlikely(hw->flags & SKY2_HW_NEW_LE)) { + if (net_ratelimit()) + printk(KERN_NOTICE "%s: unexpected" + " checksum status\n", + dev->name); break; + } /* Both checksum counters are programmed to start at * the same offset, so unless there is a problem they @@ -2436,20 +2474,72 @@ static void sky2_le_error(struct sky2_hw *hw, unsigned port, sky2_write32(hw, Q_ADDR(q, Q_CSR), BMU_CLR_IRQ_CHK); } -/* Check for lost IRQ once a second */ +static int sky2_rx_hung(struct net_device *dev) +{ + struct sky2_port *sky2 = netdev_priv(dev); + struct sky2_hw *hw = sky2->hw; + unsigned port = sky2->port; + unsigned rxq = rxqaddr[port]; + u32 mac_rp = sky2_read32(hw, SK_REG(port, RX_GMF_RP)); + u8 mac_lev = sky2_read8(hw, SK_REG(port, RX_GMF_RLEV)); + u8 fifo_rp = sky2_read8(hw, Q_ADDR(rxq, Q_RP)); + u8 fifo_lev = sky2_read8(hw, Q_ADDR(rxq, Q_RL)); + + /* If idle and MAC or PCI is stuck */ + if (sky2->check.last == dev->last_rx && + ((mac_rp == sky2->check.mac_rp && + mac_lev != 0 && mac_lev >= sky2->check.mac_lev) || + /* Check if the PCI RX hang */ + (fifo_rp == sky2->check.fifo_rp && + fifo_lev != 0 && fifo_lev >= sky2->check.fifo_lev))) { + printk(KERN_DEBUG PFX "%s: hung mac %d:%d fifo %d (%d:%d)\n", + dev->name, mac_lev, mac_rp, fifo_lev, fifo_rp, + sky2_read8(hw, Q_ADDR(rxq, Q_WP))); + return 1; + } else { + sky2->check.last = dev->last_rx; + sky2->check.mac_rp = mac_rp; + sky2->check.mac_lev = mac_lev; + sky2->check.fifo_rp = fifo_rp; + sky2->check.fifo_lev = fifo_lev; + return 0; + } +} + static void sky2_watchdog(unsigned long arg) { struct sky2_hw *hw = (struct sky2_hw *) arg; + struct net_device *dev; + /* Check for lost IRQ once a second */ if (sky2_read32(hw, B0_ISRC)) { - struct net_device *dev = hw->dev[0]; - + dev = hw->dev[0]; if (__netif_rx_schedule_prep(dev)) __netif_rx_schedule(dev); + } else { + int i, active = 0; + + for (i = 0; i < hw->ports; i++) { + dev = hw->dev[i]; + if (!netif_running(dev)) + continue; + ++active; + + /* For chips with Rx FIFO, check if stuck */ + if ((hw->flags & SKY2_HW_RAMBUFFER) && + sky2_rx_hung(dev)) { + pr_info(PFX "%s: receiver hang detected\n", + dev->name); + schedule_work(&hw->restart_work); + return; + } + } + + if (active == 0) + return; } - if (hw->active > 0) - mod_timer(&hw->watchdog_timer, round_jiffies(jiffies + HZ)); + mod_timer(&hw->watchdog_timer, round_jiffies(jiffies + HZ)); } /* Hardware/software error handling */ @@ -2546,17 +2636,25 @@ static void sky2_netpoll(struct net_device *dev) #endif /* Chip internal frequency for clock calculations */ -static inline u32 sky2_mhz(const struct sky2_hw *hw) +static u32 sky2_mhz(const struct sky2_hw *hw) { switch (hw->chip_id) { case CHIP_ID_YUKON_EC: case CHIP_ID_YUKON_EC_U: case CHIP_ID_YUKON_EX: - return 125; /* 125 Mhz */ + return 125; + case CHIP_ID_YUKON_FE: - return 100; /* 100 Mhz */ - default: /* YUKON_XL */ - return 156; /* 156 Mhz */ + return 100; + + case CHIP_ID_YUKON_FE_P: + return 50; + + case CHIP_ID_YUKON_XL: + return 156; + + default: + BUG(); } } @@ -2581,23 +2679,62 @@ static int __devinit sky2_init(struct sky2_hw *hw) sky2_write8(hw, B0_CTST, CS_RST_CLR); hw->chip_id = sky2_read8(hw, B2_CHIP_ID); - if (hw->chip_id < CHIP_ID_YUKON_XL || hw->chip_id > CHIP_ID_YUKON_FE) { + hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4; + + switch(hw->chip_id) { + case CHIP_ID_YUKON_XL: + hw->flags = SKY2_HW_GIGABIT + | SKY2_HW_NEWER_PHY + | SKY2_HW_RAMBUFFER; + break; + + case CHIP_ID_YUKON_EC_U: + hw->flags = SKY2_HW_GIGABIT + | SKY2_HW_NEWER_PHY + | SKY2_HW_ADV_POWER_CTL; + break; + + case CHIP_ID_YUKON_EX: + hw->flags = SKY2_HW_GIGABIT + | SKY2_HW_NEWER_PHY + | SKY2_HW_NEW_LE + | SKY2_HW_ADV_POWER_CTL; + + /* New transmit checksum */ + if (hw->chip_rev != CHIP_REV_YU_EX_B0) + hw->flags |= SKY2_HW_AUTO_TX_SUM; + break; + + case CHIP_ID_YUKON_EC: + /* This rev is really old, and requires untested workarounds */ + if (hw->chip_rev == CHIP_REV_YU_EC_A1) { + dev_err(&hw->pdev->dev, "unsupported revision Yukon-EC rev A1\n"); + return -EOPNOTSUPP; + } + hw->flags = SKY2_HW_GIGABIT | SKY2_HW_RAMBUFFER; + break; + + case CHIP_ID_YUKON_FE: + hw->flags = SKY2_HW_RAMBUFFER; + break; + + case CHIP_ID_YUKON_FE_P: + hw->flags = SKY2_HW_NEWER_PHY + | SKY2_HW_NEW_LE + | SKY2_HW_AUTO_TX_SUM + | SKY2_HW_ADV_POWER_CTL; + break; + default: dev_err(&hw->pdev->dev, "unsupported chip type 0x%x\n", hw->chip_id); return -EOPNOTSUPP; } - hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4; + hw->pmd_type = sky2_read8(hw, B2_PMD_TYP); + if (hw->pmd_type == 'L' || hw->pmd_type == 'S' || hw->pmd_type == 'P') + hw->flags |= SKY2_HW_FIBRE_PHY; - /* This rev is really old, and requires untested workarounds */ - if (hw->chip_id == CHIP_ID_YUKON_EC && hw->chip_rev == CHIP_REV_YU_EC_A1) { - dev_err(&hw->pdev->dev, "unsupported revision Yukon-%s (0x%x) rev %d\n", - yukon2_name[hw->chip_id - CHIP_ID_YUKON_XL], - hw->chip_id, hw->chip_rev); - return -EOPNOTSUPP; - } - hw->pmd_type = sky2_read8(hw, B2_PMD_TYP); hw->ports = 1; t8 = sky2_read8(hw, B2_Y2_HW_RES); if ((t8 & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) { @@ -2791,7 +2928,9 @@ static int sky2_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) sky2->wol = wol->wolopts; - if (hw->chip_id == CHIP_ID_YUKON_EC_U || hw->chip_id == CHIP_ID_YUKON_EX) + if (hw->chip_id == CHIP_ID_YUKON_EC_U || + hw->chip_id == CHIP_ID_YUKON_EX || + hw->chip_id == CHIP_ID_YUKON_FE_P) sky2_write32(hw, B0_CTST, sky2->wol ? Y2_HW_WOL_ON : Y2_HW_WOL_OFF); @@ -2809,7 +2948,7 @@ static u32 sky2_supported_modes(const struct sky2_hw *hw) | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP; - if (hw->chip_id != CHIP_ID_YUKON_FE) + if (hw->flags & SKY2_HW_GIGABIT) modes |= SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full; return modes; @@ -2829,13 +2968,6 @@ static int sky2_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) ecmd->supported = sky2_supported_modes(hw); ecmd->phy_address = PHY_ADDR_MARV; if (sky2_is_copper(hw)) { - ecmd->supported = SUPPORTED_10baseT_Half - | SUPPORTED_10baseT_Full - | SUPPORTED_100baseT_Half - | SUPPORTED_100baseT_Full - | SUPPORTED_1000baseT_Half - | SUPPORTED_1000baseT_Full - | SUPPORTED_Autoneg | SUPPORTED_TP; ecmd->port = PORT_TP; ecmd->speed = sky2->speed; } else { @@ -3791,6 +3923,13 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, sky2->hw = hw; sky2->msg_enable = netif_msg_init(debug, default_msg); + /* This chip has hardware problems that generates + * bogus PHY receive status so by default shut up the message. + */ + if (hw->chip_id == CHIP_ID_YUKON_FE_P && + hw->chip_rev == CHIP_REV_YU_FE2_A0) + sky2->msg_enable &= ~NETIF_MSG_RX_ERR; + /* Auto speed and flow control */ sky2->autoneg = AUTONEG_ENABLE; sky2->flow_mode = FC_BOTH; @@ -3846,7 +3985,7 @@ static irqreturn_t __devinit sky2_test_intr(int irq, void *dev_id) return IRQ_NONE; if (status & Y2_IS_IRQ_SW) { - hw->msi = 1; + hw->flags |= SKY2_HW_USE_MSI; wake_up(&hw->msi_wait); sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ); } @@ -3874,9 +4013,9 @@ static int __devinit sky2_test_msi(struct sky2_hw *hw) sky2_write8(hw, B0_CTST, CS_ST_SW_IRQ); sky2_read8(hw, B0_CTST); - wait_event_timeout(hw->msi_wait, hw->msi, HZ/10); + wait_event_timeout(hw->msi_wait, (hw->flags & SKY2_HW_USE_MSI), HZ/10); - if (!hw->msi) { + if (!(hw->flags & SKY2_HW_USE_MSI)) { /* MSI test failed, go back to INTx mode */ dev_info(&pdev->dev, "No interrupt generated using MSI, " "switching to INTx mode.\n"); @@ -4009,7 +4148,8 @@ static int __devinit sky2_probe(struct pci_dev *pdev, goto err_out_free_netdev; } - err = request_irq(pdev->irq, sky2_intr, hw->msi ? 0 : IRQF_SHARED, + err = request_irq(pdev->irq, sky2_intr, + (hw->flags & SKY2_HW_USE_MSI) ? 0 : IRQF_SHARED, dev->name, hw); if (err) { dev_err(&pdev->dev, "cannot assign irq %d\n", pdev->irq); @@ -4042,7 +4182,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev, return 0; err_out_unregister: - if (hw->msi) + if (hw->flags & SKY2_HW_USE_MSI) pci_disable_msi(pdev); unregister_netdev(dev); err_out_free_netdev: @@ -4091,7 +4231,7 @@ static void __devexit sky2_remove(struct pci_dev *pdev) sky2_read8(hw, B0_CTST); free_irq(pdev->irq, hw); - if (hw->msi) + if (hw->flags & SKY2_HW_USE_MSI) pci_disable_msi(pdev); pci_free_consistent(pdev, STATUS_LE_BYTES, hw->st_le, hw->st_dma); pci_release_regions(pdev); @@ -4159,7 +4299,9 @@ static int sky2_resume(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D0, 0); /* Re-enable all clocks */ - if (hw->chip_id == CHIP_ID_YUKON_EX || hw->chip_id == CHIP_ID_YUKON_EC_U) + if (hw->chip_id == CHIP_ID_YUKON_EX || + hw->chip_id == CHIP_ID_YUKON_EC_U || + hw->chip_id == CHIP_ID_YUKON_FE_P) sky2_pci_write32(hw, PCI_DEV_REG3, 0); sky2_reset(hw); diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index 72e12b7cfa40..69cd98400fe6 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -470,18 +470,24 @@ enum { CHIP_ID_YUKON_EX = 0xb5, /* Chip ID for YUKON-2 Extreme */ CHIP_ID_YUKON_EC = 0xb6, /* Chip ID for YUKON-2 EC */ CHIP_ID_YUKON_FE = 0xb7, /* Chip ID for YUKON-2 FE */ - + CHIP_ID_YUKON_FE_P = 0xb8, /* Chip ID for YUKON-2 FE+ */ +}; +enum yukon_ec_rev { CHIP_REV_YU_EC_A1 = 0, /* Chip Rev. for Yukon-EC A1/A0 */ CHIP_REV_YU_EC_A2 = 1, /* Chip Rev. for Yukon-EC A2 */ CHIP_REV_YU_EC_A3 = 2, /* Chip Rev. for Yukon-EC A3 */ - +}; +enum yukon_ec_u_rev { CHIP_REV_YU_EC_U_A0 = 1, CHIP_REV_YU_EC_U_A1 = 2, CHIP_REV_YU_EC_U_B0 = 3, - +}; +enum yukon_fe_rev { CHIP_REV_YU_FE_A1 = 1, CHIP_REV_YU_FE_A2 = 2, - +}; +enum yukon_fe_p_rev { + CHIP_REV_YU_FE2_A0 = 0, }; enum yukon_ex_rev { CHIP_REV_YU_EX_A0 = 1, @@ -1668,7 +1674,7 @@ enum { /* Receive Frame Status Encoding */ enum { - GMR_FS_LEN = 0xffff<<16, /* Bit 31..16: Rx Frame Length */ + GMR_FS_LEN = 0x7fff<<16, /* Bit 30..16: Rx Frame Length */ GMR_FS_VLAN = 1<<13, /* VLAN Packet */ GMR_FS_JABBER = 1<<12, /* Jabber Packet */ GMR_FS_UN_SIZE = 1<<11, /* Undersize Packet */ @@ -1729,6 +1735,10 @@ enum { GMF_RX_CTRL_DEF = GMF_OPER_ON | GMF_RX_F_FL_ON, }; +/* TX_GMF_EA 32 bit Tx GMAC FIFO End Address */ +enum { + TX_DYN_WM_ENA = 3, /* Yukon-FE+ specific */ +}; /* TX_GMF_CTRL_T 32 bit Tx GMAC FIFO Control/Test */ enum { @@ -2017,6 +2027,14 @@ struct sky2_port { u16 rx_tag; struct vlan_group *vlgrp; #endif + struct { + unsigned long last; + u32 mac_rp; + u8 mac_lev; + u8 fifo_rp; + u8 fifo_lev; + } check; + dma_addr_t rx_le_map; dma_addr_t tx_le_map; @@ -2040,12 +2058,20 @@ struct sky2_hw { void __iomem *regs; struct pci_dev *pdev; struct net_device *dev[2]; + unsigned long flags; +#define SKY2_HW_USE_MSI 0x00000001 +#define SKY2_HW_FIBRE_PHY 0x00000002 +#define SKY2_HW_GIGABIT 0x00000004 +#define SKY2_HW_NEWER_PHY 0x00000008 +#define SKY2_HW_RAMBUFFER 0x00000010 /* chip has RAM FIFO */ +#define SKY2_HW_NEW_LE 0x00000020 /* new LSOv2 format */ +#define SKY2_HW_AUTO_TX_SUM 0x00000040 /* new IP decode for Tx */ +#define SKY2_HW_ADV_POWER_CTL 0x00000080 /* additional PHY power regs */ u8 chip_id; u8 chip_rev; u8 pmd_type; u8 ports; - u8 active; struct sky2_status_le *st_le; u32 st_idx; @@ -2053,13 +2079,12 @@ struct sky2_hw { struct timer_list watchdog_timer; struct work_struct restart_work; - int msi; wait_queue_head_t msi_wait; }; static inline int sky2_is_copper(const struct sky2_hw *hw) { - return !(hw->pmd_type == 'L' || hw->pmd_type == 'S' || hw->pmd_type == 'P'); + return !(hw->flags & SKY2_HW_FIBRE_PHY); } /* Register accessor for memory mapped device */ diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 46da5714932c..5ab3492817d1 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -61,7 +61,7 @@ struct rtc_plat_data { struct rtc_device *rtc; void __iomem *ioaddr; - unsigned long baseaddr; + resource_size_t baseaddr; unsigned long last_jiffies; int irq; unsigned int irqen; diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index b2e5481ba3b6..67291b0f8283 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -55,7 +55,7 @@ struct rtc_plat_data { void __iomem *ioaddr_rtc; size_t size_nvram; size_t size; - unsigned long baseaddr; + resource_size_t baseaddr; unsigned long last_jiffies; }; diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index bca57bb94939..e348ba684050 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -58,6 +58,7 @@ struct uart_sunsab_port { unsigned char interrupt_mask1;/* ISR1 masking */ unsigned char pvr_dtr_bit; /* Which PVR bit is DTR */ unsigned char pvr_dsr_bit; /* Which PVR bit is DSR */ + unsigned int gis_shift; int type; /* SAB82532 version */ /* Setting configuration bits while the transmitter is active @@ -305,13 +306,15 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id) struct tty_struct *tty; union sab82532_irq_status status; unsigned long flags; + unsigned char gis; spin_lock_irqsave(&up->port.lock, flags); status.stat = 0; - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA0) + gis = readb(&up->regs->r.gis) >> up->gis_shift; + if (gis & 1) status.sreg.isr0 = readb(&up->regs->r.isr0); - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA1) + if (gis & 2) status.sreg.isr1 = readb(&up->regs->r.isr1); tty = NULL; @@ -327,35 +330,6 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id) transmit_chars(up, &status); } - spin_unlock(&up->port.lock); - - if (tty) - tty_flip_buffer_push(tty); - - up++; - - spin_lock(&up->port.lock); - - status.stat = 0; - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB0) - status.sreg.isr0 = readb(&up->regs->r.isr0); - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB1) - status.sreg.isr1 = readb(&up->regs->r.isr1); - - tty = NULL; - if (status.stat) { - if ((status.sreg.isr0 & (SAB82532_ISR0_TCD | SAB82532_ISR0_TIME | - SAB82532_ISR0_RFO | SAB82532_ISR0_RPF)) || - (status.sreg.isr1 & SAB82532_ISR1_BRK)) - - tty = receive_chars(up, &status); - if ((status.sreg.isr0 & SAB82532_ISR0_CDSC) || - (status.sreg.isr1 & (SAB82532_ISR1_BRK | SAB82532_ISR1_CSC))) - check_status(up, &status); - if (status.sreg.isr1 & (SAB82532_ISR1_ALLS | SAB82532_ISR1_XPR)) - transmit_chars(up, &status); - } - spin_unlock_irqrestore(&up->port.lock, flags); if (tty) @@ -539,6 +513,10 @@ static int sunsab_startup(struct uart_port *port) struct uart_sunsab_port *up = (struct uart_sunsab_port *) port; unsigned long flags; unsigned char tmp; + int err = request_irq(up->port.irq, sunsab_interrupt, + IRQF_SHARED, "sab", up); + if (err) + return err; spin_lock_irqsave(&up->port.lock, flags); @@ -641,6 +619,7 @@ static void sunsab_shutdown(struct uart_port *port) #endif spin_unlock_irqrestore(&up->port.lock, flags); + free_irq(up->port.irq, up); } /* @@ -1008,9 +987,11 @@ static int __devinit sunsab_init_one(struct uart_sunsab_port *up, if ((up->port.line & 0x1) == 0) { up->pvr_dsr_bit = (1 << 0); up->pvr_dtr_bit = (1 << 1); + up->gis_shift = 2; } else { up->pvr_dsr_bit = (1 << 3); up->pvr_dtr_bit = (1 << 2); + up->gis_shift = 0; } up->cached_pvr = (1 << 1) | (1 << 2) | (1 << 4); writeb(up->cached_pvr, &up->regs->w.pvr); @@ -1023,19 +1004,6 @@ static int __devinit sunsab_init_one(struct uart_sunsab_port *up, up->tec_timeout = SAB82532_MAX_TEC_TIMEOUT; up->cec_timeout = SAB82532_MAX_CEC_TIMEOUT; - if (!(up->port.line & 0x01)) { - int err; - - err = request_irq(up->port.irq, sunsab_interrupt, - IRQF_SHARED, "sab", up); - if (err) { - of_iounmap(&op->resource[0], - up->port.membase, - sizeof(union sab82532_async_regs)); - return err; - } - } - return 0; } @@ -1051,52 +1019,60 @@ static int __devinit sab_probe(struct of_device *op, const struct of_device_id * 0, (inst * 2) + 0); if (err) - return err; + goto out; err = sunsab_init_one(&up[1], op, sizeof(union sab82532_async_regs), (inst * 2) + 1); - if (err) { - of_iounmap(&op->resource[0], - up[0].port.membase, - sizeof(union sab82532_async_regs)); - free_irq(up[0].port.irq, &up[0]); - return err; - } + if (err) + goto out1; sunserial_console_match(SUNSAB_CONSOLE(), op->node, &sunsab_reg, up[0].port.line); - uart_add_one_port(&sunsab_reg, &up[0].port); sunserial_console_match(SUNSAB_CONSOLE(), op->node, &sunsab_reg, up[1].port.line); - uart_add_one_port(&sunsab_reg, &up[1].port); + + err = uart_add_one_port(&sunsab_reg, &up[0].port); + if (err) + goto out2; + + err = uart_add_one_port(&sunsab_reg, &up[1].port); + if (err) + goto out3; dev_set_drvdata(&op->dev, &up[0]); inst++; return 0; -} - -static void __devexit sab_remove_one(struct uart_sunsab_port *up) -{ - struct of_device *op = to_of_device(up->port.dev); - uart_remove_one_port(&sunsab_reg, &up->port); - if (!(up->port.line & 1)) - free_irq(up->port.irq, up); +out3: + uart_remove_one_port(&sunsab_reg, &up[0].port); +out2: of_iounmap(&op->resource[0], - up->port.membase, + up[1].port.membase, sizeof(union sab82532_async_regs)); +out1: + of_iounmap(&op->resource[0], + up[0].port.membase, + sizeof(union sab82532_async_regs)); +out: + return err; } static int __devexit sab_remove(struct of_device *op) { struct uart_sunsab_port *up = dev_get_drvdata(&op->dev); - sab_remove_one(&up[0]); - sab_remove_one(&up[1]); + uart_remove_one_port(&sunsab_reg, &up[1].port); + uart_remove_one_port(&sunsab_reg, &up[0].port); + of_iounmap(&op->resource[0], + up[1].port.membase, + sizeof(union sab82532_async_regs)); + of_iounmap(&op->resource[0], + up[0].port.membase, + sizeof(union sab82532_async_regs)); dev_set_drvdata(&op->dev, NULL); @@ -1143,6 +1119,7 @@ static int __init sunsab_init(void) sunsab_reg.minor = sunserial_current_minor; sunsab_reg.nr = num_channels; + sunsab_reg.cons = SUNSAB_CONSOLE(); err = uart_register_driver(&sunsab_reg); if (err) { diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c index b21d0dec9283..6a47682d8614 100644 --- a/drivers/video/intelfb/intelfbhw.c +++ b/drivers/video/intelfb/intelfbhw.c @@ -1352,7 +1352,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo, /* turn off PLL */ tmp = INREG(dpll_reg); - dpll_reg &= ~DPLL_VCO_ENABLE; + tmp &= ~DPLL_VCO_ENABLE; OUTREG(dpll_reg, tmp); /* Set PLL parameters */ diff --git a/fs/exec.c b/fs/exec.c index c21a8cc06277..073b0b8c6d05 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -50,7 +50,6 @@ #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> -#include <linux/signalfd.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk) * and we can just re-use it all. */ if (atomic_read(&oldsighand->count) <= 1) { - signalfd_detach(tsk); exit_itimers(sig); return 0; } @@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk) sig->flags = 0; no_thread_group: - signalfd_detach(tsk); exit_itimers(sig); if (leader) release_task(leader); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 1586807b8177..c1fa1908dba0 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -140,7 +140,8 @@ struct dx_frame struct dx_map_entry { u32 hash; - u32 offs; + u16 offs; + u16 size; }; #ifdef CONFIG_EXT3_INDEX @@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir, entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); + + if (dx_get_limit(entries) != dx_root_limit(dir, + root->info.info_length)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != root limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); + if (!count || count > dx_get_limit(entries)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: no count or count > limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } + p = entries + 1; q = entries + count - 1; while (p <= q) @@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); + if (dx_get_limit(entries) != dx_node_limit (dir)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != node limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } frame++; + frame->bh = NULL; } fail2: while (frame >= frame_in) { @@ -432,6 +455,10 @@ fail2: frame--; } fail: + if (*err == ERR_BAD_DX_DIR) + ext3_warning(dir->i_sb, __FUNCTION__, + "Corrupt dir inode %ld, running e2fsck is " + "recommended.", dir->i_ino); return NULL; } @@ -671,6 +698,10 @@ errout: * Directory block splitting, compacting */ +/* + * Create map of hash values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { @@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, ext3fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); + map_tail->offs = (u16) ((char *) de - base); + map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); } @@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, return count; } +/* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; @@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb, } #ifdef CONFIG_EXT3_INDEX +/* + * Move count entries from end of map between two memory locations. + * Returns pointer to last entry moved. + */ static struct ext3_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { @@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) return (struct ext3_dir_entry_2 *) (to - rec_len); } +/* + * Compact each dir entry in the range to the minimal rec_len. + * Returns pointer to last entry in range. + */ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) { struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; @@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) return prev; } +/* + * Split a full leaf block to make room for a new dir entry. + * Allocate a new block, and move entries so that they are approx. equally full. + * Returns pointer to de in block into which the new entry will be inserted. + */ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) @@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split; + unsigned split, move, size, i; struct ext3_dir_entry_2 *de = NULL, *de2; int err = 0; @@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, count = dx_make_map ((struct ext3_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - split = count/2; // need to adjust to actual middle dx_sort_map (map, count); + /* Split the existing block in the middle, size-wise */ + size = 0; + move = 0; + for (i = count-1; i >= 0; i--) { + /* is more than half of this entry in 2nd half of the block? */ + if (size + map[i].size/2 > blocksize/2) + break; + size += map[i].size; + move++; + } + /* map index at which we will split */ + split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index da224974af78..5fdb862e71c4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -140,7 +140,8 @@ struct dx_frame struct dx_map_entry { u32 hash; - u32 offs; + u16 offs; + u16 size; }; #ifdef CONFIG_EXT4_INDEX @@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir, entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); + + if (dx_get_limit(entries) != dx_root_limit(dir, + root->info.info_length)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != root limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); + if (!count || count > dx_get_limit(entries)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: no count or count > limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } + p = entries + 1; q = entries + count - 1; while (p <= q) @@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); + if (dx_get_limit(entries) != dx_node_limit (dir)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != node limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } frame++; + frame->bh = NULL; } fail2: while (frame >= frame_in) { @@ -432,6 +455,10 @@ fail2: frame--; } fail: + if (*err == ERR_BAD_DX_DIR) + ext4_warning(dir->i_sb, __FUNCTION__, + "Corrupt dir inode %ld, running e2fsck is " + "recommended.", dir->i_ino); return NULL; } @@ -671,6 +698,10 @@ errout: * Directory block splitting, compacting */ +/* + * Create map of hash values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { @@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); + map_tail->offs = (u16) ((char *) de - base); + map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); } @@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, return count; } +/* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; @@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb, } #ifdef CONFIG_EXT4_INDEX +/* + * Move count entries from end of map between two memory locations. + * Returns pointer to last entry moved. + */ static struct ext4_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { @@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) return (struct ext4_dir_entry_2 *) (to - rec_len); } +/* + * Compact each dir entry in the range to the minimal rec_len. + * Returns pointer to last entry in range. + */ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) { struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; @@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) return prev; } +/* + * Split a full leaf block to make room for a new dir entry. + * Allocate a new block, and move entries so that they are approx. equally full. + * Returns pointer to de in block into which the new entry will be inserted. + */ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) @@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split; + unsigned split, move, size, i; struct ext4_dir_entry_2 *de = NULL, *de2; int err = 0; @@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, count = dx_make_map ((struct ext4_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - split = count/2; // need to adjust to actual middle dx_sort_map (map, count); + /* Split the existing block in the middle, size-wise */ + size = 0; + move = 0; + for (i = count-1; i >= 0; i--) { + /* is more than half of this entry in 2nd half of the block? */ + if (size + map[i].size/2 > blocksize/2) + break; + size += map[i].size; + move++; + } + /* map index at which we will split */ + split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8ed593766f16..b878528b64c1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void) unregister_shrinker(&acl_shrinker); #ifdef CONFIG_NFS_V4 unregister_filesystem(&nfs4_fs_type); - nfs_unregister_sysctl(); #endif + nfs_unregister_sysctl(); unregister_filesystem(&nfs_fs_type); } diff --git a/fs/signalfd.c b/fs/signalfd.c index a8e293d30034..aefb0be07942 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -11,8 +11,10 @@ * Now using anonymous inode source. * Thanks to Oleg Nesterov for useful code review and suggestions. * More comments and suggestions from Arnd Bergmann. - * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> + * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> * Retrieve multiple signals with one read() call + * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org> + * Attach to the sighand only during read() and poll(). */ #include <linux/file.h> @@ -27,102 +29,12 @@ #include <linux/signalfd.h> struct signalfd_ctx { - struct list_head lnk; - wait_queue_head_t wqh; sigset_t sigmask; - struct task_struct *tsk; }; -struct signalfd_lockctx { - struct task_struct *tsk; - unsigned long flags; -}; - -/* - * Tries to acquire the sighand lock. We do not increment the sighand - * use count, and we do not even pin the task struct, so we need to - * do it inside an RCU read lock, and we must be prepared for the - * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand - * being detached. We return 0 if the sighand has been detached, or - * 1 if we were able to pin the sighand lock. - */ -static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) -{ - struct sighand_struct *sighand = NULL; - - rcu_read_lock(); - lk->tsk = rcu_dereference(ctx->tsk); - if (likely(lk->tsk != NULL)) - sighand = lock_task_sighand(lk->tsk, &lk->flags); - rcu_read_unlock(); - - if (!sighand) - return 0; - - if (!ctx->tsk) { - unlock_task_sighand(lk->tsk, &lk->flags); - return 0; - } - - if (lk->tsk->tgid == current->tgid) - lk->tsk = current; - - return 1; -} - -static void signalfd_unlock(struct signalfd_lockctx *lk) -{ - unlock_task_sighand(lk->tsk, &lk->flags); -} - -/* - * This must be called with the sighand lock held. - */ -void signalfd_deliver(struct task_struct *tsk, int sig) -{ - struct sighand_struct *sighand = tsk->sighand; - struct signalfd_ctx *ctx, *tmp; - - BUG_ON(!sig); - list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { - /* - * We use a negative signal value as a way to broadcast that the - * sighand has been orphaned, so that we can notify all the - * listeners about this. Remember the ctx->sigmask is inverted, - * so if the user is interested in a signal, that corresponding - * bit will be zero. - */ - if (sig < 0) { - if (ctx->tsk == tsk) { - ctx->tsk = NULL; - list_del_init(&ctx->lnk); - wake_up(&ctx->wqh); - } - } else { - if (!sigismember(&ctx->sigmask, sig)) - wake_up(&ctx->wqh); - } - } -} - -static void signalfd_cleanup(struct signalfd_ctx *ctx) -{ - struct signalfd_lockctx lk; - - /* - * This is tricky. If the sighand is gone, we do not need to remove - * context from the list, the list itself won't be there anymore. - */ - if (signalfd_lock(ctx, &lk)) { - list_del(&ctx->lnk); - signalfd_unlock(&lk); - } - kfree(ctx); -} - static int signalfd_release(struct inode *inode, struct file *file) { - signalfd_cleanup(file->private_data); + kfree(file->private_data); return 0; } @@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait) { struct signalfd_ctx *ctx = file->private_data; unsigned int events = 0; - struct signalfd_lockctx lk; - poll_wait(file, &ctx->wqh, wait); + poll_wait(file, ¤t->sighand->signalfd_wqh, wait); - /* - * Let the caller get a POLLIN in this case, ala socket recv() when - * the peer disconnects. - */ - if (signalfd_lock(ctx, &lk)) { - if ((lk.tsk == current && - next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || - next_signal(&lk.tsk->signal->shared_pending, - &ctx->sigmask) > 0) - events |= POLLIN; - signalfd_unlock(&lk); - } else + spin_lock_irq(¤t->sighand->siglock); + if (next_signal(¤t->pending, &ctx->sigmask) || + next_signal(¤t->signal->shared_pending, + &ctx->sigmask)) events |= POLLIN; + spin_unlock_irq(¤t->sighand->siglock); return events; } @@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info, int nonblock) { ssize_t ret; - struct signalfd_lockctx lk; DECLARE_WAITQUEUE(wait, current); - if (!signalfd_lock(ctx, &lk)) - return 0; - - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); + spin_lock_irq(¤t->sighand->siglock); + ret = dequeue_signal(current, &ctx->sigmask, info); switch (ret) { case 0: if (!nonblock) break; ret = -EAGAIN; default: - signalfd_unlock(&lk); + spin_unlock_irq(¤t->sighand->siglock); return ret; } - add_wait_queue(&ctx->wqh, &wait); + add_wait_queue(¤t->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); - signalfd_unlock(&lk); + ret = dequeue_signal(current, &ctx->sigmask, info); if (ret != 0) break; if (signal_pending(current)) { ret = -ERESTARTSYS; break; } + spin_unlock_irq(¤t->sighand->siglock); schedule(); - ret = signalfd_lock(ctx, &lk); - if (unlikely(!ret)) { - /* - * Let the caller read zero byte, ala socket - * recv() when the peer disconnect. This test - * must be done before doing a dequeue_signal(), - * because if the sighand has been orphaned, - * the dequeue_signal() call is going to crash - * because ->sighand will be long gone. - */ - break; - } + spin_lock_irq(¤t->sighand->siglock); } + spin_unlock_irq(¤t->sighand->siglock); - remove_wait_queue(&ctx->wqh, &wait); + remove_wait_queue(¤t->sighand->signalfd_wqh, &wait); __set_current_state(TASK_RUNNING); return ret; } /* - * Returns either the size of a "struct signalfd_siginfo", or zero if the - * sighand we are attached to, has been orphaned. The "count" parameter - * must be at least the size of a "struct signalfd_siginfo". + * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative + * error code. The "count" parameter must be at least the size of a + * "struct signalfd_siginfo". */ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, return -EINVAL; siginfo = (struct signalfd_siginfo __user *) buf; - do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) @@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, nonblock = 1; } while (--count); - return total ? total : ret; + return total ? total: ret; } static const struct file_operations signalfd_fops = { @@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = { .read = signalfd_read, }; -/* - * Create a file descriptor that is associated with our signal - * state. We can pass it around to others if we want to, but - * it will always be _our_ signal state. - */ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) { int error; sigset_t sigmask; struct signalfd_ctx *ctx; - struct sighand_struct *sighand; struct file *file; struct inode *inode; - struct signalfd_lockctx lk; if (sizemask != sizeof(sigset_t) || copy_from_user(&sigmask, user_mask, sizeof(sigmask))) @@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas if (!ctx) return -ENOMEM; - init_waitqueue_head(&ctx->wqh); ctx->sigmask = sigmask; - ctx->tsk = current->group_leader; - - sighand = current->sighand; - /* - * Add this fd to the list of signal listeners. - */ - spin_lock_irq(&sighand->siglock); - list_add_tail(&ctx->lnk, &sighand->signalfd_list); - spin_unlock_irq(&sighand->siglock); /* * When we call this, the initialization must be complete, since @@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas fput(file); return -EINVAL; } - /* - * We need to be prepared of the fact that the sighand this fd - * is attached to, has been detched. In that case signalfd_lock() - * will return 0, and we'll just skip setting the new mask. - */ - if (signalfd_lock(ctx, &lk)) { - ctx->sigmask = sigmask; - signalfd_unlock(&lk); - } - wake_up(&ctx->wqh); + spin_lock_irq(¤t->sighand->siglock); + ctx->sigmask = sigmask; + spin_unlock_irq(¤t->sighand->siglock); + + wake_up(¤t->sighand->signalfd_wqh); fput(file); } return ufd; err_fdalloc: - signalfd_cleanup(ctx); + kfree(ctx); return error; } diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d9c40fe64195..5f152f60d74d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -181,6 +181,7 @@ xfs_setfilesize( ip->i_d.di_size = isize; ip->i_update_core = 1; ip->i_update_size = 1; + mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode)); } xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4528f9a3f304..491d1f4f202d 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -415,8 +415,10 @@ xfs_fs_write_inode( if (vp) { vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - if (sync) + if (sync) { + filemap_fdatawait(inode->i_mapping); flags |= FLUSH_SYNC; + } error = bhv_vop_iflush(vp, flags); if (error == EAGAIN) error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0; diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index d7e136143066..fa25b7dcc6c3 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t { #define XFS_BLI_UDQUOT_BUF 0x4 #define XFS_BLI_PDQUOT_BUF 0x8 #define XFS_BLI_GDQUOT_BUF 0x10 +/* + * This flag indicates that the buffer contains newly allocated + * inodes. + */ +#define XFS_BLI_INODE_NEW_BUF 0x20 #define XFS_BLI_CHUNK 128 #define XFS_BLI_SHIFT 7 diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index ce2278611bb7..16f8e175167d 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -467,8 +467,7 @@ void xfs_filestream_flush( xfs_mount_t *mp) { - /* point in time flush, so keep the reaper running */ - xfs_mru_cache_flush(mp->m_filestream, 1); + xfs_mru_cache_flush(mp->m_filestream); } /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 8ae6e8e5f3db..dacb19739cc2 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer( /*ARGSUSED*/ STATIC void xlog_recover_do_reg_buffer( + xfs_mount_t *mp, xlog_recover_item_t *item, xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) @@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer( unsigned int *data_map = NULL; unsigned int map_size = 0; int error; + int stale_buf = 1; + + /* + * Scan through the on-disk inode buffer and attempt to + * determine if it has been written to since it was logged. + * + * - If any of the magic numbers are incorrect then the buffer is stale + * - If any of the modes are non-zero then the buffer is not stale + * - If all of the modes are zero and at least one of the generation + * counts is non-zero then the buffer is stale + * + * If the end result is a stale buffer then the log buffer is replayed + * otherwise it is skipped. + * + * This heuristic is not perfect. It can be improved by scanning the + * entire inode chunk for evidence that any of the inode clusters have + * been updated. To fix this problem completely we will need a major + * architectural change to the logging system. + */ + if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) { + xfs_dinode_t *dip; + int inodes_per_buf; + int mode_count = 0; + int gen_count = 0; + + stale_buf = 0; + inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; + for (i = 0; i < inodes_per_buf; i++) { + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + i * mp->m_sb.sb_inodesize); + if (be16_to_cpu(dip->di_core.di_magic) != + XFS_DINODE_MAGIC) { + stale_buf = 1; + break; + } + if (be16_to_cpu(dip->di_core.di_mode)) + mode_count++; + if (be16_to_cpu(dip->di_core.di_gen)) + gen_count++; + } + + if (!mode_count && gen_count) + stale_buf = 1; + } switch (buf_f->blf_type) { case XFS_LI_BUF: @@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer( -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); } - if (!error) + if (!error && stale_buf) memcpy(xfs_buf_offset(bp, (uint)bit << XFS_BLI_SHIFT), /* dest */ item->ri_buf[i].i_addr, /* source */ @@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer( if (log->l_quotaoffs_flag & type) return; - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } /* @@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans( (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } if (error) return XFS_ERROR(error); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 7deb9e3cbbd3..e0b358c1c533 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert( */ if (!_xfs_mru_cache_migrate(mru, now)) { mru->time_zero = now; - if (!mru->next_reap) - mru->next_reap = mru->grp_count * mru->grp_time; + if (!mru->queued) { + mru->queued = 1; + queue_delayed_work(xfs_mru_reap_wq, &mru->work, + mru->grp_count * mru->grp_time); + } } else { grp = (now - mru->time_zero) / mru->grp_time; grp = (mru->lru_grp + grp) % mru->grp_count; @@ -271,29 +274,26 @@ _xfs_mru_cache_reap( struct work_struct *work) { xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); - unsigned long now; + unsigned long now, next; ASSERT(mru && mru->lists); if (!mru || !mru->lists) return; mutex_spinlock(&mru->lock); - now = jiffies; - if (mru->reap_all || - (mru->next_reap && time_after(now, mru->next_reap))) { - if (mru->reap_all) - now += mru->grp_count * mru->grp_time * 2; - mru->next_reap = _xfs_mru_cache_migrate(mru, now); - _xfs_mru_cache_clear_reap_list(mru); + next = _xfs_mru_cache_migrate(mru, jiffies); + _xfs_mru_cache_clear_reap_list(mru); + + mru->queued = next; + if ((mru->queued > 0)) { + now = jiffies; + if (next <= now) + next = 0; + else + next -= now; + queue_delayed_work(xfs_mru_reap_wq, &mru->work, next); } - /* - * the process that triggered the reap_all is responsible - * for restating the periodic reap if it is required. - */ - if (!mru->reap_all) - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); - mru->reap_all = 0; mutex_spinunlock(&mru->lock, 0); } @@ -352,7 +352,7 @@ xfs_mru_cache_create( /* An extra list is needed to avoid reaping up to a grp_time early. */ mru->grp_count = grp_count + 1; - mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); + mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); if (!mru->lists) { err = ENOMEM; @@ -374,11 +374,6 @@ xfs_mru_cache_create( mru->grp_time = grp_time; mru->free_func = free_func; - /* start up the reaper event */ - mru->next_reap = 0; - mru->reap_all = 0; - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); - *mrup = mru; exit: @@ -394,35 +389,25 @@ exit: * Call xfs_mru_cache_flush() to flush out all cached entries, calling their * free functions as they're deleted. When this function returns, the caller is * guaranteed that all the free functions for all the elements have finished - * executing. - * - * While we are flushing, we stop the periodic reaper event from triggering. - * Normally, we want to restart this periodic event, but if we are shutting - * down the cache we do not want it restarted. hence the restart parameter - * where 0 = do not restart reaper and 1 = restart reaper. + * executing and the reaper is not running. */ void xfs_mru_cache_flush( - xfs_mru_cache_t *mru, - int restart) + xfs_mru_cache_t *mru) { if (!mru || !mru->lists) return; - cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); - mutex_spinlock(&mru->lock); - mru->reap_all = 1; - mutex_spinunlock(&mru->lock, 0); + if (mru->queued) { + mutex_spinunlock(&mru->lock, 0); + cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); + mutex_spinlock(&mru->lock); + } - queue_work(xfs_mru_reap_wq, &mru->work.work); - flush_workqueue(xfs_mru_reap_wq); + _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time); + _xfs_mru_cache_clear_reap_list(mru); - mutex_spinlock(&mru->lock); - WARN_ON_ONCE(mru->reap_all != 0); - mru->reap_all = 0; - if (restart) - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); mutex_spinunlock(&mru->lock, 0); } @@ -433,8 +418,7 @@ xfs_mru_cache_destroy( if (!mru || !mru->lists) return; - /* we don't want the reaper to restart here */ - xfs_mru_cache_flush(mru, 0); + xfs_mru_cache_flush(mru); kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); kmem_free(mru, sizeof(*mru)); diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 624fd10ee8e5..dd58ea1bbebe 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -32,11 +32,9 @@ typedef struct xfs_mru_cache unsigned int grp_time; /* Time period spanned by grps. */ unsigned int lru_grp; /* Group containing time zero. */ unsigned long time_zero; /* Time first element was added. */ - unsigned long next_reap; /* Time that the reaper should - next do something. */ - unsigned int reap_all; /* if set, reap all lists */ xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ struct delayed_work work; /* Workqueue data for reaping. */ + unsigned int queued; /* work has been queued */ } xfs_mru_cache_t; int xfs_mru_cache_init(void); @@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void); int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func); -void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart); +void xfs_mru_cache_flush(xfs_mru_cache_t *mru); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, void *value); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 60b6b898022b..95fff6872a2f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf( ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; + bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1a5ad8cd97b0..603459229904 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1082,6 +1082,9 @@ xfs_fsync( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); + if (flag & FSYNC_DATA) + filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* * We always need to make sure that the required inode state * is safe on disk. The vnode might be clean but because @@ -3769,12 +3772,16 @@ xfs_inode_flush( sync_lsn = log->l_last_sync_lsn; GRANT_UNLOCK(log, s); - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) - return 0; + if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { + if (flags & FLUSH_SYNC) + log_flags |= XFS_LOG_SYNC; + error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (error) + return error; + } - if (flags & FLUSH_SYNC) - log_flags |= XFS_LOG_SYNC; - return xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (ip->i_update_core == 0) + return 0; } } @@ -3788,9 +3795,6 @@ xfs_inode_flush( if (flags & FLUSH_INODE) { int flush_flags; - if (xfs_ipincount(ip)) - return EAGAIN; - if (flags & FLUSH_SYNC) { xfs_ilock(ip, XFS_ILOCK_SHARED); xfs_iflock(ip); diff --git a/include/asm-mips/compiler.h b/include/asm-mips/compiler.h index 169ae26105e9..aa6b876bbd78 100644 --- a/include/asm-mips/compiler.h +++ b/include/asm-mips/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004 Maciej W. Rozycki + * Copyright (C) 2004, 2007 Maciej W. Rozycki * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -9,8 +9,10 @@ #define _ASM_COMPILER_H #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +#define GCC_IMM_ASM "n" #define GCC_REG_ACCUM "$0" #else +#define GCC_IMM_ASM "rn" #define GCC_REG_ACCUM "accum" #endif diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h index d7f5ddfbaac7..c104c15c6625 100644 --- a/include/asm-powerpc/time.h +++ b/include/asm-powerpc/time.h @@ -149,6 +149,11 @@ static inline u64 get_tb(void) } #endif /* !CONFIG_PPC64 */ +static inline u64 get_tb_or_rtc(void) +{ + return __USE_RTC() ? get_rtc() : get_tb(); +} + static inline void set_tb(unsigned int upper, unsigned int lower) { mtspr(SPRN_TBWL, 0); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index cab741c2d603..f8abfa349ef9 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -86,7 +86,7 @@ extern struct nsproxy init_nsproxy; .count = ATOMIC_INIT(1), \ .action = { { { .sa_handler = NULL, } }, }, \ .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ - .signalfd_list = LIST_HEAD_INIT(sighand.signalfd_list), \ + .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \ } extern struct group_info init_groups; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5bdd656e88cf..a020eb2d4e2a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr, gfp_t gfp_flags); + unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol); extern unsigned slab_node(struct mempolicy *policy); extern enum zone_type policy_zone; @@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) #define set_cpuset_being_rebound(x) do {} while (0) static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr, gfp_t gfp_flags) + unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) { return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); } diff --git a/include/linux/sched.h b/include/linux/sched.h index f4e324ed2e44..a01ac6dd5f5e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -438,7 +438,7 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct list_head signalfd_list; + wait_queue_head_t signalfd_wqh; }; struct pacct_struct { @@ -593,7 +593,7 @@ struct user_struct { #endif /* Hash table maintenance information */ - struct list_head uidhash_list; + struct hlist_node uidhash_node; uid_t uid; }; @@ -1406,6 +1406,7 @@ extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_batch_wakeup_granularity; extern unsigned int sysctl_sched_stat_granularity; extern unsigned int sysctl_sched_runtime_limit; +extern unsigned int sysctl_sched_compat_yield; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; @@ -1472,6 +1473,7 @@ static inline struct user_struct *get_uid(struct user_struct *u) } extern void free_uid(struct user_struct *); extern void switch_uid(struct user_struct *); +extern void release_uids(struct user_namespace *ns); #include <asm/current.h> diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index 510429495690..4c9ff0910ae0 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -45,49 +45,17 @@ struct signalfd_siginfo { #ifdef CONFIG_SIGNALFD /* - * Deliver the signal to listening signalfd. This must be called - * with the sighand lock held. Same are the following that end up - * calling signalfd_deliver(). - */ -void signalfd_deliver(struct task_struct *tsk, int sig); - -/* - * No need to fall inside signalfd_deliver() if no signal listeners - * are available. + * Deliver the signal to listening signalfd. */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { - if (unlikely(!list_empty(&tsk->sighand->signalfd_list))) - signalfd_deliver(tsk, sig); -} - -/* - * The signal -1 is used to notify the signalfd that the sighand - * is on its way to be detached. - */ -static inline void signalfd_detach_locked(struct task_struct *tsk) -{ - if (unlikely(!list_empty(&tsk->sighand->signalfd_list))) - signalfd_deliver(tsk, -1); -} - -static inline void signalfd_detach(struct task_struct *tsk) -{ - struct sighand_struct *sighand = tsk->sighand; - - if (unlikely(!list_empty(&sighand->signalfd_list))) { - spin_lock_irq(&sighand->siglock); - signalfd_deliver(tsk, -1); - spin_unlock_irq(&sighand->siglock); - } + if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh))) + wake_up(&tsk->sighand->signalfd_wqh); } #else /* CONFIG_SIGNALFD */ -#define signalfd_deliver(t, s) do { } while (0) -#define signalfd_notify(t, s) do { } while (0) -#define signalfd_detach_locked(t) do { } while (0) -#define signalfd_detach(t) do { } while (0) +static inline void signalfd_notify(struct task_struct *tsk, int sig) { } #endif /* CONFIG_SIGNALFD */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 1101b0ce878f..b5f41d4c2eec 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -11,7 +11,7 @@ struct user_namespace { struct kref kref; - struct list_head uidhash_table[UIDHASH_SZ]; + struct hlist_head uidhash_table[UIDHASH_SZ]; struct user_struct *root_user; }; diff --git a/init/Kconfig b/init/Kconfig index 96b54595f1dc..d54d0cadcc06 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -488,6 +488,7 @@ config SIGNALFD config TIMERFD bool "Enable timerfd() system call" if EMBEDDED select ANON_INODES + depends on BROKEN default y help Enable the timerfd() system call that allows to receive timer diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a6b4c0c08e13..fd4fc12d2624 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -57,8 +57,10 @@ static void __init handle_initrd(void) pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); if (pid > 0) - while (pid != sys_wait4(-1, NULL, 0, NULL)) + while (pid != sys_wait4(-1, NULL, 0, NULL)) { + try_to_freeze(); yield(); + } /* move initrd to rootfs' /old */ sys_fchdir(old_fd); diff --git a/kernel/exit.c b/kernel/exit.c index 06b24b3aa370..993369ee94d1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -24,7 +24,6 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/profile.h> -#include <linux/signalfd.h> #include <linux/mount.h> #include <linux/proc_fs.h> #include <linux/kthread.h> @@ -86,14 +85,6 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); - /* - * Notify that this sighand has been detached. This must - * be called with the tsk->sighand lock held. Also, this - * access tsk->sighand internally, so it must be called - * before tsk->sighand is reset. - */ - signalfd_detach_locked(tsk); - posix_cpu_timers_exit(tsk); if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 7332e236d367..33f12f48684a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1438,7 +1438,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, struct sighand_struct *sighand = data; spin_lock_init(&sighand->siglock); - INIT_LIST_HEAD(&sighand->signalfd_list); + init_waitqueue_head(&sighand->signalfd_wqh); } void __init proc_caches_init(void) diff --git a/kernel/sched.c b/kernel/sched.c index deeb1f8e0c30..6107a0cd6325 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1682,6 +1682,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->prio = effective_prio(p); + if (rt_prio(p->prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; + if (!p->sched_class->task_new || !sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu || !current->se.on_rq) { @@ -4550,10 +4555,7 @@ asmlinkage long sys_sched_yield(void) struct rq *rq = this_rq_lock(); schedstat_inc(rq, yld_cnt); - if (unlikely(rq->nr_running == 1)) - schedstat_inc(rq, yld_act_empty); - else - current->sched_class->yield_task(rq, current); + current->sched_class->yield_task(rq, current); /* * Since we are going to call schedule() anyway, there's diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 892616bf2c77..c9fbe8e73a45 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -43,6 +43,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL; unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL; /* + * sys_sched_yield() compat mode + * + * This option switches the agressive yield implementation of the + * old scheduler back on. + */ +unsigned int __read_mostly sysctl_sched_compat_yield; + +/* * SCHED_BATCH wake-up granularity. * (default: 25 msec, units: nanoseconds) * @@ -897,19 +905,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) } /* - * sched_yield() support is very simple - we dequeue and enqueue + * sched_yield() support is very simple - we dequeue and enqueue. + * + * If compat_yield is turned on then we requeue to the end of the tree. */ static void yield_task_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); + struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; + struct sched_entity *rightmost, *se = &p->se; + struct rb_node *parent; - __update_rq_clock(rq); /* - * Dequeue and enqueue the task to update its - * position within the tree: + * Are we the only task in the tree? + */ + if (unlikely(cfs_rq->nr_running == 1)) + return; + + if (likely(!sysctl_sched_compat_yield)) { + __update_rq_clock(rq); + /* + * Dequeue and enqueue the task to update its + * position within the tree: + */ + dequeue_entity(cfs_rq, &p->se, 0); + enqueue_entity(cfs_rq, &p->se, 0); + + return; + } + /* + * Find the rightmost entry in the rbtree: */ - dequeue_entity(cfs_rq, &p->se, 0); - enqueue_entity(cfs_rq, &p->se, 0); + do { + parent = *link; + link = &parent->rb_right; + } while (*link); + + rightmost = rb_entry(parent, struct sched_entity, run_node); + /* + * Already in the rightmost position? + */ + if (unlikely(rightmost == se)) + return; + + /* + * Minimally necessary key value to be last in the tree: + */ + se->fair_key = rightmost->fair_key + 1; + + if (cfs_rq->rb_leftmost == &se->run_node) + cfs_rq->rb_leftmost = rb_next(&se->run_node); + /* + * Relink the task to the rightmost position: + */ + rb_erase(&se->run_node, &cfs_rq->tasks_timeline); + rb_link_node(&se->run_node, parent, link); + rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); } /* diff --git a/kernel/signal.c b/kernel/signal.c index 3169bed0b4d0..9fb91a32edda 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -378,8 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - if (likely(tsk == current)) - signr = __dequeue_signal(&tsk->pending, mask, info); + signr = __dequeue_signal(&tsk->pending, mask, info); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info); @@ -407,8 +406,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) } } } - if (likely(tsk == current)) - recalc_sigpending(); + recalc_sigpending(); if (signr && unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. Our @@ -425,7 +423,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } - if (signr && likely(tsk == current) && + if (signr && ((info->si_code & __SI_MASK) == __SI_TIMER) && info->si_sys_private){ /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6ace893c17c9..53a456ebf6d5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -303,6 +303,14 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_compat_yield", + .data = &sysctl_sched_compat_yield, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #ifdef CONFIG_PROVE_LOCKING { .ctl_name = CTL_UNNUMBERED, diff --git a/kernel/user.c b/kernel/user.c index e7d11cef6998..9ca2848fc356 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -55,25 +55,22 @@ struct user_struct root_user = { /* * These routines must be called with the uidhash spinlock held! */ -static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) +static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent) { - list_add(&up->uidhash_list, hashent); + hlist_add_head(&up->uidhash_node, hashent); } static inline void uid_hash_remove(struct user_struct *up) { - list_del(&up->uidhash_list); + hlist_del_init(&up->uidhash_node); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) { - struct list_head *up; - - list_for_each(up, hashent) { - struct user_struct *user; - - user = list_entry(up, struct user_struct, uidhash_list); + struct user_struct *user; + struct hlist_node *h; + hlist_for_each_entry(user, h, hashent, uidhash_node) { if(user->uid == uid) { atomic_inc(&user->__count); return user; @@ -122,7 +119,7 @@ void free_uid(struct user_struct *up) struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) { - struct list_head *hashent = uidhashentry(ns, uid); + struct hlist_head *hashent = uidhashentry(ns, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); @@ -202,6 +199,30 @@ void switch_uid(struct user_struct *new_user) suid_keys(current); } +void release_uids(struct user_namespace *ns) +{ + int i; + unsigned long flags; + struct hlist_head *head; + struct hlist_node *nd; + + spin_lock_irqsave(&uidhash_lock, flags); + /* + * collapse the chains so that the user_struct-s will + * be still alive, but not in hashes. subsequent free_uid() + * will free them. + */ + for (i = 0; i < UIDHASH_SZ; i++) { + head = ns->uidhash_table + i; + while (!hlist_empty(head)) { + nd = head->first; + hlist_del_init(nd); + } + } + spin_unlock_irqrestore(&uidhash_lock, flags); + + free_uid(ns->root_user); +} static int __init uid_cache_init(void) { @@ -211,7 +232,7 @@ static int __init uid_cache_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); for(n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(init_user_ns.uidhash_table + n); + INIT_HLIST_HEAD(init_user_ns.uidhash_table + n); /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 85af9422ea6e..7af90fc4f0fd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -39,7 +39,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(ns->uidhash_table + n); + INIT_HLIST_HEAD(ns->uidhash_table + n); /* Insert new root user. */ ns->root_user = alloc_uid(ns, 0); @@ -81,7 +81,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); - free_uid(ns->root_user); + release_uids(ns); kfree(ns); } diff --git a/kernel/utsname.c b/kernel/utsname.c index 9d8180a0f0d8..816d7b24fa03 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -28,7 +28,9 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) if (!ns) return ERR_PTR(-ENOMEM); + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); + up_read(&uts_sem); kref_init(&ns->kref); return ns; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index de4cf458d6e1..84c795ee2d65 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, { int nid; struct page *page = NULL; + struct mempolicy *mpol; struct zonelist *zonelist = huge_zonelist(vma, address, - htlb_alloc_mask); + htlb_alloc_mask, &mpol); struct zone **z; for (z = zonelist->zones; *z; z++) { @@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, break; } } + mpol_free(mpol); /* unref if mpol !NULL */ return page; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bb54b88c3d5a..3d6ac9505d07 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, #endif -/* Return effective policy for a VMA */ +/* + * get_vma_policy(@task, @vma, @addr) + * @task - task for fallback if vma policy == default + * @vma - virtual memory area whose policy is sought + * @addr - address in @vma for shared policy lookup + * + * Returns effective policy for a VMA at specified address. + * Falls back to @task or system default policy, as necessary. + * Returned policy has extra reference count if shared, vma, + * or some other task's policy [show_numa_maps() can pass + * @task != current]. It is the caller's responsibility to + * free the reference in these cases. + */ static struct mempolicy * get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = task->mempolicy; + int shared_pol = 0; if (vma) { - if (vma->vm_ops && vma->vm_ops->get_policy) + if (vma->vm_ops && vma->vm_ops->get_policy) { pol = vma->vm_ops->get_policy(vma, addr); - else if (vma->vm_policy && + shared_pol = 1; /* if pol non-NULL, add ref below */ + } else if (vma->vm_policy && vma->vm_policy->policy != MPOL_DEFAULT) pol = vma->vm_policy; } if (!pol) pol = &default_policy; + else if (!shared_pol && pol != current->mempolicy) + mpol_get(pol); /* vma or other task's policy */ return pol; } @@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol, } #ifdef CONFIG_HUGETLBFS -/* Return a zonelist suitable for a huge page allocation. */ +/* + * huge_zonelist(@vma, @addr, @gfp_flags, @mpol) + * @vma = virtual memory area whose policy is sought + * @addr = address in @vma for shared policy lookup and interleave policy + * @gfp_flags = for requested zone + * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy + * + * Returns a zonelist suitable for a huge page allocation. + * If the effective policy is 'BIND, returns pointer to policy's zonelist. + * If it is also a policy for which get_vma_policy() returns an extra + * reference, we must hold that reference until after allocation. + * In that case, return policy via @mpol so hugetlb allocation can drop + * the reference. For non-'BIND referenced policies, we can/do drop the + * reference here, so the caller doesn't need to know about the special case + * for default and current task policy. + */ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, - gfp_t gfp_flags) + gfp_t gfp_flags, struct mempolicy **mpol) { struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct zonelist *zl; + *mpol = NULL; /* probably no unref needed */ if (pol->policy == MPOL_INTERLEAVE) { unsigned nid; nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); + __mpol_free(pol); /* finished with pol */ return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); } - return zonelist_policy(GFP_HIGHUSER, pol); + + zl = zonelist_policy(GFP_HIGHUSER, pol); + if (unlikely(pol != &default_policy && pol != current->mempolicy)) { + if (pol->policy != MPOL_BIND) + __mpol_free(pol); /* finished with pol */ + else + *mpol = pol; /* unref needed after allocation */ + } + return zl; } #endif @@ -1264,6 +1306,7 @@ struct page * alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct zonelist *zl; cpuset_update_task_memory_state(); @@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); return alloc_page_interleave(gfp, 0, nid); } - return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); + zl = zonelist_policy(gfp, pol); + if (pol != &default_policy && pol != current->mempolicy) { + /* + * slow path: ref counted policy -- shared or vma + */ + struct page *page = __alloc_pages(gfp, 0, zl); + __mpol_free(pol); + return page; + } + /* + * fast path: default or task policy + */ + return __alloc_pages(gfp, 0, zl); } /** @@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v) struct numa_maps *md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; + struct mempolicy *pol; int n; char buffer[50]; @@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v) if (!md) return 0; - mpol_to_str(buffer, sizeof(buffer), - get_vma_policy(priv->task, vma, vma->vm_start)); + pol = get_vma_policy(priv->task, vma, vma->vm_start); + mpol_to_str(buffer, sizeof(buffer), pol); + /* + * unref shared or other task's mempolicy + */ + if (pol != &default_policy && pol != current->mempolicy) + __mpol_free(pol); seq_printf(m, "%08lx %s", vma->vm_start, buffer); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e185a5b55913..2351533a8507 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -58,7 +58,6 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ struct timer_list timer; int peer_pid; /* PID of the peer process */ @@ -345,10 +344,12 @@ static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, static int __nfulnl_send(struct nfulnl_instance *inst) { - int status; + int status = -1; if (inst->qlen > 1) - inst->lastnlh->nlmsg_type = NLMSG_DONE; + NLMSG_PUT(inst->skb, 0, 0, + NLMSG_DONE, + sizeof(struct nfgenmsg)); status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); if (status < 0) { @@ -358,8 +359,8 @@ __nfulnl_send(struct nfulnl_instance *inst) inst->qlen = 0; inst->skb = NULL; - inst->lastnlh = NULL; +nlmsg_failure: return status; } @@ -538,7 +539,6 @@ __build_packet_message(struct nfulnl_instance *inst, } nlh->nlmsg_len = inst->skb->tail - old_tail; - inst->lastnlh = nlh; return 0; nlmsg_failure: @@ -644,7 +644,8 @@ nfulnl_log_packet(unsigned int pf, } if (inst->qlen >= qthreshold || - (inst->skb && size > skb_tailroom(inst->skb))) { + (inst->skb && size > + skb_tailroom(inst->skb) - sizeof(struct nfgenmsg))) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ UDEBUG("flushing old skb\n"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 957957309859..3a23e30bc79e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -270,7 +270,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit-1) { + if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += skb->len; sch->bstats.packets++; return 0; @@ -306,7 +306,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit - 1) { + if (++sch->q.qlen <= q->limit) { sch->qstats.requeues++; return 0; } @@ -391,10 +391,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) q->quantum = ctl->quantum ? : psched_mtu(sch->dev); q->perturb_period = ctl->perturb_period*HZ; if (ctl->limit) - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 2); qlen = sch->q.qlen; - while (sch->q.qlen >= q->limit-1) + while (sch->q.qlen > q->limit) sfq_drop(sch); qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); @@ -423,7 +423,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt) q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; } - q->limit = SFQ_DEPTH; + q->limit = SFQ_DEPTH - 2; q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1a899924023f..036ab520df21 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1110,7 +1110,8 @@ svc_tcp_accept(struct svc_sock *svsk) serv->sv_name); printk(KERN_NOTICE "%s: last TCP connect from %s\n", - serv->sv_name, buf); + serv->sv_name, __svc_print_addr(sin, + buf, sizeof(buf))); } /* * Always select the oldest socket. It's not fair, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 36946629b6ca..0753b20e23fe 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -316,6 +316,7 @@ static inline int inode_doinit(struct inode *inode) } enum { + Opt_error = -1, Opt_context = 1, Opt_fscontext = 2, Opt_defcontext = 4, @@ -327,6 +328,7 @@ static match_table_t tokens = { {Opt_fscontext, "fscontext=%s"}, {Opt_defcontext, "defcontext=%s"}, {Opt_rootcontext, "rootcontext=%s"}, + {Opt_error, NULL}, }; #define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n" |