diff -urP linux-2.4.19.orig/Documentation/Configure.help linux-2.4.19/Documentation/Configure.help --- linux-2.4.19.orig/Documentation/Configure.help Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/Documentation/Configure.help Wed Feb 12 12:09:01 2003 @@ -10639,6 +10639,48 @@ say M here and read <file:Documentation/modules.txt>. This is recommended. The module will be called dl2k.o. +Intel(R) PRO/1000 Gigabit Ethernet support +CONFIG_E1000 + This driver supports Intel(R) PRO/1000 gigabit ethernet family of + adapters, which includes: + + Controller Adapter Name Board IDs + ---------- ------------ --------- + 82542 PRO/1000 Gigabit Server Adapter 700262-xxx, + 717037-xxx + 82543 PRO/1000 F Server Adapter 738640-xxx, + A38888-xxx + 82543 PRO/1000 T Server Adapter A19845-xxx, + A33948-xxx + 82544 PRO/1000 XT Server Adapter A51580-xxx + 82544 PRO/1000 XF Server Adapter A50484-xxx + 82544 PRO/1000 T Desktop Adapter A62947-xxx + 82540 PRO/1000 MT Desktop Adapter A78408-xxx + 82545 PRO/1000 MT Server Adapter A92165-xxx + 82546 PRO/1000 MT Dual Port Server Adapter A92111-xxx + 82545 PRO/1000 MF Server Adapter A91622-xxx + 82545 PRO/1000 MF Server Adapter(LX) A91624-xxx + 82546 PRO/1000 MF Dual Port Server Adapter A91620-xxx + + For more information on how to identify your adapter, go to the + Adapter & Driver ID Guide at: + + <http://support.intel.com/support/network/adapter/pro100/21397.htm> + + For general information and support, go to the Intel support + website at: + + <http://support.intel.com> + + More specific information on configuring the driver is in + <file:Documentation/networking/e1000.txt>. + + This driver is also available as a module ( = code which can be + inserted in and removed from the running kernel whenever you want). + The module will be called e1000.o. If you want to compile it as a + module, say M here and read <file:Documentation/modules.txt> as well + as <file:Documentation/networking/net-modules.txt>. 
+ AMD LANCE and PCnet (AT1500 and NE2100) support CONFIG_LANCE If you have a network (Ethernet) card of this type, say Y and read diff -urP linux-2.4.19.orig/Documentation/networking/e1000.txt linux-2.4.19/Documentation/networking/e1000.txt --- linux-2.4.19.orig/Documentation/networking/e1000.txt Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/Documentation/networking/e1000.txt Wed Feb 12 12:09:01 2003 @@ -0,0 +1,302 @@ +Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters +=============================================================== + +October 12, 2002 + + +Contents +======== + +- In This Release +- Supported Adapters +- Command Line Parameters +- Speed and Duplex Configuration +- Additional Configurations +- Known Issues +- Support + + +In This Release +=============== + +This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family +of Adapters, version 4.4.x. This driver includes support for +Itanium(TM)-based systems. + +This release version includes the following: + + - Support for the ethtool 1.6 interface. A third-party application can use + the ethtool interface to get and set driver parameters. + + - Zero copy. This feature provides faster data throughput. Enabled by + default in supporting kernels. It is not supported on the Intel(R) + PRO/1000 Gigabit Server Adapter. 
+ +Features include: + + - Support for the 82545 and 82546-based adapters listed below + + - Wake on LAN* support via ethtool for 82540, 82544, 82545, and 82546- + based adapters + + - Adaptive IFS for increased performance at half duplex + + + +Supported Adapters +================== + +The following Intel network adapters are compatible with the drivers in this +release: + + Controller Adapter Name Board IDs + ---------- ------------ --------- + + 82542 PRO/1000 Gigabit Server Adapter 700262-xxx, 717037-xxx + + 82543 PRO/1000 F Server Adapter 738640-xxx, A38888-xxx + + 82543 PRO/1000 T Server Adapter A19845-xxx, A33948-xxx + + 82544 PRO/1000 XT Server Adapter A51580-xxx + + 82544 PRO/1000 XF Server Adapter A50484-xxx + + 82544 PRO/1000 T Desktop Adapter A62947-xxx + + 82540 PRO/1000 MT Desktop Adapter A78408-xxx + + 82545 PRO/1000 MT Server Adapter A92165-xxx + + 82546 PRO/1000 MT Dual Port Server Adapter A92111-xxx + + 82545 PRO/1000 MF Server Adapter A91622-xxx + + 82545 PRO/1000 MF Server Adapter(LX) A91624-xxx + + 82546 PRO/1000 MF Dual Port Server Adapter A91620-xxx + + +To verify your Intel adapter is supported, find the board ID number on the +adapter. Look for a label that has a barcode and a number in the format of +123456-001 (six digits hyphen three digits). Match this to the list of +numbers above. + +For more information on how to identify your adapter, go to the Adapter & +Driver ID Guide at: + + http://support.intel.com/support/network/adapter/pro100/21397.htm + +For the latest Intel network drivers for Linux, go to: + + http://appsr.intel.com/scripts-df/support_intel.asp + + +Command Line Parameters +======================= + +If the driver is built as a module, the following optional parameters are +used by entering them on the command line with the modprobe or insmod command. 
+For example, with two PRO/1000 PCI adapters, entering: + + insmod e1000 TxDescriptors=80,128 + +loads the e1000 driver with 80 TX resources for the first adapter and 128 TX +resources for the second adapter. + +For more information about the AutoNeg, Duplex, and Speed parameters, see the +"Speed and Duplex Configuration" section in this document. + + +AutoNeg (adapters using copper connections only) +Valid Range: 0x01-0x0F, 0x20-0x2F +Default Value: 0x2F + This parameter is a bit mask that specifies which speed and duplex + settings the board advertises. When this parameter is used, the Speed and + Duplex parameters must not be specified. + +Duplex (adapters using copper connections only) +Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) +Default Value: 0 + Defines the direction in which data is allowed to flow. Can be either one + or two-directional. If both Duplex and the link partner are set to auto- + negotiate, the board auto-detects the correct duplex. If the link partner + is forced (either full or half), Duplex defaults to half-duplex. + +FlowControl +Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) +Default: Read flow control settings from the EEPROM + This parameter controls the automatic generation(Tx) and response(Rx) to + Ethernet PAUSE frames. + +RxDescriptors +Valid Range: 80-256 for 82542 and 82543-based adapters + 80-4096 for 82540, 82544, 82545, and 82546-based adapters +Default Value: 80 + This value is the number of receive descriptors allocated by the driver. + Increasing this value allows the driver to buffer more incoming packets. + Each descriptor is 16 bytes. A receive buffer is also allocated for each + descriptor and can be either 2048, 4096, 8192, or 16384 bytes, depending + on the MTU setting. The maximum MTU size is 16110. + + NOTE: MTU designates the frame size. It only needs to be set for Jumbo + Frames. 
+ +RxIntDelay +Valid Range: 0-65535 (0=off) +Default Value: 0 + This value delays the generation of receive interrupts in units of 1.024 + microseconds. Receive interrupt reduction can improve CPU efficiency if + properly tuned for specific network traffic. Increasing this value adds + extra latency to frame reception and can end up decreasing the throughput + of TCP traffic. If the system is reporting dropped receives, this value + may be set too high, causing the driver to run out of available receive + descriptors. + + CAUTION: When setting RxIntDelay to a value other than 0, adapters may + hang (stop transmitting) under certain network conditions. If + this occurs a NETDEV WATCHDOG message is logged in the system + event log. In addition, the controller is automatically reset, + restoring the network connection. To eliminate the potential for + the hang ensure that RxIntDelay is set to 0. + +RxAbsIntDelay (82540, 82545, and 82546-based adapters only) +Valid Range: 0-65535 (0=off) +Default Value: 128 + This value, in units of 1.024 microseconds, limits the delay in which a + receive interrupt is generated. Useful only if RxIntDelay is non-zero, + this value ensures that an interrupt is generated after the initial + packet is received within the set amount of time. Proper tuning, + along with RxIntDelay, may improve traffic throughput in specific network + conditions. + +Speed (adapters using copper connections only) +Valid Settings: 0, 10, 100, 1000 +Default Value: 0 (auto-negotiate at all supported speeds) + Speed forces the line speed to the specified value in megabits per second + (Mbps). If this parameter is not specified or is set to 0 and the link + partner is set to auto-negotiate, the board will auto-detect the correct + speed. Duplex must also be set when Speed is set to either 10 or 100. 
+ +TxDescriptors +Valid Range: 80-256 for 82542 and 82543-based adapters + 80-4096 for 82540, 82544, 82545, and 82546-based adapters +Default Value: 256 + This value is the number of transmit descriptors allocated by the driver. + Increasing this value allows the driver to queue more transmits. Each + descriptor is 16 bytes. + +TxIntDelay +Valid Range: 0-65535 (0=off) +Default Value: 64 + This value delays the generation of transmit interrupts in units of + 1.024 microseconds. Transmit interrupt reduction can improve CPU + efficiency if properly tuned for specific network traffic. If the + system is reporting dropped transmits, this value may be set too high + causing the driver to run out of available transmit descriptors. + +TxAbsIntDelay (82540, 82545, and 82546-based adapters only) +Valid Range: 0-65535 (0=off) +Default Value: 64 + This value, in units of 1.024 microseconds, limits the delay in which a + transmit interrupt is generated. Useful only if TxIntDelay is non-zero, + this value ensures that an interrupt is generated after the initial + packet is sent on the wire within the set amount of time. Proper tuning, + along with TxIntDelay, may improve traffic throughput in specific + network conditions. + +XsumRX (not available on the PRO/1000 Gigabit Server Adapter) +Valid Range: 0-1 +Default Value: 1 + A value of '1' indicates that the driver should enable IP checksum + offload for received packets (both UDP and TCP) to the adapter hardware. + + +Speed and Duplex Configuration +============================== + +Three keywords are used to control the speed and duplex configuration. These +keywords are Speed, Duplex, and AutoNeg. + +If the board uses a fiber interface, these keywords are ignored, and the +fiber interface board only links at 1000 Mbps full-duplex. + +For copper-based boards, the keywords interact as follows: + + The default operation is auto-negotiate. 
The board advertises all supported + speed and duplex combinations, and it links at the highest common speed and + duplex mode IF the link partner is set to auto-negotiate. + + If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps is + advertised (The 1000BaseT spec requires auto-negotiation.) + + If Speed = 10 or 100, then both Speed and Duplex must be set. Auto- + negotiation is disabled, and the AutoNeg parameter is ignored. Partner MUST + also be forced. + +The AutoNeg parameter is used when more control is required over the auto- +negotiation process. When this parameter is used, Speed and Duplex must not +be specified. This parameter is a bitmap that specifies which speed and +duplex settings are advertised to the link partner. + +Bit 7 6 5 4 3 2 1 0 +Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 +Duplex Full Full Half Full Half + +Note that setting AutoNeg does not guarantee that the board will link at the +highest specified speed or duplex mode, but the board will link at the +highest possible speed/duplex of the link partner IF the link partner is also +set to auto-negotiate. If the link partner is forced speed/duplex, the +adapter MUST be forced to the same speed/duplex. + + +Additional Configurations +========================= + + Jumbo Frames + ------------ + + The driver supports Jumbo Frames for all adapters except 82542-based + adapters. Jumbo Frames support is enabled by changing the MTU to a value + larger than the default of 1500. Use the ifconfig command to increase the + MTU size. For example: + + ifconfig ethx mtu 9000 up + + +Known Issues +============ + + Jumbo Frames System Requirement + ------------------------------- + + Memory allocation failures have been observed on Linux systems with 64 MB + of RAM or less that are running Jumbo Frames. If you are using Jumbo + Frames, your system may require more than the advertised minimum + requirement of 64 MB of system memory. 
+ + +Support +======= + +For general information and support, go to the Intel support website at: + + http://support.intel.com + +If an issue is identified with the released source code on the supported +kernel with a supported adapter, email the specific information related to +the issue to linux.nics@intel.com. + + +License +======= + +This software program is released under the terms of a license agreement +between you ('Licensee') and Intel. Do not use or load this software or any +associated materials (collectively, the 'Software') until you have carefully +read the full terms and conditions of the LICENSE located in this software +package. By loading or using the Software, you agree to the terms of this +Agreement. If you do not agree with the terms of this Agreement, do not +install or use the Software. + +* Other names and brands may be claimed as the property of others. diff -urP linux-2.4.19.orig/Documentation/web100/locking.txt linux-2.4.19/Documentation/web100/locking.txt --- linux-2.4.19.orig/Documentation/web100/locking.txt Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/Documentation/web100/locking.txt Wed Feb 12 12:09:01 2003 @@ -0,0 +1,33 @@ +Web100 Locking Model for Linux 2.4 +John Heffner +August 2, 2001 + + +1. Lookup Structures + +The connection entries are kept linked together simultaneously in a table +and in a list. Only entries in these structures can be looked up. To +protect these lookup structures, we have a single global reader-writer +spinlock, web100_linkage_lock. Since we grab the lock both from user space +and in the bottom half, we must do a [read/write]_lock_bh. As this disables +the local BH's, this lock should *not* be held for very long. + + +2. Data Integrity + +The statistics are protected by the sock's lock. Any code modifying or +reading the statistics should hold the sock lock while doing so. We assume +that if the socket is gone, the statistics should not be modified, so +readers need not hold any lock. + + +3. 
Statistics Destruction + +A statistics structure keeps a count of the number of references to it, +wc_users. When a lookup is performed, the reference count should be +incremented (while the linkage lock is held) by calling web100_stats_use. +When the reference is no longer needed, decrement the count by calling +web100_stats_unuse. The latter function will free the statistics when there +are no remaining references. The lookup structures keep one reference. The +sock also keeps one, since the sock may be destroyed before it ever enters +the ESTABLISHED state. diff -urP linux-2.4.19.orig/Documentation/web100/proc_interface.txt linux-2.4.19/Documentation/web100/proc_interface.txt --- linux-2.4.19.orig/Documentation/web100/proc_interface.txt Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/Documentation/web100/proc_interface.txt Wed Feb 12 12:09:01 2003 @@ -0,0 +1,104 @@ +WEB100 proc interface notes +=========================== + +The web100 modifications to the kernel collect information about the +state of a TCP transfer in a kernel data structure that is linked +out of the "sock" TCP structure in sock.h. Please see +"include/net/web100_stats.h" for the structure definition. + +The API for this structure is provided through the /proc interface. +This document provides a brief description of this interface. Please +see fs/proc/web100.c for source code. + +First, kernel creates the /proc/web100 directory and the file +/proc/web100/header at system boot time. + +Each new TCP connection is assigned a unique, unchanging number +(similar to a pid), and its directory name is that number as ASCII +decimal. These directories persist for about sixty seconds after the +connection is terminated (goes into a CLOSED or TIME_WAIT state). The +connection stats will not change after the connection is terminated. +(So a connection whose state variable is TIME_WAIT is not necessarily +still in TIME_WAIT.) 
It should be noted that what is meant by a +"connection" here is actually one side of a connection. If a +connection is created from the local host to the local host, two +connection ID's will be created. + +When writing an application to read from the proc interface, it should be +taken into consideration that the directories and their files can disappear at +any time (they do so at an interrupt level). So if a file open fails on a +file you just looked up (say, with glob), that's probably normal and the +program should handle it gracefully. + +Another seemingly strange thing that can happen is that stats for multiple +connections with the same four-tuple can show up. No more than one of the +connections may be in any state but CLOSED or TIME_WAIT. This behavior is +correct, and should be handled as such. + +The algorithms governing the connection numbers are not yet final. +Currently, for simplification, it is only possible to have 32768 +connections. Also, the proc interface takes up the entire top half of the +inode number space for the proc filesystem, which is unnecessary and very +undesirable in the long term. These problems should be fixed at some point. + +Inside each connection directory is an identical set of files. One is +spec-ascii, which contains the connection four-tuple in human-readable +format. One can, for example, see all outgoing ssh connections by executing +"grep ':22$' /proc/web100/*/spec-ascii" from the command prompt. + +The remaining files provide access to states of TCP-KIS variables in +local host byte-order. Since the number, names, and contents of these +files can and will change with releases, they are described in a +header file -- /proc/web100/header. A file named spec, which contains the +variables describing the connection's four-tuple, should be present +for any release. + +The header file is in human-readable format as follows: + + + / + + + ... + + / + ... 
+The filename is the name of the file inside each connection directory. (The +/ is prepended to make it clear it is a new file, not a new variable in the +previous file. There is also an empty line before each filename.) Each +file has an arbitrary number of variables, and there are an arbitrary number +of files. The type is an integer, and is currently defined something like: + + enum { + WEB100_TYPE_INTEGER, + WEB100_TYPE_INTEGER32, + WEB100_TYPE_IP_ADDRESS, + WEB100_TYPE_COUNTER32, + WEB100_TYPE_GAUGE32, + WEB100_TYPE_UNSIGNED32, + WEB100_TYPE_TIME_TICKS, + WEB100_TYPE_COUNTER64, + WEB100_TYPE_UNSIGNED16 + }; + +in the kernel source file fs/proc/web100.c. These correspond to +MIB-II types. (RFC2578) + +To read variables, seek to the appropriate offset, then read the appropriate +amount of data. (Length is implied by the type.) Multiple variables may be +read with a single read, and will be read atomically when doing so. +Currently, all variables are readable, but this may not be true in the +future. + +To write variables, seek to the appropriate offset, and write the +appropriate amount of data. Only a single variable may be written at one +time. If variables must be atomically written, a variable should be used as +a flag to signal that the write is done, and the kernel code depending on +the variables should be written to handle this. + +See: http://www.web100.org +Please send comments to prog@web100.org + +John Heffner, Matt Mathis, R. Reddy +August 2000, Jan 2001 + diff -urP linux-2.4.19.orig/Documentation/web100/sysctl.txt linux-2.4.19/Documentation/web100/sysctl.txt --- linux-2.4.19.orig/Documentation/web100/sysctl.txt Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/Documentation/web100/sysctl.txt Wed Feb 12 12:09:01 2003 @@ -0,0 +1,32 @@ +Web100 sysctl variables +John Heffner +October 10, 2002 + + +net.ipv4.WAD_IFQ + This value is used for WAD_IFQ by a connection when its KIS + variable is 0. This variable requires that Net100 extensions be + enabled. 
+ +net.ipv4.WAD_MaxBurst + This value is used for WAD_MaxBurst by a connection when its KIS + variable is 0. This variable requires that Net100 extensions be + enabled. + +net.ipv4.web100_default_wscale + This will be the minimum window scale advertised. + +net.ipv4.web100_no_metrics_save + When non-zero, TCP metrics will not be saved to the route dest + cache. NOTE: values already in the cache will not be flushed + by writing to this variable. To do so, as root write to + net.ipv4.route.flush. This variable requires that Net100 + extensions be enabled. + +net.ipv4.web100_rbufmode + The X_RBufMode KIS variable for each connection is set to this value + upon creation of the statistics structure. + +net.ipv4.web100_sbufmode + The X_SBufMode KIS variable for each connection is set to this value + upon creation of the statistics structure. diff -urP linux-2.4.19.orig/Documentation/web100/tuning.txt linux-2.4.19/Documentation/web100/tuning.txt --- linux-2.4.19.orig/Documentation/web100/tuning.txt Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/Documentation/web100/tuning.txt Wed Feb 12 12:09:01 2003 @@ -0,0 +1,36 @@ +One of the primary features of the 2.1 release of the Web100 kernel patch is +that it contains a new style of TCP buffer management which effectively +"auto-tunes" both sending and receiving flows. The algorithms used are very +similar to those described in +, and will be further +described in a future Web100 paper. + +Enabling/Disabling Autotuning +----------------------------- +System-wide and per-connection controls have been provided for enabling and +disabling these experimental algorithms. KIS variables X_SBufMode and +X_RBufMode are the per-connection controls. For each, a value of 0 uses the +classic Linux buffering, and a value of 1 uses the Web100 autotuning. +Currently no other values are accepted. The system-wide sysctl variables +net.ipv4.web100_sbufmode and net.ipv4.web100_rbufmode are defaults for the +KIS variables, loaded at connection startup. 
Changing the sysctl variables +will NOT affect currently established connections. + +Mis-tuning +---------- +For diagnostic or demonstration purposes, it may be useful to mis-tune +connections. Previously, this was done by writing to the SndbufSet or +RcvbufSet and then STuneMode or RTuneMode variables. These are now +deprecated. The suggested method of mis-tuning now is to use LimCwnd and +LimRwin. These have precise implementation-independent definitions -- they +are simply clamps on cwnd and rwin. + +Support for legacy applications +------------------------------ +To provide backward compatibility, the old tuning variables do still have +functionality, though it has been somewhat altered. SndbufSet and RcvbufSet +immediately set sndbuf and rcvbuf, respectively. They also set LimCwnd and +LimRwin. The deprecated variables SMaxWinBuf and RMaxWinBuf also have the +same effects. The new variables X_Sndbuf and X_Rcvbuf as well as the +deprecated variables SAppBuf and RAppBuf write to sndbuf and rcvbuf but do +not set LimCwnd and LimRwin. 
diff -urP linux-2.4.19.orig/Makefile linux-2.4.19/Makefile --- linux-2.4.19.orig/Makefile Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/Makefile Wed Feb 12 12:09:01 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 19 -EXTRAVERSION = +EXTRAVERSION = -web100-hstcp-v1.3+sk98 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -urP linux-2.4.19.orig/arch/alpha/kernel/time.c linux-2.4.19/arch/alpha/kernel/time.c --- linux-2.4.19.orig/arch/alpha/kernel/time.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/arch/alpha/kernel/time.c Wed Feb 12 12:09:01 2003 @@ -395,6 +395,30 @@ } } +#ifdef CONFIG_WEB100_STATS +void get_mono_time(__u64 *time) +{ +#ifdef CONFIG_SMP + *time = jiffies * (1000000 / HZ); +#else + unsigned long flags; + unsigned long delta_cycles, delta_usec, partial_tick, lost, now; + + read_lock_irqsave(&xtime_lock, flags); + delta_cycles = rpcc() - state.last_time; + partial_tick = state.partial_tick; + now = jiffies; + read_unlock_irqrestore(&xtime_lock, flags); + + delta_usec = (delta_cycles * state.scaled_ticks_per_cycle + + partial_tick) * 15625; + delta_usec = ((delta_usec / ((1UL << (FIX_SHIFT-6-1)) * HZ)) + 1) / 2; + + *time = now * (1000000 / HZ) + delta_usec; +#endif +} +#endif + /* * Use the cycle counter to estimate an displacement from the last time * tick. 
Unfortunately the Alpha designers made only the low 32-bits of diff -urP linux-2.4.19.orig/arch/i386/kernel/time.c linux-2.4.19/arch/i386/kernel/time.c --- linux-2.4.19.orig/arch/i386/kernel/time.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/arch/i386/kernel/time.c Wed Feb 12 12:09:01 2003 @@ -259,6 +259,17 @@ #endif +#ifdef CONFIG_WEB100_STATS +void get_mono_time(__u64 *time) +{ + unsigned long flags; + + read_lock_irqsave(&xtime_lock, flags); + *time = (__u64)jiffies * (1000000 / HZ) + do_gettimeoffset(); + read_unlock_irqrestore(&xtime_lock, flags); +} +#endif + /* * This version of gettimeofday has microsecond resolution * and better than microsecond precision on fast x86 machines with TSC. diff -urP linux-2.4.19.orig/drivers/net/Config.in linux-2.4.19/drivers/net/Config.in --- linux-2.4.19.orig/drivers/net/Config.in Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/drivers/net/Config.in Wed Feb 12 12:09:01 2003 @@ -241,6 +241,7 @@ bool ' Omit support for old Tigon I based AceNICs' CONFIG_ACENIC_OMIT_TIGON_I fi dep_tristate 'D-Link DL2000-based Gigabit Ethernet support' CONFIG_DL2K $CONFIG_PCI +dep_tristate 'Intel(R) PRO/1000 Gigabit Ethernet support' CONFIG_E1000 $CONFIG_PCI dep_tristate 'MyriCOM Gigabit Ethernet support' CONFIG_MYRI_SBUS $CONFIG_SBUS dep_tristate 'National Semiconduct DP83820 support' CONFIG_NS83820 $CONFIG_PCI dep_tristate 'Packet Engines Hamachi GNIC-II support' CONFIG_HAMACHI $CONFIG_PCI diff -urP linux-2.4.19.orig/drivers/net/Makefile linux-2.4.19/drivers/net/Makefile --- linux-2.4.19.orig/drivers/net/Makefile Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/drivers/net/Makefile Wed Feb 12 12:09:01 2003 @@ -29,6 +29,10 @@ obj-$(CONFIG_ISDN) += slhc.o endif +ifeq ($(CONFIG_E1000),y) + obj-y += e1000/e1000.o +endif + subdir-$(CONFIG_NET_PCMCIA) += pcmcia subdir-$(CONFIG_NET_WIRELESS) += wireless subdir-$(CONFIG_TULIP) += tulip @@ -40,6 +44,7 @@ subdir-$(CONFIG_DEV_APPLETALK) += appletalk subdir-$(CONFIG_SK98LIN) += sk98lin subdir-$(CONFIG_SKFP) += skfp 
+subdir-$(CONFIG_E1000) += e1000 # # link order important here diff -urP linux-2.4.19.orig/drivers/net/e1000/LICENSE linux-2.4.19/drivers/net/e1000/LICENSE --- linux-2.4.19.orig/drivers/net/e1000/LICENSE Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/LICENSE Wed Feb 12 12:09:01 2003 @@ -0,0 +1,339 @@ + +"This software program is licensed subject to the GNU General Public License +(GPL). Version 2, June 1991, available at +" + +GNU General Public License + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public License is intended +to guarantee your freedom to share and change free software--to make sure +the software is free for all its users. This General Public License applies +to most of the Free Software Foundation's software and to any other program +whose authors commit to using it. (Some other Free Software Foundation +software is covered by the GNU Library General Public License instead.) You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. 
These +restrictions translate to certain responsibilities for you if you distribute +copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its +recipients to know that what they have is not the original, so that any +problems introduced by others will not reflect on the original authors' +reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program +proprietary. To prevent this, we have made it clear that any patent must be +licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice + placed by the copyright holder saying it may be distributed under the + terms of this General Public License. The "Program", below, refers to any + such program or work, and a "work based on the Program" means either the + Program or any derivative work under copyright law: that is to say, a + work containing the Program or a portion of it, either verbatim or with + modifications and/or translated into another language. 
(Hereinafter, + translation is included without limitation in the term "modification".) + Each licensee is addressed as "you". + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + the Program is not restricted, and the output from the Program is covered + only if its contents constitute a work based on the Program (independent + of having been made by running the Program). Whether that is true depends + on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code + as you receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice and + disclaimer of warranty; keep intact all the notices that refer to this + License and to the absence of any warranty; and give any other recipients + of the Program a copy of this License along with the Program. + + You may charge a fee for the physical act of transferring a copy, and you + may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, + thus forming a work based on the Program, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + * a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + * b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any part + thereof, to be licensed as a whole at no charge to all third parties + under the terms of this License. 
+ + * c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive + use in the most ordinary way, to print or display an announcement + including an appropriate copyright notice and a notice that there is + no warranty (or else, saying that you provide a warranty) and that + users may redistribute the program under these conditions, and + telling the user how to view a copy of this License. (Exception: if + the Program itself is interactive but does not normally print such + an announcement, your work based on the Program is not required to + print an announcement.) + + These requirements apply to the modified work as a whole. If identifiable + sections of that work are not derived from the Program, and can be + reasonably considered independent and separate works in themselves, then + this License, and its terms, do not apply to those sections when you + distribute them as separate works. But when you distribute the same + sections as part of a whole which is a work based on the Program, the + distribution of the whole must be on the terms of this License, whose + permissions for other licensees extend to the entire whole, and thus to + each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Program. + + In addition, mere aggregation of another work not based on the Program + with the Program (or with a work based on the Program) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. 
You may copy and distribute the Program (or a work based on it, under + Section 2) in object code or executable form under the terms of Sections + 1 and 2 above provided that you also do one of the following: + + * a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 + above on a medium customarily used for software interchange; or, + + * b) Accompany it with a written offer, valid for at least three years, + to give any third party, for a charge no more than your cost of + physically performing source distribution, a complete machine- + readable copy of the corresponding source code, to be distributed + under the terms of Sections 1 and 2 above on a medium customarily + used for software interchange; or, + + * c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed + only for noncommercial distribution and only if you received the + program in object code or executable form with such an offer, in + accord with Subsection b above.) + + The source code for a work means the preferred form of the work for + making modifications to it. For an executable work, complete source code + means all the source code for all modules it contains, plus any + associated interface definition files, plus the scripts used to control + compilation and installation of the executable. However, as a special + exception, the source code distributed need not include anything that is + normally distributed (in either source or binary form) with the major + components (compiler, kernel, and so on) of the operating system on which + the executable runs, unless that component itself accompanies the + executable. 
+ + If distribution of executable or object code is made by offering access + to copy from a designated place, then offering equivalent access to copy + the source code from the same place counts as distribution of the source + code, even though third parties are not compelled to copy the source + along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as + expressly provided under this License. Any attempt otherwise to copy, + modify, sublicense or distribute the Program is void, and will + automatically terminate your rights under this License. However, parties + who have received copies, or rights, from you under this License will not + have their licenses terminated so long as such parties remain in full + compliance. + +5. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Program or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Program (or any work based on the Program), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Program or works + based on it. + +6. Each time you redistribute the Program (or any work based on the + Program), the recipient automatically receives a license from the + original licensor to copy, distribute or modify the Program subject to + these terms and conditions. You may not impose any further restrictions + on the recipients' exercise of the rights granted herein. You are not + responsible for enforcing compliance by third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot distribute + so as to satisfy simultaneously your obligations under this License and + any other pertinent obligations, then as a consequence you may not + distribute the Program at all. For example, if a patent license would + not permit royalty-free redistribution of the Program by all those who + receive copies directly or indirectly through you, then the only way you + could satisfy both it and this License would be to refrain entirely from + distribution of the Program. + + If any portion of this section is held invalid or unenforceable under any + particular circumstance, the balance of the section is intended to apply + and the section as a whole is intended to apply in other circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system, which is implemented + by public license practices. Many people have made generous contributions + to the wide range of software distributed through that system in + reliance on consistent application of that system; it is up to the + author/donor to decide if he or she is willing to distribute software + through any other system and a licensee cannot impose that choice. + + This section is intended to make thoroughly clear what is believed to be + a consequence of the rest of this License. + +8. 
If the distribution and/or use of the Program is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Program under this License may add an + explicit geographical distribution limitation excluding those countries, + so that distribution is permitted only in or among countries not thus + excluded. In such case, this License incorporates the limitation as if + written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of + the General Public License from time to time. Such new versions will be + similar in spirit to the present version, but may differ in detail to + address new problems or concerns. + + Each version is given a distinguishing version number. If the Program + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Program does not specify a version + number of this License, you may choose any version ever published by the + Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs + whose distribution conditions are different, write to the author to ask + for permission. For software which is copyrighted by the Free Software + Foundation, write to the Free Software Foundation; we sometimes make + exceptions for this. Our decision will be guided by the two goals of + preserving the free status of all derivatives of our free software and + of promoting the sharing and reuse of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH + YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it free +software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey the +exclusion of warranty; and each file should have at least the "copyright" +line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. 
+Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 +Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free +software, and you are welcome to redistribute it under certain conditions; +type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program +'Gnomovision' (which makes passes at compilers) written by James Hacker. + +signature of Ty Coon, 1 April 1989 +Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. diff -urP linux-2.4.19.orig/drivers/net/e1000/Makefile linux-2.4.19/drivers/net/e1000/Makefile --- linux-2.4.19.orig/drivers/net/e1000/Makefile Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/Makefile Wed Feb 12 12:09:01 2003 @@ -0,0 +1,39 @@ +################################################################################ +# +# +# Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# The full GNU General Public License is included in this distribution in the +# file called LICENSE. +# +# Contact Information: +# Linux NICS +# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ + +# +# Makefile for the Intel(R) PRO/1000 ethernet driver +# + +O_TARGET := e1000.o + +obj-y := e1000_main.o e1000_hw.o e1000_ethtool.o e1000_param.o \ + e1000_proc.o +obj-m := $(O_TARGET) + +include $(TOPDIR)/Rules.make diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000.h linux-2.4.19/drivers/net/e1000/e1000.h --- linux-2.4.19.orig/drivers/net/e1000/e1000.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/e1000.h Wed Feb 12 12:09:01 2003 @@ -0,0 +1,207 @@ +/******************************************************************************* + + + Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +/* Linux PRO/1000 Ethernet Driver main header file */ + +#ifndef _E1000_H_ +#define _E1000_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BAR_0 0 +#define BAR_1 1 +#define BAR_5 5 +#define PCI_DMA_64BIT 0xffffffffffffffffULL +#define PCI_DMA_32BIT 0x00000000ffffffffULL + + +struct e1000_adapter; + +#include "e1000_hw.h" + +#if DBG +#define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args) +#else +#define E1000_DBG(args...) +#endif + +#define E1000_ERR(args...) printk(KERN_ERR "e1000: " args) + +#define E1000_MAX_INTR 10 + +/* Supported Rx Buffer Sizes */ +#define E1000_RXBUFFER_2048 2048 +#define E1000_RXBUFFER_4096 4096 +#define E1000_RXBUFFER_8192 8192 +#define E1000_RXBUFFER_16384 16384 + +/* Flow Control High-Watermark: 43464 bytes */ +#define E1000_FC_HIGH_THRESH 0xA9C8 + +/* Flow Control Low-Watermark: 43456 bytes */ +#define E1000_FC_LOW_THRESH 0xA9C0 + +/* Flow Control Pause Time: 858 usec */ +#define E1000_FC_PAUSE_TIME 0x0680 + +/* How many Tx Descriptors do we need to call netif_wake_queue ? */ +#define E1000_TX_QUEUE_WAKE 16 +/* How many Rx Buffers do we bundle into one write to the hardware ? 
*/
+#define E1000_RX_BUFFER_WRITE	16
+
+#define E1000_JUMBO_PBA      0x00000028
+#define E1000_DEFAULT_PBA    0x00000030
+
+#define AUTO_ALL_MODES       0
+
+/* only works for sizes that are powers of 2 */
+#define E1000_ROUNDUP(i, size) ((i) = (((i) + (size) - 1) & ~((size) - 1)))
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct e1000_buffer {
+	struct sk_buff *skb;
+	uint64_t dma;
+	unsigned long length;
+	unsigned long time_stamp;
+};
+
+struct e1000_desc_ring {
+	/* pointer to the descriptor ring memory */
+	void *desc;
+	/* physical address of the descriptor ring */
+	dma_addr_t dma;
+	/* length of descriptor ring in bytes */
+	unsigned int size;
+	/* number of descriptors in the ring */
+	unsigned int count;
+	/* next descriptor to associate a buffer with */
+	unsigned int next_to_use;
+	/* next descriptor to check for DD status bit */
+	unsigned int next_to_clean;
+	/* array of buffer information structs */
+	struct e1000_buffer *buffer_info;
+};
+
+#define E1000_DESC_UNUSED(R) \
+((((R)->next_to_clean + (R)->count) - ((R)->next_to_use + 1)) % ((R)->count))
+
+#define E1000_GET_DESC(R, i, type)	(&(((struct type *)((R).desc))[i]))
+#define E1000_RX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_rx_desc)
+#define E1000_TX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_tx_desc)
+#define E1000_CONTEXT_DESC(R, i)	E1000_GET_DESC(R, i, e1000_context_desc)
+
+/* board specific private data structure */
+
+struct e1000_adapter {
+	struct timer_list watchdog_timer;
+	struct timer_list phy_info_timer;
+#ifdef CONFIG_PROC_FS
+	struct list_head proc_list_head;
+#endif
+	struct vlan_group *vlgrp;
+	char *id_string;
+	uint32_t bd_number;
+	uint32_t rx_buffer_len;
+	uint32_t part_num;
+	uint32_t wol;
+	uint16_t link_speed;
+	uint16_t link_duplex;
+	spinlock_t stats_lock;
+	atomic_t irq_sem;
+	struct tq_struct tx_timeout_task;
+
+	struct timer_list blink_timer;
+	unsigned long led_status;
+
+	/* TX */
+	struct e1000_desc_ring tx_ring;
+	uint32_t txd_cmd;
+	uint32_t tx_int_delay;
+	uint32_t tx_abs_int_delay;
+	int max_data_per_txd;
+
+	/* RX */
+	struct e1000_desc_ring rx_ring;
+	uint64_t hw_csum_err;
+	uint64_t hw_csum_good;
+	uint32_t rx_int_delay;
+	uint32_t rx_abs_int_delay;
+	boolean_t rx_csum;
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+	struct net_device_stats net_stats;
+
+	/* structs defined in e1000_hw.h */
+	struct e1000_hw hw;
+	struct e1000_hw_stats stats;
+	struct e1000_phy_info phy_info;
+	struct e1000_phy_stats phy_stats;
+
+
+
+	uint32_t pci_state[16];
+	char ifname[IFNAMSIZ];
+};
+#endif /* _E1000_H_ */
diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_ethtool.c linux-2.4.19/drivers/net/e1000/e1000_ethtool.c
--- linux-2.4.19.orig/drivers/net/e1000/e1000_ethtool.c	Thu Jan 1 01:00:00 1970
+++ linux-2.4.19/drivers/net/e1000/e1000_ethtool.c	Wed Feb 12 12:09:01 2003
@@ -0,0 +1,582 @@
+/*******************************************************************************
+
+
+  Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the Free
+  Software Foundation; either version 2 of the License, or (at your option)
+  any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc., 59
+  Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+  The full GNU General Public License is included in this distribution in the
+  file called LICENSE.
+
+  Contact Information:
+  Linux NICS
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for e1000 */
+
+#include "e1000.h"
+
+#include <asm/uaccess.h>
+
+extern char e1000_driver_name[];
+extern char e1000_driver_version[];
+
+extern int e1000_up(struct e1000_adapter *adapter);
+extern void e1000_down(struct e1000_adapter *adapter);
+extern void e1000_reset(struct e1000_adapter *adapter);
+
+/* ETHTOOL_GSET: fill in ecmd with the supported and advertised link modes,
+ * the port and transceiver type, and the current speed/duplex (both set to
+ * -1 while the carrier is down). */
+static void
+e1000_ethtool_gset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if(hw->media_type == e1000_media_type_copper) {
+
+		ecmd->supported = (SUPPORTED_10baseT_Half |
+		                   SUPPORTED_10baseT_Full |
+		                   SUPPORTED_100baseT_Half |
+		                   SUPPORTED_100baseT_Full |
+		                   SUPPORTED_1000baseT_Full|
+		                   SUPPORTED_Autoneg |
+		                   SUPPORTED_TP);
+
+		ecmd->advertising = ADVERTISED_TP;
+
+		if(hw->autoneg == 1) {
+			ecmd->advertising |= ADVERTISED_Autoneg;
+
+			/* the e1000 autoneg seems to match ethtool nicely */
+
+			ecmd->advertising |= hw->autoneg_advertised;
+		}
+
+		ecmd->port = PORT_TP;
+		ecmd->phy_address = hw->phy_addr;
+
+		if(hw->mac_type == e1000_82543)
+			ecmd->transceiver = XCVR_EXTERNAL;
+		else
+			ecmd->transceiver = XCVR_INTERNAL;
+
+	} else {
+		ecmd->supported = (SUPPORTED_1000baseT_Full |
+		                   SUPPORTED_FIBRE |
+		                   SUPPORTED_Autoneg);
+
+		ecmd->advertising = (ADVERTISED_1000baseT_Full |
+		                     ADVERTISED_FIBRE |
+		                     ADVERTISED_Autoneg);
+
+		ecmd->port = PORT_FIBRE;
+
+		if(hw->mac_type >= e1000_82545)
+			ecmd->transceiver = XCVR_INTERNAL;
+		else
+			ecmd->transceiver = XCVR_EXTERNAL;
+	}
+
+	if(netif_carrier_ok(adapter->netdev)) {
+
+		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
+		                           &adapter->link_duplex);
+		ecmd->speed = adapter->link_speed;
+
+		/* unfortunately FULL_DUPLEX != DUPLEX_FULL
+		 * and HALF_DUPLEX != DUPLEX_HALF */
+
+		if(adapter->link_duplex == FULL_DUPLEX)
+			ecmd->duplex = DUPLEX_FULL;
+		else
+			ecmd->duplex = DUPLEX_HALF;
+	} else {
+		ecmd->speed = -1;
+		ecmd->duplex = -1;
+	}
+
+	ecmd->autoneg = (hw->autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+}
+
+/* ETHTOOL_SSET: apply the requested autoneg or forced speed/duplex setting
+ * and reset the link.  1000/half is rejected with -EINVAL; 1000/full is
+ * programmed as autonegotiation that advertises only 1000/full. */
+static int
+e1000_ethtool_sset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if(ecmd->autoneg == AUTONEG_ENABLE) {
+		hw->autoneg = 1;
+		hw->autoneg_advertised = 0x002F;
+		ecmd->advertising = 0x002F;
+	} else {
+		hw->autoneg = 0;
+		switch(ecmd->speed + ecmd->duplex) {
+		case SPEED_10 + DUPLEX_HALF:
+			hw->forced_speed_duplex = e1000_10_half;
+			break;
+		case SPEED_10 + DUPLEX_FULL:
+			hw->forced_speed_duplex = e1000_10_full;
+			break;
+		case SPEED_100 + DUPLEX_HALF:
+			hw->forced_speed_duplex = e1000_100_half;
+			break;
+		case SPEED_100 + DUPLEX_FULL:
+			hw->forced_speed_duplex = e1000_100_full;
+			break;
+		case SPEED_1000 + DUPLEX_FULL:
+			hw->autoneg = 1;
+			hw->autoneg_advertised = ADVERTISE_1000_FULL;
+			break;
+		case SPEED_1000 + DUPLEX_HALF: /* not supported */
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* reset the link */
+
+	if(netif_running(adapter->netdev)) {
+		e1000_down(adapter);
+		e1000_up(adapter);
+	} else
+		e1000_reset(adapter);
+
+	return 0;
+}
+
+/* EEPROM size in bytes: 512 on MACs newer than the 82544 whose EECD register
+ * has E1000_EECD_SIZE set, 128 otherwise. */
+static inline int
+e1000_eeprom_size(struct e1000_hw *hw)
+{
+	if((hw->mac_type > e1000_82544) &&
+	   (E1000_READ_REG(hw, EECD) & E1000_EECD_SIZE))
+		return 512;
+	else
+		return 128;
+}
+
+/* ETHTOOL_GDRVINFO: driver name/version, PCI slot name and dump sizes. */
+static void
+e1000_ethtool_gdrvinfo(struct e1000_adapter *adapter,
+                       struct ethtool_drvinfo *drvinfo)
+{
+	strncpy(drvinfo->driver, e1000_driver_name, 32);
+	strncpy(drvinfo->version, e1000_driver_version, 32);
+	strncpy(drvinfo->fw_version, "N/A", 32);
+	strncpy(drvinfo->bus_info, adapter->pdev->slot_name, 32);
+#define E1000_REGS_LEN 32
+	drvinfo->regdump_len = E1000_REGS_LEN * sizeof(uint32_t);
+	drvinfo->eedump_len = e1000_eeprom_size(&adapter->hw);
+}
+
+/* ETHTOOL_GREGS: snapshot twelve MAC registers into regs_buff[0..11].
+ * Entries 12..E1000_REGS_LEN-1 are not written here, so the caller must
+ * pass in a zeroed buffer. */
+static void
+e1000_ethtool_gregs(struct e1000_adapter *adapter,
+                    struct ethtool_regs *regs, uint32_t *regs_buff)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	regs_buff[0] = E1000_READ_REG(hw, CTRL);
+	regs_buff[1] = E1000_READ_REG(hw, STATUS);
+
+	regs_buff[2] = E1000_READ_REG(hw, RCTL);
+	regs_buff[3] = E1000_READ_REG(hw, RDLEN);
+	regs_buff[4] = E1000_READ_REG(hw, RDH);
+	regs_buff[5] = E1000_READ_REG(hw, RDT);
+	regs_buff[6] = E1000_READ_REG(hw, RDTR);
+
+	regs_buff[7] = E1000_READ_REG(hw, TCTL);
+	regs_buff[8] = E1000_READ_REG(hw, TDLEN);
+	regs_buff[9] = E1000_READ_REG(hw, TDH);
+	regs_buff[10] = E1000_READ_REG(hw, TDT);
+	regs_buff[11] = E1000_READ_REG(hw, TIDV);
+
+	return;
+}
+
+/* ETHTOOL_GEEPROM: read the EEPROM words covering [offset, offset + len)
+ * into eeprom_buff.  An offset at or beyond the end of the EEPROM is
+ * rejected; len is trimmed so the read stops at the last EEPROM byte,
+ * which also keeps it inside the caller's 256-word buffer. */
+static int
+e1000_ethtool_geeprom(struct e1000_adapter *adapter,
+                      struct ethtool_eeprom *eeprom, uint16_t *eeprom_buff)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i, max_len, first_word, last_word;
+
+	if(eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	max_len = e1000_eeprom_size(hw);
+
+	/* compare len against the space left rather than testing
+	 * offset + len, which wraps for large user-supplied values */
+	if(eeprom->offset >= max_len)
+		return -EINVAL;
+
+	if(eeprom->len > max_len - eeprom->offset)
+		eeprom->len = (max_len - eeprom->offset);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	for(i = 0; i <= (last_word - first_word); i++)
+		e1000_read_eeprom(hw, first_word + i, &eeprom_buff[i]);
+
+	return 0;
+}
+
+/* ETHTOOL_SEEPROM: write len bytes from user_data to the EEPROM starting at
+ * byte offset, read-modify-writing a partially covered first or last word,
+ * and update the checksum when the first written word is at or below
+ * EEPROM_CHECKSUM_REG.  The range is validated as in e1000_ethtool_geeprom(). */
+static int
+e1000_ethtool_seeprom(struct e1000_adapter *adapter,
+                      struct ethtool_eeprom *eeprom, void *user_data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	uint16_t eeprom_buff[256];
+	int i, max_len, first_word, last_word;
+	void *ptr;
+
+	if(eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if(eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = e1000_eeprom_size(hw);
+
+	/* offset and len come from user space: without this check a large
+	 * offset makes the trimmed len wrap and overruns eeprom_buff */
+	if(eeprom->offset >= max_len)
+		return -EINVAL;
+
+	if(eeprom->len > max_len - eeprom->offset)
+		eeprom->len = (max_len - eeprom->offset);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	ptr = (void *)eeprom_buff;
+
+	if(eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word */
+		/* only the second byte of the word is being modified */
+		e1000_read_eeprom(hw, first_word, &eeprom_buff[0]);
+		ptr++;
+	}
+	if((eeprom->offset + eeprom->len) & 1) {
+		/* need read/modify/write of last changed EEPROM word */
+		/* only the first byte of the word is being modified */
+		e1000_read_eeprom(hw, last_word,
+		                  &eeprom_buff[last_word - first_word]);
+	}
+	if(copy_from_user(ptr, user_data, eeprom->len))
+		return -EFAULT;
+
+	for(i = 0; i <= (last_word - first_word); i++)
+		e1000_write_eeprom(hw, first_word + i, eeprom_buff[i]);
+
+	/* Update the checksum over the first part of the EEPROM if needed */
+	if(first_word <= EEPROM_CHECKSUM_REG)
+		e1000_update_eeprom_checksum(hw);
+
+	return 0;
+}
+
+/* ETHTOOL_GWOL: report the supported wake-up events and those currently
+ * enabled in adapter->wol.  Nothing is reported for the 82542, 82543, the
+ * 82544EI fiber board, or function 1 of the dual-port 82546EB fiber board. */
+static void
+e1000_ethtool_gwol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	switch(adapter->hw.device_id) {
+	case E1000_DEV_ID_82542:
+	case E1000_DEV_ID_82543GC_FIBER:
+	case E1000_DEV_ID_82543GC_COPPER:
+	case E1000_DEV_ID_82544EI_FIBER:
+		wol->supported = 0;
+		wol->wolopts = 0;
+		return;
+
+	case E1000_DEV_ID_82546EB_FIBER:
+		/* Wake events only supported on port A for dual fiber */
+		if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) {
+			wol->supported = 0;
+			wol->wolopts = 0;
+			return;
+		}
+		/* Fall Through */
+
+	default:
+		wol->supported = WAKE_PHY | WAKE_UCAST |
+		                 WAKE_MCAST | WAKE_BCAST | WAKE_MAGIC;
+
+		wol->wolopts = 0;
+		if(adapter->wol & E1000_WUFC_LNKC)
+			wol->wolopts |= WAKE_PHY;
+		if(adapter->wol & E1000_WUFC_EX)
+			wol->wolopts |= WAKE_UCAST;
+		if(adapter->wol & E1000_WUFC_MC)
+			wol->wolopts |= WAKE_MCAST;
+		if(adapter->wol & E1000_WUFC_BC)
+			wol->wolopts |= WAKE_BCAST;
+		if(adapter->wol & E1000_WUFC_MAG)
+			wol->wolopts |= WAKE_MAGIC;
+		return;
+	}
+}
+
+/* ETHTOOL_SWOL: translate the requested wake-up options into adapter->wol.
+ * Returns -EOPNOTSUPP for WAKE_ARP/WAKE_MAGICSECURE, and for any option on
+ * the boards for which e1000_ethtool_gwol() reports no wake-up support. */
+static int
+e1000_ethtool_swol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	switch(adapter->hw.device_id) {
+	case E1000_DEV_ID_82542:
+	case E1000_DEV_ID_82543GC_FIBER:
+	case E1000_DEV_ID_82543GC_COPPER:
+	case E1000_DEV_ID_82544EI_FIBER:
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	case E1000_DEV_ID_82546EB_FIBER:
+		/* Wake events only supported on port A for dual fiber */
+		if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)
+			return wol->wolopts ? -EOPNOTSUPP : 0;
+		/* Fall Through */
+
+	default:
+		if(wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE))
+			return -EOPNOTSUPP;
+
+		adapter->wol = 0;
+
+		if(wol->wolopts & WAKE_PHY)
+			adapter->wol |= E1000_WUFC_LNKC;
+		if(wol->wolopts & WAKE_UCAST)
+			adapter->wol |= E1000_WUFC_EX;
+		if(wol->wolopts & WAKE_MCAST)
+			adapter->wol |= E1000_WUFC_MC;
+		if(wol->wolopts & WAKE_BCAST)
+			adapter->wol |= E1000_WUFC_BC;
+		if(wol->wolopts & WAKE_MAGIC)
+			adapter->wol |= E1000_WUFC_MAG;
+	}
+
+	return 0;
+}
+
+
+/* toggle LED 4 times per second = 2 "blinks" per second */
+#define E1000_ID_INTERVAL	(HZ/4)
+
+/* bit defines for adapter->led_status */
+#define E1000_LED_ON		0
+
+/* blink timer handler: toggle the LED and re-arm the timer */
+static void
+e1000_led_blink_callback(unsigned long data)
+{
+	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+
+	if(test_and_change_bit(E1000_LED_ON, &adapter->led_status))
+		e1000_led_off(&adapter->hw);
+	else
+		e1000_led_on(&adapter->hw);
+
+	mod_timer(&adapter->blink_timer, jiffies + E1000_ID_INTERVAL);
+}
+
+/* ETHTOOL_PHYS_ID: blink the LED for id->data seconds (0 = until the sleep
+ * is interrupted), then turn it off and call e1000_cleanup_led(). */
+static int
+e1000_ethtool_led_blink(struct e1000_adapter *adapter, struct ethtool_value *id)
+{
+	if(!adapter->blink_timer.function) {
+		init_timer(&adapter->blink_timer);
+		adapter->blink_timer.function = e1000_led_blink_callback;
+		adapter->blink_timer.data = (unsigned long) adapter;
+	}
+
+	e1000_setup_led(&adapter->hw);
+	mod_timer(&adapter->blink_timer, jiffies);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if(id->data)
+		schedule_timeout(id->data * HZ);
+	else
+		schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+
+	del_timer_sync(&adapter->blink_timer);
+	e1000_led_off(&adapter->hw);
+	clear_bit(E1000_LED_ON, &adapter->led_status);
+	e1000_cleanup_led(&adapter->hw);
+
+	return 0;
+}
+
+/* ethtool ioctl dispatcher: the first 32 bits of the user buffer at
+ * ifr->ifr_data select the sub-command.  SSET, NWAY_RST, SWOL and SEEPROM
+ * require CAP_NET_ADMIN.  Returns 0 or a negative errno (-EOPNOTSUPP for
+ * commands not handled here). */
+int
+e1000_ethtool_ioctl(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct e1000_adapter *adapter = netdev->priv;
+	void *addr = ifr->ifr_data;
+	uint32_t cmd;
+
+	if(get_user(cmd, (uint32_t *) addr))
+		return -EFAULT;
+
+	switch(cmd) {
+	case ETHTOOL_GSET: {
+		struct ethtool_cmd ecmd = {ETHTOOL_GSET};
+		e1000_ethtool_gset(adapter, &ecmd);
+		if(copy_to_user(addr, &ecmd, sizeof(ecmd)))
+			return -EFAULT;
+		return 0;
+	}
+	case ETHTOOL_SSET: {
+		struct ethtool_cmd ecmd;
+		if(!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if(copy_from_user(&ecmd, addr, sizeof(ecmd)))
+			return -EFAULT;
+		return e1000_ethtool_sset(adapter, &ecmd);
+	}
+	case ETHTOOL_GDRVINFO: {
+		struct ethtool_drvinfo drvinfo = {ETHTOOL_GDRVINFO};
+		e1000_ethtool_gdrvinfo(adapter, &drvinfo);
+		if(copy_to_user(addr, &drvinfo, sizeof(drvinfo)))
+			return -EFAULT;
+		return 0;
+	}
+	case ETHTOOL_GREGS: {
+		struct ethtool_regs regs = {ETHTOOL_GREGS};
+		uint32_t regs_buff[E1000_REGS_LEN] = {0};
+
+		if(copy_from_user(&regs, addr, sizeof(regs)))
+			return -EFAULT;
+		/* regs.len is user-supplied: never copy out more than the
+		 * zero-initialised dump buffer holds */
+		if(regs.len > sizeof(regs_buff))
+			regs.len = sizeof(regs_buff);
+		e1000_ethtool_gregs(adapter, &regs, regs_buff);
+		if(copy_to_user(addr, &regs, sizeof(regs)))
+			return -EFAULT;
+
+		addr += offsetof(struct ethtool_regs, data);
+		if(copy_to_user(addr, regs_buff, regs.len))
+			return -EFAULT;
+
+		return 0;
+	}
+	case ETHTOOL_NWAY_RST: {
+		if(!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if(netif_running(netdev)) {
+			e1000_down(adapter);
+			e1000_up(adapter);
+		}
+		return 0;
+	}
+	case ETHTOOL_PHYS_ID: {
+		struct ethtool_value id;
+		if(copy_from_user(&id, addr, sizeof(id)))
+			return -EFAULT;
+		return e1000_ethtool_led_blink(adapter, &id);
+	}
+	case ETHTOOL_GLINK: {
+		struct ethtool_value link = {ETHTOOL_GLINK};
+		link.data = netif_carrier_ok(netdev);
+		if(copy_to_user(addr, &link, sizeof(link)))
+			return -EFAULT;
+		return 0;
+	}
+	case ETHTOOL_GWOL: {
+		struct ethtool_wolinfo wol = {ETHTOOL_GWOL};
+		e1000_ethtool_gwol(adapter, &wol);
+		if(copy_to_user(addr, &wol, sizeof(wol)) != 0)
+			return -EFAULT;
+		return 0;
+	}
+	case ETHTOOL_SWOL: {
+		struct ethtool_wolinfo wol;
+		if(!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if(copy_from_user(&wol, addr, sizeof(wol)) != 0)
+			return -EFAULT;
+		return e1000_ethtool_swol(adapter, &wol);
+	}
+	case ETHTOOL_GEEPROM: {
+		struct ethtool_eeprom eeprom = {ETHTOOL_GEEPROM};
+		uint16_t eeprom_buff[256];
+		void *ptr;
+		int err;
+
+		if(copy_from_user(&eeprom, addr, sizeof(eeprom)))
+			return -EFAULT;
+
+		if((err = e1000_ethtool_geeprom(adapter,
+			&eeprom, eeprom_buff)))
+			return err;
+
+		if(copy_to_user(addr, &eeprom, sizeof(eeprom)))
+			return -EFAULT;
+
+		addr += offsetof(struct ethtool_eeprom, data);
+		ptr = ((void *)eeprom_buff) + (eeprom.offset & 1);
+
+		if(copy_to_user(addr, ptr, eeprom.len))
+			return -EFAULT;
+		return 0;
+	}
+	case ETHTOOL_SEEPROM: {
+		struct ethtool_eeprom eeprom;
+
+		if(!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		if(copy_from_user(&eeprom, addr, sizeof(eeprom)))
+			return -EFAULT;
+
+		addr += offsetof(struct ethtool_eeprom, data);
+		return e1000_ethtool_seeprom(adapter, &eeprom, addr);
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+
diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_hw.c linux-2.4.19/drivers/net/e1000/e1000_hw.c
--- linux-2.4.19.orig/drivers/net/e1000/e1000_hw.c	Thu Jan 1 01:00:00 1970
+++ linux-2.4.19/drivers/net/e1000/e1000_hw.c	Wed Feb 12 12:09:01 2003
@@ -0,0 +1,3610 @@
+/*******************************************************************************
+
+
+  Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the Free
+  Software Foundation; either version 2 of the License, or (at your option)
+  any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+  more details.
+ + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* e1000_hw.c + * Shared functions for accessing and configuring the MAC + */ + +#include "e1000_hw.h" + +static int32_t e1000_setup_fiber_link(struct e1000_hw *hw); +static int32_t e1000_setup_copper_link(struct e1000_hw *hw); +static int32_t e1000_phy_force_speed_duplex(struct e1000_hw *hw); +static int32_t e1000_config_mac_to_phy(struct e1000_hw *hw); +static int32_t e1000_force_mac_fc(struct e1000_hw *hw); +static void e1000_raise_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl); +static void e1000_lower_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl); +static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, uint32_t data, uint16_t count); +static uint16_t e1000_shift_in_mdi_bits(struct e1000_hw *hw); +static int32_t e1000_phy_reset_dsp(struct e1000_hw *hw); +static void e1000_raise_ee_clk(struct e1000_hw *hw, uint32_t *eecd); +static void e1000_lower_ee_clk(struct e1000_hw *hw, uint32_t *eecd); +static void e1000_shift_out_ee_bits(struct e1000_hw *hw, uint16_t data, uint16_t count); +static uint16_t e1000_shift_in_ee_bits(struct e1000_hw *hw); +static void e1000_setup_eeprom(struct e1000_hw *hw); +static void e1000_clock_eeprom(struct e1000_hw *hw); +static void e1000_cleanup_eeprom(struct e1000_hw *hw); +static void e1000_standby_eeprom(struct e1000_hw *hw); +static int32_t e1000_id_led_init(struct e1000_hw * hw); + +/****************************************************************************** + * Set the mac type member in the hw struct. 
+ * + * hw - Struct containing variables accessed by shared code + *****************************************************************************/ +int32_t +e1000_set_mac_type(struct e1000_hw *hw) +{ /* Maps hw->device_id (and, for the 82542, hw->revision_id) to hw->mac_type. Returns E1000_SUCCESS, or -E1000_ERR_MAC_TYPE when the device or revision ID is not one of the cases below. */ + DEBUGFUNC("e1000_set_mac_type"); + + switch (hw->device_id) { + case E1000_DEV_ID_82542: + switch (hw->revision_id) { + case E1000_82542_2_0_REV_ID: + hw->mac_type = e1000_82542_rev2_0; + break; + case E1000_82542_2_1_REV_ID: + hw->mac_type = e1000_82542_rev2_1; + break; + default: + /* Invalid 82542 revision ID (neither rev 2.0 nor rev 2.1) */ + return -E1000_ERR_MAC_TYPE; + } + break; + case E1000_DEV_ID_82543GC_FIBER: + case E1000_DEV_ID_82543GC_COPPER: + hw->mac_type = e1000_82543; + break; + case E1000_DEV_ID_82544EI_COPPER: + case E1000_DEV_ID_82544EI_FIBER: + case E1000_DEV_ID_82544GC_COPPER: + case E1000_DEV_ID_82544GC_LOM: + hw->mac_type = e1000_82544; + break; + case E1000_DEV_ID_82540EM: + case E1000_DEV_ID_82540EM_LOM: + case E1000_DEV_ID_82540EP: + case E1000_DEV_ID_82540EP_LOM: + case E1000_DEV_ID_82540EP_LP: + hw->mac_type = e1000_82540; + break; + case E1000_DEV_ID_82545EM_COPPER: + case E1000_DEV_ID_82545EM_FIBER: + hw->mac_type = e1000_82545; + break; + case E1000_DEV_ID_82546EB_COPPER: + case E1000_DEV_ID_82546EB_FIBER: + hw->mac_type = e1000_82546; + break; + default: + /* Should never have loaded on this device */ + return -E1000_ERR_MAC_TYPE; + } + return E1000_SUCCESS; +} +/****************************************************************************** + * Reset the transmit and receive units; mask and clear all interrupts. 
+ * + * hw - Struct containing variables accessed by shared code + *****************************************************************************/ +void +e1000_reset_hw(struct e1000_hw *hw) +{ + uint32_t ctrl; + uint32_t ctrl_ext; + uint32_t icr; + uint32_t manc; + + DEBUGFUNC("e1000_reset_hw"); + + /* For 82542 (rev 2.0), disable MWI before issuing a device reset */ + if(hw->mac_type == e1000_82542_rev2_0) { + DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); + e1000_pci_clear_mwi(hw); + } + + /* Clear interrupt mask to stop board from generating interrupts */ + DEBUGOUT("Masking off all interrupts\n"); + E1000_WRITE_REG(hw, IMC, 0xffffffff); + + /* Disable the Transmit and Receive units. Then delay to allow + * any pending transactions to complete before we hit the MAC with + * the global reset. + */ + E1000_WRITE_REG(hw, RCTL, 0); + E1000_WRITE_REG(hw, TCTL, E1000_TCTL_PSP); + E1000_WRITE_FLUSH(hw); + + /* The tbi_compatibility_on Flag must be cleared when Rctl is cleared. */ + hw->tbi_compatibility_on = FALSE; + + /* Delay to allow any outstanding PCI transactions to complete before + * resetting the device + */ + msec_delay(10); + + /* Issue a global reset to the MAC. This will reset the chip's + * transmit, receive, DMA, and link units. It will not affect + * the current PCI configuration. The global reset bit is self- + * clearing, and should clear within a microsecond. 
+ */ + DEBUGOUT("Issuing a global reset to MAC\n"); + ctrl = E1000_READ_REG(hw, CTRL); + + if(hw->mac_type > e1000_82543) + E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST)); + else + E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST)); + + /* Force a reload from the EEPROM if necessary (explicitly requested only for mac_type below e1000_82540) */ + if(hw->mac_type < e1000_82540) { + /* Wait for reset to complete */ + udelay(10); + ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_EE_RST; + E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); + /* Wait for EEPROM reload */ + msec_delay(2); + } else { + /* Wait for EEPROM reload (it happens automatically) */ + msec_delay(4); + /* Disable HW ARPs on ASF enabled adapters */ + manc = E1000_READ_REG(hw, MANC); + manc &= ~(E1000_MANC_ARP_EN); + E1000_WRITE_REG(hw, MANC, manc); + } + + /* Clear interrupt mask again, now that the reset has been issued, to stop board from generating interrupts */ + DEBUGOUT("Masking off all interrupts\n"); + E1000_WRITE_REG(hw, IMC, 0xffffffff); + + /* Clear any pending interrupt events. The value read into icr is not used afterwards. */ + icr = E1000_READ_REG(hw, ICR); + + /* If MWI was previously enabled, reenable it. */ + if(hw->mac_type == e1000_82542_rev2_0) { + if(hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) + e1000_pci_set_mwi(hw); + } +} + +/****************************************************************************** + * Performs basic configuration of the adapter. + * + * hw - Struct containing variables accessed by shared code + * + * Assumes that the controller has previously been reset and is in a + * post-reset uninitialized state. Initializes the receive address registers, + * multicast table, and VLAN filter table. Calls routines to setup link + * configuration and flow control settings. Clears all on-chip counters. Leaves + * the transmit and receive units disabled and uninitialized. 
+ *****************************************************************************/ +int32_t +e1000_init_hw(struct e1000_hw *hw) +{ + uint32_t ctrl, status; + uint32_t i; + int32_t ret_val; + uint16_t pcix_cmd_word; + uint16_t pcix_stat_hi_word; + uint16_t cmd_mmrbc; + uint16_t stat_mmrbc; + + DEBUGFUNC("e1000_init_hw"); + + /* Initialize Identification LED */ + ret_val = e1000_id_led_init(hw); + if(ret_val < 0) { + DEBUGOUT("Error Initializing Identification LED\n"); + return ret_val; + } + + /* Set the Media Type. NOTE(review): nothing in this section returns an error; the type is chosen from mac_type and the TBIMODE bit of the STATUS register. */ + if(hw->mac_type != e1000_82543) { + /* tbi_compatibility is only valid on 82543 */ + hw->tbi_compatibility_en = FALSE; + } + + if(hw->mac_type >= e1000_82543) { + status = E1000_READ_REG(hw, STATUS); + if(status & E1000_STATUS_TBIMODE) { + hw->media_type = e1000_media_type_fiber; + /* tbi_compatibility not valid on fiber */ + hw->tbi_compatibility_en = FALSE; + } else { + hw->media_type = e1000_media_type_copper; + } + } else { + /* This is an 82542 (fiber only) */ + hw->media_type = e1000_media_type_fiber; + } + + /* Disabling VLAN filtering. */ + DEBUGOUT("Initializing the IEEE VLAN\n"); + E1000_WRITE_REG(hw, VET, 0); + + e1000_clear_vfta(hw); + + /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ + if(hw->mac_type == e1000_82542_rev2_0) { + DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); + e1000_pci_clear_mwi(hw); + E1000_WRITE_REG(hw, RCTL, E1000_RCTL_RST); + E1000_WRITE_FLUSH(hw); + msec_delay(5); + } + + /* Setup the receive address. This involves initializing all of the Receive + * Address Registers (RARs 0 - 15). 
+ */ + e1000_init_rx_addrs(hw); + + /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ + if(hw->mac_type == e1000_82542_rev2_0) { + E1000_WRITE_REG(hw, RCTL, 0); + E1000_WRITE_FLUSH(hw); + msec_delay(1); + if(hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) + e1000_pci_set_mwi(hw); + } + + /* Zero out the Multicast HASH table */ + DEBUGOUT("Zeroing the MTA\n"); + for(i = 0; i < E1000_MC_TBL_SIZE; i++) + E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); + + /* Set the PCI priority bit correctly in the CTRL register. This + * determines if the adapter gives priority to receives, or if it + * gives equal priority to transmits and receives. + */ + if(hw->dma_fairness) { + ctrl = E1000_READ_REG(hw, CTRL); + E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PRIOR); + } + + /* Workaround for PCI-X problem when BIOS sets MMRBC incorrectly: a 4K value in the status register is treated as 2K, and the command register MMRBC field is lowered to the status value whenever it is larger. */ + if(hw->bus_type == e1000_bus_type_pcix) { + e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word); + e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word); + cmd_mmrbc = (pcix_cmd_word & PCIX_COMMAND_MMRBC_MASK) >> + PCIX_COMMAND_MMRBC_SHIFT; + stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> + PCIX_STATUS_HI_MMRBC_SHIFT; + if(stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) + stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; + if(cmd_mmrbc > stat_mmrbc) { + pcix_cmd_word &= ~PCIX_COMMAND_MMRBC_MASK; + pcix_cmd_word |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; + e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word); + } + } + + /* Call a subroutine to configure the link and setup flow control. Its result is kept in ret_val and returned at the end; the remaining steps run even when it is negative. */ + ret_val = e1000_setup_link(hw); + + /* Set the transmit descriptor write-back policy (ctrl is reused here to hold the TXDCTL value) */ + if(hw->mac_type > e1000_82544) { + ctrl = E1000_READ_REG(hw, TXDCTL); + ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; + E1000_WRITE_REG(hw, TXDCTL, ctrl); + } + + /* Clear all of the statistics registers (clear on read). 
It is + * important that we do this after we have tried to establish link + * because the symbol error count will increment wildly if there + * is no link. + */ + e1000_clear_hw_cntrs(hw); + + return ret_val; +} + +/****************************************************************************** + * Configures flow control and link settings. + * + * hw - Struct containing variables accessed by shared code + * + * Determines which flow control settings to use. Calls the appropriate media- + * specific link configuration function. Configures the flow control settings. + * Assuming the adapter has a valid link partner, a valid link should be + * established. Assumes the hardware has previously been reset and the + * transmitter and receiver are not enabled. Returns -E1000_ERR_EEPROM when the EEPROM read fails, otherwise the result of the media-specific setup routine. + *****************************************************************************/ +int32_t +e1000_setup_link(struct e1000_hw *hw) +{ + uint32_t ctrl_ext; + int32_t ret_val; + uint16_t eeprom_data; + + DEBUGFUNC("e1000_setup_link"); + + /* Read and store word 0x0F of the EEPROM. This word contains bits + * that determine the hardware's default PAUSE (flow control) mode, + * a bit that determines whether the HW defaults to enabling or + * disabling auto-negotiation, and the direction of the + * SW defined pins. If there is no SW over-ride of the flow + * control setting, then the variable hw->fc will + * be initialized based on a value in the EEPROM. 
+ */ + if(e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + + if(hw->fc == e1000_fc_default) { + if((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0) + hw->fc = e1000_fc_none; + else if((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == + EEPROM_WORD0F_ASM_DIR) + hw->fc = e1000_fc_tx_pause; + else + hw->fc = e1000_fc_full; + } + + /* We want to save off the original Flow Control configuration just + * in case we get disconnected and then reconnected into a different + * hub or switch with different Flow Control capabilities. It is stored in hw->original_fc after the two fix-ups below (82542 rev 2.0 drops tx_pause; mac_type below 82543 with report_tx_early == 1 drops rx_pause). + */ + if(hw->mac_type == e1000_82542_rev2_0) + hw->fc &= (~e1000_fc_tx_pause); + + if((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1)) + hw->fc &= (~e1000_fc_rx_pause); + + hw->original_fc = hw->fc; + + DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc); + + /* Take the 4 bits from EEPROM word 0x0F that determine the initial + * polarity value for the SW controlled pins, and setup the + * Extended Device Control reg with that info. + * This is needed because one of the SW controlled pins is used for + * signal detection. So this should be done before e1000_setup_pcs_link() + * or e1000_phy_setup() is called. NOTE(review): the link routines actually called below are e1000_setup_fiber_link() and e1000_setup_copper_link(). + */ + if(hw->mac_type == e1000_82543) { + ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) << + SWDPIO__EXT_SHIFT); + E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); + } + + /* Call the necessary subroutine to configure the link. */ + ret_val = (hw->media_type == e1000_media_type_fiber) ? + e1000_setup_fiber_link(hw) : + e1000_setup_copper_link(hw); + + /* Initialize the flow control address, type, and PAUSE timer + * registers to their default values. This is done even if flow + * control is disabled, because it does not hurt anything to + * initialize these registers. 
+ */ + DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); + + E1000_WRITE_REG(hw, FCAL, FLOW_CONTROL_ADDRESS_LOW); + E1000_WRITE_REG(hw, FCAH, FLOW_CONTROL_ADDRESS_HIGH); + E1000_WRITE_REG(hw, FCT, FLOW_CONTROL_TYPE); + E1000_WRITE_REG(hw, FCTTV, hw->fc_pause_time); + + /* Set the flow control receive threshold registers. Normally, + * these registers will be set to a default threshold that may be + * adjusted later by the driver's runtime code. However, if the + * ability to transmit pause frames is not enabled, then these + * registers will be set to 0. + */ + if(!(hw->fc & e1000_fc_tx_pause)) { + E1000_WRITE_REG(hw, FCRTL, 0); + E1000_WRITE_REG(hw, FCRTH, 0); + } else { + /* We need to set up the Receive Threshold high and low water marks + * as well as (optionally) enabling the transmission of XON frames. + */ + if(hw->fc_send_xon) { + E1000_WRITE_REG(hw, FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE)); + E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water); + } else { + E1000_WRITE_REG(hw, FCRTL, hw->fc_low_water); + E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water); + } + } + return ret_val; +} + +/****************************************************************************** + * Sets up link for a fiber based adapter + * + * hw - Struct containing variables accessed by shared code + * + * Manipulates Physical Coding Sublayer functions in order to configure + * link. Assumes the hardware has been previously reset and the transmitter + * and receiver are not enabled. Returns 0, -E1000_ERR_CONFIG for an invalid hw->fc, or the negative result of e1000_check_for_link(). + *****************************************************************************/ +static int32_t +e1000_setup_fiber_link(struct e1000_hw *hw) +{ + uint32_t ctrl; + uint32_t status; + uint32_t txcw = 0; + uint32_t i; + uint32_t signal; + int32_t ret_val; + + DEBUGFUNC("e1000_setup_fiber_link"); + + /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be + * set when the optics detect a signal. 
On older adapters, it will be + * cleared when there is a signal + */ + ctrl = E1000_READ_REG(hw, CTRL); + if(hw->mac_type > e1000_82544) signal = E1000_CTRL_SWDPIN1; + else signal = 0; + + /* Take the link out of reset (only in the local copy here; CTRL itself is written further down, after TXCW) */ + ctrl &= ~(E1000_CTRL_LRST); + + e1000_config_collision_dist(hw); + + /* Check for a software override of the flow control settings, and setup + * the device accordingly. If auto-negotiation is enabled, then software + * will have to set the "PAUSE" bits to the correct value in the Transmit + * Config Word Register (TXCW) and re-start auto-negotiation. However, if + * auto-negotiation is disabled, then software will have to manually + * configure the two flow control enable bits in the CTRL register. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, but + * not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but we do + * not support receiving pause frames). + * 3: Both Rx and TX flow control (symmetric) are enabled. + */ + switch (hw->fc) { + case e1000_fc_none: + /* Flow control is completely disabled by a software over-ride. */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); + break; + case e1000_fc_rx_pause: + /* RX Flow control is enabled and TX Flow control is disabled by a + * software over-ride. Since there really isn't a way to advertise + * that we are capable of RX Pause ONLY, we will advertise that we + * support both symmetric and asymmetric RX PAUSE. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); + break; + case e1000_fc_tx_pause: + /* TX Flow control is enabled, and RX Flow control is disabled, by a + * software over-ride. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); + break; + case e1000_fc_full: + /* Flow control (both RX and TX) is enabled by a software over-ride. 
*/ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + break; + } + + /* Since auto-negotiation is enabled, take the link out of reset (the link + * will be in reset, because we previously reset the chip). This will + * restart auto-negotiation. If auto-negotiation is successful then the + * link-up status bit will be set and the flow control enable bits (RFCE + * and TFCE) will be set according to their negotiated value. + */ + DEBUGOUT("Auto-negotiation enabled\n"); + + E1000_WRITE_REG(hw, TXCW, txcw); + E1000_WRITE_REG(hw, CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + hw->txcw = txcw; + msec_delay(1); + + /* If we have a signal (the cable is plugged in) then poll for a "Link-Up" + * indication in the Device Status Register. Time-out if a link isn't + * seen in 500 milliseconds (the loop below polls every 10 ms, LINK_UP_TIMEOUT / 10 times; auto-negotiation should complete in + * less than 500 milliseconds even if the other end is doing it in SW). + */ + if((E1000_READ_REG(hw, CTRL) & E1000_CTRL_SWDPIN1) == signal) { + DEBUGOUT("Looking for Link\n"); + for(i = 0; i < (LINK_UP_TIMEOUT / 10); i++) { + msec_delay(10); + status = E1000_READ_REG(hw, STATUS); + if(status & E1000_STATUS_LU) break; + } + if(i == (LINK_UP_TIMEOUT / 10)) { + /* AutoNeg failed to achieve a link, so we'll call + * e1000_check_for_link. This routine will force the link up if we + * detect a signal. This will allow us to communicate with + * non-autonegotiating link partners. 
+ */ + DEBUGOUT("Never got a valid link from auto-neg!!!\n"); + hw->autoneg_failed = 1; + ret_val = e1000_check_for_link(hw); + if(ret_val < 0) { + DEBUGOUT("Error while checking for link\n"); + return ret_val; + } + hw->autoneg_failed = 0; + } else { + hw->autoneg_failed = 0; + DEBUGOUT("Valid Link Found\n"); + } + } else { + DEBUGOUT("No Signal Detected\n"); + } + return 0; +} + +/****************************************************************************** +* Detects which PHY is present and the speed and duplex, programs the PHY and, once link is seen, the MAC. Returns 0 (also when no link comes up) or a negative error code. +* +* hw - Struct containing variables accessed by shared code +******************************************************************************/ +static int32_t +e1000_setup_copper_link(struct e1000_hw *hw) +{ + uint32_t ctrl; + int32_t ret_val; + uint16_t i; + uint16_t phy_data; + + DEBUGFUNC("e1000_setup_copper_link"); + + ctrl = E1000_READ_REG(hw, CTRL); + /* With 82543, we need to force speed and duplex on the MAC equal to what + * the PHY speed and duplex configuration is. In addition, we need to + * perform a hardware reset on the PHY to take it out of reset. That case is the else branch below (mac_type not above e1000_82543). + */ + if(hw->mac_type > e1000_82543) { + ctrl |= E1000_CTRL_SLU; + ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + E1000_WRITE_REG(hw, CTRL, ctrl); + } else { + ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU); + E1000_WRITE_REG(hw, CTRL, ctrl); + e1000_phy_hw_reset(hw); + } + + /* Make sure we have a valid PHY */ + ret_val = e1000_detect_gig_phy(hw); + if(ret_val < 0) { + DEBUGOUT("Error, did not detect valid phy.\n"); + return ret_val; + } + DEBUGOUT1("Phy ID = %x \n", hw->phy_id); + + /* Enable CRS on TX. This must be set for half-duplex operation. 
*/ + if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + + /* Options: + * MDI/MDI-X = 0 (default) + * 0 - Auto for all speeds + * 1 - MDI mode + * 2 - MDI-X mode + * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes); any other hw->mdix value is handled like 0 + */ + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + + switch (hw->mdix) { + case 1: + phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; + break; + case 2: + phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; + break; + case 3: + phy_data |= M88E1000_PSCR_AUTO_X_1000T; + break; + case 0: + default: + phy_data |= M88E1000_PSCR_AUTO_X_MODE; + break; + } + + /* Options: + * disable_polarity_correction = 0 (default) + * Automatic Correction for Reversed Cable Polarity + * 0 - Disabled + * 1 - Enabled (sets M88E1000_PSCR_POLARITY_REVERSAL) + */ + phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; + if(hw->disable_polarity_correction == 1) + phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; + if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + + /* Force TX_CLK in the Extended PHY Specific Control Register + * to 25MHz clock. 
+ */ + if(e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + phy_data |= M88E1000_EPSCR_TX_CLK_25; + + if (hw->phy_revision < M88E1011_I_REV_4) { + /* Configure Master and Slave downshift values. NOTE(review): the write below is the only write to M88E1000_EXT_PHY_SPEC_CTRL here, so the TX_CLK_25 bit set above is written back only on this path. */ + phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | + M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); + phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | + M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); + if(e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + } + + /* SW Reset the PHY so all changes take effect */ + ret_val = e1000_phy_reset(hw); + if(ret_val < 0) { + DEBUGOUT("Error Resetting the PHY\n"); + return ret_val; + } + + /* Options: + * autoneg = 1 (default) + * PHY will advertise value(s) parsed from + * autoneg_advertised and fc + * autoneg = 0 + * PHY will be set to 10H, 10F, 100H, or 100F + * depending on value parsed from forced_speed_duplex. + */ + + /* Is autoneg enabled? This is enabled by default or by software override. + * If so, call e1000_phy_setup_autoneg routine to parse the + * autoneg_advertised and fc options. If autoneg is NOT enabled, then the + * user should have provided a speed/duplex override. If so, then call + * e1000_phy_force_speed_duplex to parse and set this up. + */ + if(hw->autoneg) { + /* Perform some bounds checking on the hw->autoneg_advertised + * parameter. If this variable is zero, then set it to the default. 
+ */ + hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT; + + /* If autoneg_advertised is zero, we assume it was not defaulted + * by the calling code so we set to advertise full capability. + */ + if(hw->autoneg_advertised == 0) + hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT; + + DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); + ret_val = e1000_phy_setup_autoneg(hw); + if(ret_val < 0) { + DEBUGOUT("Error Setting up Auto-Negotiation\n"); + return ret_val; + } + DEBUGOUT("Restarting Auto-Neg\n"); + + /* Restart auto-negotiation by setting the Auto Neg Enable bit and + * the Auto Neg Restart bit in the PHY control register. + */ + if(e1000_read_phy_reg(hw, PHY_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); + if(e1000_write_phy_reg(hw, PHY_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + + /* Does the user want to wait for Auto-Neg to complete here, or + * check at a later time (for example, callback routine). + */ + if(hw->wait_autoneg_complete) { + ret_val = e1000_wait_autoneg(hw); + if(ret_val < 0) { + DEBUGOUT("Error while waiting for autoneg to complete\n"); + return ret_val; + } + } + } else { + DEBUGOUT("Forcing speed and duplex\n"); + ret_val = e1000_phy_force_speed_duplex(hw); + if(ret_val < 0) { + DEBUGOUT("Error Forcing Speed and Duplex\n"); + return ret_val; + } + } + + /* Check link status. Wait up to 100 microseconds for link to become + * valid. PHY_STATUS is read twice per pass and only the second value is tested (NOTE(review): presumably the first read flushes a latched link bit - confirm). + */ + for(i = 0; i < 10; i++) { + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(phy_data & MII_SR_LINK_STATUS) { + /* We have link, so we need to finish the config process: + * 1) Set up the MAC to the current PHY speed/duplex + * if we are on 82543. If we + * are on newer silicon, we only need to configure + * collision distance in the Transmit Control Register. + * 2) Set up flow control on the MAC to that established with + * the link partner. 
+ */ + if(hw->mac_type >= e1000_82544) { + e1000_config_collision_dist(hw); + } else { + ret_val = e1000_config_mac_to_phy(hw); + if(ret_val < 0) { + DEBUGOUT("Error configuring MAC to PHY settings\n"); + return ret_val; + } + } + ret_val = e1000_config_fc_after_link_up(hw); + if(ret_val < 0) { + DEBUGOUT("Error Configuring Flow Control\n"); + return ret_val; + } + DEBUGOUT("Valid link established!!!\n"); + return 0; + } + udelay(10); + } + + DEBUGOUT("Unable to establish link!!!\n"); + return 0; +} + +/****************************************************************************** +* Configures PHY autoneg and flow control advertisement settings from hw->autoneg_advertised and hw->fc. Returns 0, -E1000_ERR_PHY on a PHY access failure, or -E1000_ERR_CONFIG for an invalid hw->fc. +* +* hw - Struct containing variables accessed by shared code +******************************************************************************/ +int32_t +e1000_phy_setup_autoneg(struct e1000_hw *hw) +{ + uint16_t mii_autoneg_adv_reg; + uint16_t mii_1000t_ctrl_reg; + + DEBUGFUNC("e1000_phy_setup_autoneg"); + + /* Read the MII Auto-Neg Advertisement Register (Address 4). */ + if(e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + + /* Read the MII 1000Base-T Control Register (Address 9). */ + if(e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + + /* Need to parse both autoneg_advertised and fc and set up + * the appropriate PHY registers. First we will parse for + * autoneg_advertised software override. Since we can advertise + * a plethora of combinations, we need to check each bit + * individually. + */ + + /* First we clear all the 10/100 mb speed bits in the Auto-Neg + * Advertisement Register (Address 4) and the 1000 mb speed bits in + * the 1000Base-T Control Register (Address 9). 
+ */ + mii_autoneg_adv_reg &= ~REG4_SPEED_MASK; + mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK; + + DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised); + + /* Do we want to advertise 10 Mb Half Duplex? */ + if(hw->autoneg_advertised & ADVERTISE_10_HALF) { + DEBUGOUT("Advertise 10mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; + } + + /* Do we want to advertise 10 Mb Full Duplex? */ + if(hw->autoneg_advertised & ADVERTISE_10_FULL) { + DEBUGOUT("Advertise 10mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; + } + + /* Do we want to advertise 100 Mb Half Duplex? */ + if(hw->autoneg_advertised & ADVERTISE_100_HALF) { + DEBUGOUT("Advertise 100mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; + } + + /* Do we want to advertise 100 Mb Full Duplex? */ + if(hw->autoneg_advertised & ADVERTISE_100_FULL) { + DEBUGOUT("Advertise 100mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; + } + + /* We do not allow the Phy to advertise 1000 Mb Half Duplex: the request is only logged, no register bit is set */ + if(hw->autoneg_advertised & ADVERTISE_1000_HALF) { + DEBUGOUT("Advertise 1000mb Half duplex requested, request denied!\n"); + } + + /* Do we want to advertise 1000 Mb Full Duplex? */ + if(hw->autoneg_advertised & ADVERTISE_1000_FULL) { + DEBUGOUT("Advertise 1000mb Full duplex\n"); + mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; + } + + /* Check for a software override of the flow control settings, and + * setup the PHY advertisement registers accordingly. If + * auto-negotiation is enabled, then software will have to set the + * "PAUSE" bits to the correct value in the Auto-Negotiation + * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-negotiation. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * but we do not support receiving pause frames). 
+ * 3: Both Rx and TX flow control (symmetric) are enabled. + * other: No software override. The flow control configuration + * in the EEPROM is used. NOTE(review): in this switch any other value reaches the default case, which returns -E1000_ERR_CONFIG. + */ + switch (hw->fc) { + case e1000_fc_none: /* 0 */ + /* Flow control (RX & TX) is completely disabled by a + * software over-ride. + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case e1000_fc_rx_pause: /* 1 */ + /* RX Flow control is enabled, and TX Flow control is + * disabled, by a software over-ride. + */ + /* Since there really isn't a way to advertise that we are + * capable of RX Pause ONLY, we will advertise that we + * support both symmetric and asymmetric RX PAUSE. Later + * (in e1000_config_fc_after_link_up) we will disable the + * hw's ability to send PAUSE frames. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case e1000_fc_tx_pause: /* 2 */ + /* TX Flow control is enabled, and RX Flow control is + * disabled, by a software over-ride. + */ + mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; + mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; + break; + case e1000_fc_full: /* 3 */ + /* Flow control (both RX and TX) is enabled by a software + * over-ride. 
+ */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + } + + if(e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + + DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); + + if(e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + return 0; +} + +/****************************************************************************** +* Force PHY speed and duplex settings to hw->forced_speed_duplex +* +* hw - Struct containing variables accessed by shared code +* +* Side effect: hw->fc is set to e1000_fc_none, because flow control is turned +* off whenever speed and duplex are forced. +* +* Returns 0 on success, -E1000_ERR_PHY if a PHY register read or write fails, +* or the negative value returned by e1000_phy_reset_dsp. +******************************************************************************/ +static int32_t +e1000_phy_force_speed_duplex(struct e1000_hw *hw) +{ + uint32_t ctrl; + int32_t ret_val; + uint16_t mii_ctrl_reg; + uint16_t mii_status_reg; + uint16_t phy_data; + uint16_t i; + + DEBUGFUNC("e1000_phy_force_speed_duplex"); + + /* Turn off Flow control if we are forcing speed and duplex. */ + hw->fc = e1000_fc_none; + + DEBUGOUT1("hw->fc = %d\n", hw->fc); + + /* Read the Device Control Register. */ + ctrl = E1000_READ_REG(hw, CTRL); + + /* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */ + ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + ctrl &= ~(DEVICE_SPEED_MASK); + + /* Clear the Auto Speed Detect Enable bit. */ + ctrl &= ~E1000_CTRL_ASDE; + + /* Read the MII Control Register. */ + if(e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + + /* We need to disable autoneg in order to force link and duplex. */ + + mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN; + + /* Are we forcing Full or Half Duplex?
*/ + if(hw->forced_speed_duplex == e1000_100_full || + hw->forced_speed_duplex == e1000_10_full) { + /* We want to force full duplex so we SET the full duplex bits in the + * Device and MII Control Registers. + */ + ctrl |= E1000_CTRL_FD; + mii_ctrl_reg |= MII_CR_FULL_DUPLEX; + DEBUGOUT("Full Duplex\n"); + } else { + /* We want to force half duplex so we CLEAR the full duplex bits in + * the Device and MII Control Registers. + */ + ctrl &= ~E1000_CTRL_FD; + mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX; + DEBUGOUT("Half Duplex\n"); + } + + /* Are we forcing 100Mbps??? */ + if(hw->forced_speed_duplex == e1000_100_full || + hw->forced_speed_duplex == e1000_100_half) { + /* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */ + ctrl |= E1000_CTRL_SPD_100; + mii_ctrl_reg |= MII_CR_SPEED_100; + mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); + DEBUGOUT("Forcing 100mb "); + } else { + /* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. */ + ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); + mii_ctrl_reg |= MII_CR_SPEED_10; + mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); + DEBUGOUT("Forcing 10mb "); + } + + e1000_config_collision_dist(hw); + + /* Write the configured values back to the Device Control Reg. */ + E1000_WRITE_REG(hw, CTRL, ctrl); + + if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + + /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI + * forced whenever speed and duplex are forced. + */ + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + DEBUGOUT1("M88E1000 PSCR: %x \n", phy_data); + + /* Need to reset the PHY or these changes will be ignored */ + mii_ctrl_reg |= MII_CR_RESET; + + /* Write back the modified PHY MII control register.
*/ + if(e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + udelay(1); + + /* The wait_autoneg_complete flag may be a little misleading here. + * Since we are forcing speed and duplex, Auto-Neg is not enabled. + * But we do want to delay for a period while forcing only so we + * don't generate false No Link messages. So we will wait here + * only if the user has set wait_autoneg_complete to 1, which is + * the default. + */ + if(hw->wait_autoneg_complete) { + /* We will wait for link to come up (Auto-Neg is disabled here). */ + DEBUGOUT("Waiting for forced speed/duplex link.\n"); + mii_status_reg = 0; + + /* Poll for link up to PHY_FORCE_TIME times, with msec_delay(100) between. */ + for(i = PHY_FORCE_TIME; i > 0; i--) { + /* Read the MII Status Register twice back-to-back and check the + * Link Status bit. + */ + if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(mii_status_reg & MII_SR_LINK_STATUS) break; + msec_delay(100); + } + if(i == 0) { /* We didn't get link */ + /* Reset the DSP and wait again for link. */ + + ret_val = e1000_phy_reset_dsp(hw); + if(ret_val < 0) { + DEBUGOUT("Error Resetting PHY DSP\n"); + return ret_val; + } + } + /* This loop will early-out if the link condition has been met. */ + for(i = PHY_FORCE_TIME; i > 0; i--) { + if(mii_status_reg & MII_SR_LINK_STATUS) break; + msec_delay(100); + /* Read the MII Status Register twice; the Link Status bit is tested + * at the top of the next iteration.
+ */ + if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + } + } + + /* Because we reset the PHY above, we need to re-force TX_CLK in the + * Extended PHY Specific Control Register to 25MHz clock. This value + * defaults back to a 2.5MHz clock when the PHY is reset. + */ + if(e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + phy_data |= M88E1000_EPSCR_TX_CLK_25; + if(e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + + /* In addition, because of the s/w reset above, we need to enable CRS on + * TX. This must be set for both full and half duplex operation. + */ + if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + return 0; +} + +/****************************************************************************** +* Sets the collision distance in the Transmit Control register +* +* hw - Struct containing variables accessed by shared code +* +* Link should have been established previously. Read-modify-writes TCTL: the +* E1000_TCTL_COLD field is cleared, E1000_COLLISION_DISTANCE is shifted into it +* and the write is flushed. The Device Status register is not read here.
+******************************************************************************/ +void +e1000_config_collision_dist(struct e1000_hw *hw) +{ + uint32_t tctl; + + tctl = E1000_READ_REG(hw, TCTL); + + tctl &= ~E1000_TCTL_COLD; + tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; + + E1000_WRITE_REG(hw, TCTL, tctl); + E1000_WRITE_FLUSH(hw); +} + +/****************************************************************************** +* Sets MAC speed and duplex settings to reflect those in the PHY +* +* hw - Struct containing variables accessed by shared code +* +* The speed and duplex are read from the M88E1000 PHY Specific Status register +* and forced into the Device Control register. +* +* Returns 0 on success or -E1000_ERR_PHY if the PHY register read fails. +******************************************************************************/ +static int32_t +e1000_config_mac_to_phy(struct e1000_hw *hw) +{ + uint32_t ctrl; + uint16_t phy_data; + + DEBUGFUNC("e1000_config_mac_to_phy"); + + /* Read the Device Control Register and set the bits to Force Speed + * and Duplex. + */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); + + /* Set up duplex in the Device Control and Transmit Control + * registers depending on negotiated values. + */ + if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD; + else ctrl &= ~E1000_CTRL_FD; + + e1000_config_collision_dist(hw); + + /* Set up speed in the Device Control register depending on + * negotiated values. If the PHY reports neither 1000 nor 100, the + * speed select bits cleared above are left as they are. + */ + if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) + ctrl |= E1000_CTRL_SPD_1000; + else if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) + ctrl |= E1000_CTRL_SPD_100; + /* Write the configured values back to the Device Control Reg.
*/ + E1000_WRITE_REG(hw, CTRL, ctrl); + return 0; +} + +/****************************************************************************** + * Forces the MAC's flow control settings. + * + * hw - Struct containing variables accessed by shared code + * + * Sets the TFCE and RFCE bits in the device control register to reflect + * the adapter settings. TFCE and RFCE need to be explicitly set by + * software when a Copper PHY is used because autonegotiation is managed + * by the PHY rather than the MAC. Software must also configure these + * bits when link is forced on a fiber connection. + * + * Returns 0 on success or -E1000_ERR_CONFIG if hw->fc holds a value other + * than the four handled below. + *****************************************************************************/ +static int32_t +e1000_force_mac_fc(struct e1000_hw *hw) +{ + uint32_t ctrl; + + DEBUGFUNC("e1000_force_mac_fc"); + + /* Get the current configuration of the Device Control Register */ + ctrl = E1000_READ_REG(hw, CTRL); + + /* Because we didn't get link via the internal auto-negotiation + * mechanism (we either forced link or we got link via PHY + * auto-neg), we have to manually enable/disable transmit and + * receive flow control. + * + * The "Case" statement below enables/disables flow control + * according to the "hw->fc" parameter. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause + * frames but not send pause frames). + * 2: Tx flow control is enabled (we can send pause + * frames but we do not receive pause frames). + * 3: Both Rx and TX flow control (symmetric) is enabled. + * other: No other values should be possible at this point.
+ */ + + switch (hw->fc) { + case e1000_fc_none: + ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); + break; + case e1000_fc_rx_pause: + ctrl &= (~E1000_CTRL_TFCE); + ctrl |= E1000_CTRL_RFCE; + break; + case e1000_fc_tx_pause: + ctrl &= (~E1000_CTRL_RFCE); + ctrl |= E1000_CTRL_TFCE; + break; + case e1000_fc_full: + ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + } + + /* Disable TX Flow Control for 82542 (rev 2.0) */ + if(hw->mac_type == e1000_82542_rev2_0) + ctrl &= (~E1000_CTRL_TFCE); + + E1000_WRITE_REG(hw, CTRL, ctrl); + return 0; +} + +/****************************************************************************** + * Configures flow control settings after link is established + * + * hw - Struct containing variables accessed by shared code + * + * Should be called immediately after a valid link has been established. + * Forces MAC flow control settings if link was forced. When in MII/GMII mode + * and autonegotiation is enabled, the MAC flow control settings will be set + * based on the flow control negotiated by the PHY. In TBI mode, the TFCE + * and RFCE bits will be automatically set to the negotiated flow control mode. + * + * Returns 0 on success, -E1000_ERR_PHY if a PHY register read fails, or the + * negative value returned by e1000_force_mac_fc. + *****************************************************************************/ +int32_t +e1000_config_fc_after_link_up(struct e1000_hw *hw) +{ + int32_t ret_val; + uint16_t mii_status_reg; + uint16_t mii_nway_adv_reg; + uint16_t mii_nway_lp_ability_reg; + uint16_t speed; + uint16_t duplex; + + DEBUGFUNC("e1000_config_fc_after_link_up"); + + /* Check for the case where we have fiber media and auto-neg failed + * so we had to force link. In this case, we need to force the + * configuration of the MAC to match the "fc" parameter.
+ */ + if(((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) || + ((hw->media_type == e1000_media_type_copper) && (!hw->autoneg))) { /* forced link: fiber after failed autoneg, or copper with autoneg off */ + ret_val = e1000_force_mac_fc(hw); + if(ret_val < 0) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + } + + /* Check for the case where we have copper media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner have + * flow control configured. + */ + if((hw->media_type == e1000_media_type_copper) && hw->autoneg) { + /* Read the MII Status Register and check to see if AutoNeg + * has completed. We read this twice because this reg has + * some "sticky" (latched) bits. + */ + if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) { + DEBUGOUT("PHY Read Error \n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) { + DEBUGOUT("PHY Read Error \n"); + return -E1000_ERR_PHY; + } + + if(mii_status_reg & MII_SR_AUTONEG_COMPLETE) { + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement Register + * (Address 4) and the Auto_Negotiation Base Page Ability + * Register (Address 5) to determine how flow control was + * negotiated. + */ + if(e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + + /* Two bits in the Auto Negotiation Advertisement Register + * (Address 4) and two bits in the Auto Negotiation Base + * Page Ability Register (Address 5) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings.
+ * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | e1000_fc_none + * 0 | 1 | 0 | DC | e1000_fc_none + * 0 | 1 | 1 | 0 | e1000_fc_none + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + * 1 | 0 | 0 | DC | e1000_fc_none + * 1 | DC | 1 | DC | e1000_fc_full + * 1 | 1 | 0 | 0 | e1000_fc_none + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + * + */ + /* Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | e1000_fc_full + * + */ + if((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { + /* Now we need to check if the user selected RX ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise RX + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if(hw->original_fc == e1000_fc_full) { + hw->fc = e1000_fc_full; + DEBUGOUT("Flow Control = FULL.\r\n"); + } else { + hw->fc = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n"); + } + } + /* For transmitting PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + * + */ + else if(!(mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc = e1000_fc_tx_pause; + DEBUGOUT("Flow Control = TX PAUSE frames only.\r\n"); + } + /* For receiving PAUSE frames ONLY.
+ * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + * + */ + else if((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n"); + } + /* Per the IEEE spec, at this point flow control should be + * disabled. However, we want to consider that we could + * be connected to a legacy switch that doesn't advertise + * desired flow control, but can be forced on the link + * partner. So if we advertised no flow control, that is + * what we will resolve to. If we advertised some kind of + * receive capability (Rx Pause Only or Full Flow Control) + * and the link partner advertised none, we will configure + * ourselves to enable Rx Flow Control only. We can do + * this safely for two reasons: If the link partner really + * didn't want flow control enabled, and we enable Rx, no + * harm done since we won't be receiving any PAUSE frames + * anyway. If the intent on the link partner was to have + * flow control enabled, then by us enabling RX only, we + * can at least receive pause frames and process them. + * This is a good idea because in most cases, since we are + * predominantly a server NIC, more times than not we will + * be asked to delay transmission of packets than asking + * our link partner to pause transmission of frames. + */ + else if(hw->original_fc == e1000_fc_none || + hw->original_fc == e1000_fc_tx_pause) { + hw->fc = e1000_fc_none; + DEBUGOUT("Flow Control = NONE.\r\n"); + } else { + hw->fc = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n"); + } + + /* Now we need to do one last check... If we auto- + * negotiated to HALF DUPLEX, flow control should not be + * enabled per IEEE 802.3 spec.
+ */ + e1000_get_speed_and_duplex(hw, &speed, &duplex); + + if(duplex == HALF_DUPLEX) + hw->fc = e1000_fc_none; + + /* Now we call a subroutine to actually force the MAC + * controller to use the correct flow control settings. + */ + ret_val = e1000_force_mac_fc(hw); + if(ret_val < 0) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + } else { + DEBUGOUT("Copper PHY and Auto Neg has not completed.\r\n"); + } + } + return 0; +} + +/****************************************************************************** + * Checks to see if the link status of the hardware has changed. + * + * hw - Struct containing variables accessed by shared code + * + * Called by any function that needs to check the link status of the adapter. + * + * Returns 0 when no link is detected or when handling completes normally, a + * negative E1000_ERR_* value if a PHY access or a configuration helper fails, + * and -E1000_ERR_CONFIG when copper link is up while autoneg is disabled. + *****************************************************************************/ +int32_t +e1000_check_for_link(struct e1000_hw *hw) +{ + uint32_t rxcw; + uint32_t ctrl; + uint32_t status; + uint32_t rctl; + uint32_t signal; + int32_t ret_val; + uint16_t phy_data; + uint16_t lp_capability; + + DEBUGFUNC("e1000_check_for_link"); + + /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be + * set when the optics detect a signal. On older adapters, it will be + * cleared when there is a signal + */ + if(hw->mac_type > e1000_82544) signal = E1000_CTRL_SWDPIN1; + else signal = 0; + + ctrl = E1000_READ_REG(hw, CTRL); + status = E1000_READ_REG(hw, STATUS); + rxcw = E1000_READ_REG(hw, RXCW); + + /* If we have a copper PHY then we only want to go out to the PHY + * registers to see if Auto-Neg has completed and/or if our link + * status has changed. The get_link_status flag will be set if we + * receive a Link Status Change interrupt or we have Rx Sequence + * Errors. + */ + if((hw->media_type == e1000_media_type_copper) && hw->get_link_status) { + /* First we want to see if the MII Status Register reports + * link. If so, then we want to get the current speed/duplex + * of the PHY.
+ * Read the register twice since the link bit is sticky. + */ + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + + if(phy_data & MII_SR_LINK_STATUS) { + hw->get_link_status = FALSE; + } else { + /* No link detected */ + return 0; + } + + /* If we are forcing speed/duplex, then we simply return since + * we have already determined whether we have link or not. + * NOTE(review): the value returned on this path is -E1000_ERR_CONFIG, + * not 0 - confirm that callers expect this. + */ + if(!hw->autoneg) return -E1000_ERR_CONFIG; + + /* We have a M88E1000 PHY and Auto-Neg is enabled. If we + * have Si on board that is 82544 or newer, Auto + * Speed Detection takes care of MAC speed/duplex + * configuration. So we only need to configure Collision + * Distance in the MAC. Otherwise, we need to force + * speed/duplex on the MAC to the current PHY speed/duplex + * settings. + */ + if(hw->mac_type >= e1000_82544) + e1000_config_collision_dist(hw); + else { + ret_val = e1000_config_mac_to_phy(hw); + if(ret_val < 0) { + DEBUGOUT("Error configuring MAC to PHY settings\n"); + return ret_val; + } + } + + /* Configure Flow Control now that Auto-Neg has completed. First, we + * need to restore the desired flow control settings because we may + * have had to re-autoneg with a different link partner. + */ + ret_val = e1000_config_fc_after_link_up(hw); + if(ret_val < 0) { + DEBUGOUT("Error configuring flow control\n"); + return ret_val; + } + + /* At this point we know that we are on copper and we have + * auto-negotiated link. These are conditions for checking the link + * partner capability register. We use the link partner capability to + * determine if TBI Compatibility needs to be turned on or off. If + * the link partner advertises any speed in addition to Gigabit, then + * we assume that they are GMII-based, and TBI compatibility is not + * needed.
If no other speeds are advertised, we assume the link + * partner is TBI-based, and we turn on TBI Compatibility. + */ + if(hw->tbi_compatibility_en) { + if(e1000_read_phy_reg(hw, PHY_LP_ABILITY, &lp_capability) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(lp_capability & (NWAY_LPAR_10T_HD_CAPS | + NWAY_LPAR_10T_FD_CAPS | + NWAY_LPAR_100TX_HD_CAPS | + NWAY_LPAR_100TX_FD_CAPS | + NWAY_LPAR_100T4_CAPS)) { + /* If our link partner advertises anything in addition to + * gigabit, we do not need to enable TBI compatibility. + */ + if(hw->tbi_compatibility_on) { + /* If we previously were in the mode, turn it off. */ + rctl = E1000_READ_REG(hw, RCTL); + rctl &= ~E1000_RCTL_SBP; + E1000_WRITE_REG(hw, RCTL, rctl); + hw->tbi_compatibility_on = FALSE; + } + } else { + /* If TBI compatibility was previously off, turn it on. For + * compatibility with a TBI link partner, we will store bad + * packets. Some frames have an additional byte on the end and + * will look like CRC errors to the hardware. + */ + if(!hw->tbi_compatibility_on) { + hw->tbi_compatibility_on = TRUE; + rctl = E1000_READ_REG(hw, RCTL); + rctl |= E1000_RCTL_SBP; + E1000_WRITE_REG(hw, RCTL, rctl); + } + } + } + } + /* If we don't have link (auto-negotiation failed or link partner cannot + * auto-negotiate), the cable is plugged in (we have signal), and our + * link partner is not trying to auto-negotiate with us (we are receiving + * idles or data), we need to force link up. We also need to give + * auto-negotiation time to complete, in case the cable was just plugged + * in. The autoneg_failed flag does this.
+ */ + else if((hw->media_type == e1000_media_type_fiber) && + (!(status & E1000_STATUS_LU)) && + ((ctrl & E1000_CTRL_SWDPIN1) == signal) && + (!(rxcw & E1000_RXCW_C))) { + if(hw->autoneg_failed == 0) { /* first time here: only record the failure and return; link is forced on a later call */ + hw->autoneg_failed = 1; + return 0; + } + DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\r\n"); + + /* Disable auto-negotiation in the TXCW register */ + E1000_WRITE_REG(hw, TXCW, (hw->txcw & ~E1000_TXCW_ANE)); + + /* Force link-up and also force full-duplex. */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); + E1000_WRITE_REG(hw, CTRL, ctrl); + + /* Configure Flow Control after forcing link up. */ + ret_val = e1000_config_fc_after_link_up(hw); + if(ret_val < 0) { + DEBUGOUT("Error configuring flow control\n"); + return ret_val; + } + } + /* If we are forcing link and we are receiving /C/ ordered sets, re-enable + * auto-negotiation in the TXCW register and disable forced link in the + * Device Control register in an attempt to auto-negotiate with our link + * partner. + */ + else if((hw->media_type == e1000_media_type_fiber) && + (ctrl & E1000_CTRL_SLU) && + (rxcw & E1000_RXCW_C)) { + DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\r\n"); + E1000_WRITE_REG(hw, TXCW, hw->txcw); + E1000_WRITE_REG(hw, CTRL, (ctrl & ~E1000_CTRL_SLU)); + } + return 0; +} + +/****************************************************************************** + * Detects the current speed and duplex settings of the hardware.
+ * + * hw - Struct containing variables accessed by shared code + * speed - Speed of the connection + * duplex - Duplex setting of the connection + *****************************************************************************/ +void +e1000_get_speed_and_duplex(struct e1000_hw *hw, + uint16_t *speed, + uint16_t *duplex) +{ + uint32_t status; + + DEBUGFUNC("e1000_get_speed_and_duplex"); + + if(hw->mac_type >= e1000_82543) { + status = E1000_READ_REG(hw, STATUS); + if(status & E1000_STATUS_SPEED_1000) { + *speed = SPEED_1000; + DEBUGOUT("1000 Mbs, "); + } else if(status & E1000_STATUS_SPEED_100) { + *speed = SPEED_100; + DEBUGOUT("100 Mbs, "); + } else { + *speed = SPEED_10; + DEBUGOUT("10 Mbs, "); + } + + if(status & E1000_STATUS_FD) { + *duplex = FULL_DUPLEX; + DEBUGOUT("Full Duplex\r\n"); + } else { + *duplex = HALF_DUPLEX; + DEBUGOUT(" Half Duplex\r\n"); + } + } else { /* mac_type below e1000_82543: STATUS is not consulted, 1000/full is reported */ + DEBUGOUT("1000 Mbs, Full Duplex\r\n"); + *speed = SPEED_1000; + *duplex = FULL_DUPLEX; + } +} + +/****************************************************************************** +* Blocks until autoneg completes or times out (~4.5 seconds) +* +* hw - Struct containing variables accessed by shared code +* +* Returns 0 both when autoneg completes and when the wait times out; +* -E1000_ERR_PHY is returned only if a PHY register read fails. +******************************************************************************/ +int32_t +e1000_wait_autoneg(struct e1000_hw *hw) +{ + uint16_t i; + uint16_t phy_data; + + DEBUGFUNC("e1000_wait_autoneg"); + DEBUGOUT("Waiting for Auto-Neg to complete.\n"); + + /* We will wait for autoneg to complete or 4.5 seconds to expire. */ + for(i = PHY_AUTO_NEG_TIME; i > 0; i--) { + /* Read the MII Status Register and wait for Auto-Neg + * Complete bit to be set.
+ */ + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + if(phy_data & MII_SR_AUTONEG_COMPLETE) { + return 0; + } + msec_delay(100); + } + return 0; /* loop ran out without seeing Auto-Neg Complete; still reported as 0 */ +} + +/****************************************************************************** +* Raises the Management Data Clock +* +* hw - Struct containing variables accessed by shared code +* ctrl - Device control register's current value (only read through the +* pointer; the MDC bit is set in the value written, not in *ctrl) +******************************************************************************/ +static void +e1000_raise_mdi_clk(struct e1000_hw *hw, + uint32_t *ctrl) +{ + /* Raise the clock input to the Management Data Clock (by setting the MDC + * bit), and then delay 2 microseconds. + */ + E1000_WRITE_REG(hw, CTRL, (*ctrl | E1000_CTRL_MDC)); + E1000_WRITE_FLUSH(hw); + udelay(2); +} + +/****************************************************************************** +* Lowers the Management Data Clock +* +* hw - Struct containing variables accessed by shared code +* ctrl - Device control register's current value (only read through the +* pointer; the MDC bit is cleared in the value written, not in *ctrl) +******************************************************************************/ +static void +e1000_lower_mdi_clk(struct e1000_hw *hw, + uint32_t *ctrl) +{ + /* Lower the clock input to the Management Data Clock (by clearing the MDC + * bit), and then delay 2 microseconds. + */ + E1000_WRITE_REG(hw, CTRL, (*ctrl & ~E1000_CTRL_MDC)); + E1000_WRITE_FLUSH(hw); + udelay(2); +} + +/****************************************************************************** +* Shifts data bits out to the PHY +* +* hw - Struct containing variables accessed by shared code +* data - Data to send out to the PHY +* count - Number of bits to shift out +* +* Bits are shifted out in MSB to LSB order.
+******************************************************************************/ +static void +e1000_shift_out_mdi_bits(struct e1000_hw *hw, + uint32_t data, + uint16_t count) +{ + uint32_t ctrl; + uint32_t mask; + + /* We need to shift "count" number of bits out to the PHY. So, the value + * in the "data" parameter will be shifted out to the PHY one bit at a + * time. In order to do this, "data" must be broken down into bits. + * The mask starts at bit (count - 1) and walks down to bit 0. + * NOTE(review): assumes count is between 1 and 32 - confirm callers. + */ + mask = 0x01; + mask <<= (count - 1); + + ctrl = E1000_READ_REG(hw, CTRL); + + /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ + ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); + + while(mask) { + /* A "1" is shifted out to the PHY by setting the MDIO bit to "1" and + * then raising and lowering the Management Data Clock. A "0" is + * shifted out to the PHY by setting the MDIO bit to "0" and then + * raising and lowering the clock. + */ + if(data & mask) ctrl |= E1000_CTRL_MDIO; + else ctrl &= ~E1000_CTRL_MDIO; + + E1000_WRITE_REG(hw, CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + udelay(2); + + e1000_raise_mdi_clk(hw, &ctrl); + e1000_lower_mdi_clk(hw, &ctrl); + + mask = mask >> 1; + } +} + +/****************************************************************************** +* Shifts data bits in from the PHY +* +* hw - Struct containing variables accessed by shared code +* +* Bits are shifted in in MSB to LSB order. +* +* Returns the 16 data bits clocked in from the MDIO pin. +******************************************************************************/ +static uint16_t +e1000_shift_in_mdi_bits(struct e1000_hw *hw) +{ + uint32_t ctrl; + uint16_t data = 0; + uint8_t i; + + /* In order to read a register from the PHY, we need to shift in a total + * of 18 bits from the PHY. The first two bit (turnaround) times are used + * to avoid contention on the MDIO pin when a read operation is performed. + * These two bits are ignored by us and thrown away.
Bits are "shifted in" + * by raising the input to the Management Data Clock (setting the MDC bit), + * and then reading the value of the MDIO bit. + */ + ctrl = E1000_READ_REG(hw, CTRL); + + /* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as input. */ + ctrl &= ~E1000_CTRL_MDIO_DIR; + ctrl &= ~E1000_CTRL_MDIO; + + E1000_WRITE_REG(hw, CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + /* Raise and Lower the clock before reading in the data. This accounts for + * the turnaround bits. The first clock occurred when we clocked out the + * last bit of the Register Address. + */ + e1000_raise_mdi_clk(hw, &ctrl); + e1000_lower_mdi_clk(hw, &ctrl); + + for(data = 0, i = 0; i < 16; i++) { + data = data << 1; + e1000_raise_mdi_clk(hw, &ctrl); + ctrl = E1000_READ_REG(hw, CTRL); + /* Check to see if we shifted in a "1". */ + if(ctrl & E1000_CTRL_MDIO) data |= 1; + e1000_lower_mdi_clk(hw, &ctrl); + } + + e1000_raise_mdi_clk(hw, &ctrl); /* one more clock pulse after the 16 data bits */ + e1000_lower_mdi_clk(hw, &ctrl); + + return data; +} + +/***************************************************************************** +* Reads the value from a PHY register +* +* hw - Struct containing variables accessed by shared code +* reg_addr - address of the PHY register to read +* phy_data - receives the 16-bit value read from the register +* +* Returns 0 on success, -E1000_ERR_PARAM if reg_addr is greater than +* MAX_PHY_REG_ADDRESS, or -E1000_ERR_PHY if the MDI read does not complete or +* reports an error. +******************************************************************************/ +int32_t +e1000_read_phy_reg(struct e1000_hw *hw, + uint32_t reg_addr, + uint16_t *phy_data) +{ + uint32_t i; + uint32_t mdic = 0; + const uint32_t phy_addr = 1; + + DEBUGFUNC("e1000_read_phy_reg"); + + if(reg_addr > MAX_PHY_REG_ADDRESS) { + DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); + return -E1000_ERR_PARAM; + } + + if(hw->mac_type > e1000_82543) { + /* Set up Op-code, Phy Address, and register address in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data.
+ */ + mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) | + (phy_addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_READ)); + + E1000_WRITE_REG(hw, MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed (at most 64 + * polls, 10 microseconds apart) + */ + for(i = 0; i < 64; i++) { + udelay(10); + mdic = E1000_READ_REG(hw, MDIC); + if(mdic & E1000_MDIC_READY) break; + } + if(!(mdic & E1000_MDIC_READY)) { + DEBUGOUT("MDI Read did not complete\n"); + return -E1000_ERR_PHY; + } + if(mdic & E1000_MDIC_ERROR) { + DEBUGOUT("MDI Error\n"); + return -E1000_ERR_PHY; + } + *phy_data = (uint16_t) mdic; + } else { + /* We must first send a preamble through the MDIO pin to signal the + * beginning of an MII instruction. This is done by sending 32 + * consecutive "1" bits. + */ + e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); + + /* Now combine the next few fields that are required for a read + * operation. We use this method instead of calling the + * e1000_shift_out_mdi_bits routine five different times. The format of + * a MII read instruction consists of a shift out of 14 bits and is + * defined as follows: + * Preamble | SOF | Op Code | Phy Addr | Reg Addr + * followed by a shift in of 18 bits. The first two bits shifted in + * are TurnAround bits used to avoid contention on the MDIO pin when a + * READ operation is performed. These two bits are thrown away + * followed by a shift in of 16 bits which contains the desired data. + */ + mdic = ((reg_addr) | (phy_addr << 5) | + (PHY_OP_READ << 10) | (PHY_SOF << 12)); + + e1000_shift_out_mdi_bits(hw, mdic, 14); + + /* Now that we've shifted out the read command to the MII, we need to + * "shift in" the 16-bit value (18 total bits) of the requested PHY + * register address.
+ */ + *phy_data = e1000_shift_in_mdi_bits(hw); + } + return 0; +} + +/****************************************************************************** +* Writes a value to a PHY register +* +* hw - Struct containing variables accessed by shared code +* reg_addr - address of the PHY register to write +* phy_data - data to write to the PHY register +* +* Returns 0 on success, -E1000_ERR_PARAM if reg_addr is greater than +* MAX_PHY_REG_ADDRESS, or -E1000_ERR_PHY if the MDI Control register write +* does not complete. +******************************************************************************/ +int32_t +e1000_write_phy_reg(struct e1000_hw *hw, + uint32_t reg_addr, + uint16_t phy_data) +{ + uint32_t i; + uint32_t mdic = 0; + const uint32_t phy_addr = 1; + + DEBUGFUNC("e1000_write_phy_reg"); + + if(reg_addr > MAX_PHY_REG_ADDRESS) { + DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); + return -E1000_ERR_PARAM; + } + + if(hw->mac_type > e1000_82543) { + /* Set up Op-code, Phy Address, register address, and data intended + * for the PHY register in the MDI Control register. The MAC will take + * care of interfacing with the PHY to send the desired data. + */ + mdic = (((uint32_t) phy_data) | + (reg_addr << E1000_MDIC_REG_SHIFT) | + (phy_addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_WRITE)); + + E1000_WRITE_REG(hw, MDIC, mdic); + + /* Poll the ready bit to see if the MDI write completed (at most 64 + * polls, 10 microseconds apart) + */ + for(i = 0; i < 64; i++) { + udelay(10); + mdic = E1000_READ_REG(hw, MDIC); + if(mdic & E1000_MDIC_READY) break; + } + if(!(mdic & E1000_MDIC_READY)) { + DEBUGOUT("MDI Write did not complete\n"); + return -E1000_ERR_PHY; + } + } else { + /* We'll need to use the SW defined pins to shift the write command + * out to the PHY. We first send a preamble to the PHY to signal the + * beginning of the MII instruction. This is done by sending 32 + * consecutive "1" bits. + */ + e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); + + /* Now combine the remaining required fields that will indicate a + * write operation. We use this method instead of calling the + * e1000_shift_out_mdi_bits routine for each field in the command.
The + * format of a MII write instruction is as follows: + * Preamble | SOF | Op Code | Phy Addr | Reg Addr | Turnaround | Data. + * Everything after the preamble is packed into one 32-bit word (command + * fields in the upper 16 bits, data in the lower 16) and shifted out in + * a single call. + */ + mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) | + (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); + mdic <<= 16; + mdic |= (uint32_t) phy_data; + + e1000_shift_out_mdi_bits(hw, mdic, 32); + } + return 0; +} + +/****************************************************************************** +* Returns the PHY to the power-on reset state +* +* hw - Struct containing variables accessed by shared code +******************************************************************************/ +void +e1000_phy_hw_reset(struct e1000_hw *hw) +{ + uint32_t ctrl; + uint32_t ctrl_ext; + + DEBUGFUNC("e1000_phy_hw_reset"); + + DEBUGOUT("Resetting Phy...\n"); + + if(hw->mac_type > e1000_82543) { + /* Read the device control register and assert the E1000_CTRL_PHY_RST + * bit. Then, take it out of reset. The bit is held for 10 ms before + * the original CTRL value is written back. + */ + ctrl = E1000_READ_REG(hw, CTRL); + E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PHY_RST); + E1000_WRITE_FLUSH(hw); + msec_delay(10); + E1000_WRITE_REG(hw, CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + } else { + /* Read the Extended Device Control Register, assert the PHY_RESET_DIR + * bit to put the PHY into reset. Then, take it out of reset.
+ */ + ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; + ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; + E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); + msec_delay(10); + ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; + E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); + } + udelay(150); +} + +/****************************************************************************** +* Resets the PHY +* +* hw - Struct containing variables accessed by shared code +* +* Sets bit 15 of the MII Control register (read-modify-write of PHY_CTRL +* with MII_CR_RESET). +* +* Returns 0 on success or -E1000_ERR_PHY if the PHY register read or write +* fails. +******************************************************************************/ +int32_t +e1000_phy_reset(struct e1000_hw *hw) +{ + uint16_t phy_data; + + DEBUGFUNC("e1000_phy_reset"); + + if(e1000_read_phy_reg(hw, PHY_CTRL, &phy_data) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + phy_data |= MII_CR_RESET; + if(e1000_write_phy_reg(hw, PHY_CTRL, phy_data) < 0) { + DEBUGOUT("PHY Write Error\n"); + return -E1000_ERR_PHY; + } + udelay(1); + return 0; +} + +/****************************************************************************** +* Probes the expected PHY address for known PHY IDs +* +* hw - Struct containing variables accessed by shared code +* +* Stores the ID and revision read from the PHY in hw->phy_id and +* hw->phy_revision. Returns 0 when the ID matches the one expected for +* hw->mac_type, -E1000_ERR_PHY on a read failure or an unexpected ID, and +* -E1000_ERR_CONFIG for a MAC type that is not handled here. +******************************************************************************/ +int32_t +e1000_detect_gig_phy(struct e1000_hw *hw) +{ + uint16_t phy_id_high, phy_id_low; + boolean_t match = FALSE; + + DEBUGFUNC("e1000_detect_gig_phy"); + + /* Read the PHY ID Registers to identify which PHY is onboard.
*/ + if(e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + hw->phy_id = (uint32_t) (phy_id_high << 16); + udelay(2); + if(e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low) < 0) { + DEBUGOUT("PHY Read Error\n"); + return -E1000_ERR_PHY; + } + hw->phy_id |= (uint32_t) (phy_id_low & PHY_REVISION_MASK); + hw->phy_revision = (uint32_t) phy_id_low & ~PHY_REVISION_MASK; + + switch(hw->mac_type) { + case e1000_82543: + if(hw->phy_id == M88E1000_E_PHY_ID) match = TRUE; + break; + case e1000_82544: + if(hw->phy_id == M88E1000_I_PHY_ID) match = TRUE; + break; + case e1000_82540: + case e1000_82545: + case e1000_82546: + if(hw->phy_id == M88E1011_I_PHY_ID) match = TRUE; + break; + default: + DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type); + return -E1000_ERR_CONFIG; + } + if(match) { + DEBUGOUT1("PHY ID 0x%X detected\n", hw->phy_id); + return 0; + } + DEBUGOUT1("Invalid PHY ID 0x%X\n", hw->phy_id); + return -E1000_ERR_PHY; +} + +/****************************************************************************** +* Resets the PHY's DSP +* +* hw - Struct containing variables accessed by shared code +******************************************************************************/ +static int32_t +e1000_phy_reset_dsp(struct e1000_hw *hw) +{ + int32_t ret_val = -E1000_ERR_PHY; + DEBUGFUNC("e1000_phy_reset_dsp"); + + do { + if(e1000_write_phy_reg(hw, 29, 0x001d) < 0) break; + if(e1000_write_phy_reg(hw, 30, 0x00c1) < 0) break; + if(e1000_write_phy_reg(hw, 30, 0x0000) < 0) break; + ret_val = 0; + } while(0); + + if(ret_val < 0) DEBUGOUT("PHY Write Error\n"); + return ret_val; +} + +/****************************************************************************** +* Get PHY information from various PHY registers +* +* hw - Struct containing variables accessed by shared code +* phy_info - PHY information structure +******************************************************************************/ +int32_t +e1000_phy_get_info(struct 
e1000_hw *hw, + struct e1000_phy_info *phy_info) +{ + int32_t ret_val = -E1000_ERR_PHY; + uint16_t phy_data; + + DEBUGFUNC("e1000_phy_get_info"); + + phy_info->cable_length = e1000_cable_length_undefined; + phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined; + phy_info->cable_polarity = e1000_rev_polarity_undefined; + phy_info->polarity_correction = e1000_polarity_reversal_undefined; + phy_info->mdix_mode = e1000_auto_x_mode_undefined; + phy_info->local_rx = e1000_1000t_rx_status_undefined; + phy_info->remote_rx = e1000_1000t_rx_status_undefined; + + if(hw->media_type != e1000_media_type_copper) { + DEBUGOUT("PHY info is only valid for copper media\n"); + return -E1000_ERR_CONFIG; + } + + do { + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) break; + if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) break; + if((phy_data & MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) { + DEBUGOUT("PHY info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) + break; + phy_info->extended_10bt_distance = + (phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >> + M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT; + phy_info->polarity_correction = + (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >> + M88E1000_PSCR_POLARITY_REVERSAL_SHIFT; + + if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data) < 0) + break; + phy_info->cable_polarity = (phy_data & M88E1000_PSSR_REV_POLARITY) >> + M88E1000_PSSR_REV_POLARITY_SHIFT; + phy_info->mdix_mode = (phy_data & M88E1000_PSSR_MDIX) >> + M88E1000_PSSR_MDIX_SHIFT; + if(phy_data & M88E1000_PSSR_1000MBS) { + /* Cable Length Estimation and Local/Remote Receiver Information + * are only valid at 1000 Mbps. At other speeds these fields keep + * the "undefined" values assigned at the top of this function. + */ + phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT); + if(e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data) < 0) + break; + phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS)
>> + SR_1000T_LOCAL_RX_STATUS_SHIFT; + phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >> + SR_1000T_REMOTE_RX_STATUS_SHIFT; + } + ret_val = 0; + } while(0); + + if(ret_val < 0) DEBUGOUT("PHY Read Error\n"); + return ret_val; +} + +int32_t +e1000_validate_mdi_setting(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_validate_mdi_settings"); + + if(!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) { + DEBUGOUT("Invalid MDI setting detected\n"); + hw->mdix = 1; + return -E1000_ERR_CONFIG; + } + return 0; +} + +/****************************************************************************** + * Raises the EEPROM's clock input. + * + * hw - Struct containing variables accessed by shared code + * eecd - EECD's current value (updated in place with the SK bit set) + *****************************************************************************/ +static void +e1000_raise_ee_clk(struct e1000_hw *hw, + uint32_t *eecd) +{ + /* Raise the clock input to the EEPROM (by setting the SK bit), and then + * wait 50 microseconds. + */ + *eecd = *eecd | E1000_EECD_SK; + E1000_WRITE_REG(hw, EECD, *eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); +} + +/****************************************************************************** + * Lowers the EEPROM's clock input. + * + * hw - Struct containing variables accessed by shared code + * eecd - EECD's current value (updated in place with the SK bit cleared) + *****************************************************************************/ +static void +e1000_lower_ee_clk(struct e1000_hw *hw, + uint32_t *eecd) +{ + /* Lower the clock input to the EEPROM (by clearing the SK bit), and then + * wait 50 microseconds. + */ + *eecd = *eecd & ~E1000_EECD_SK; + E1000_WRITE_REG(hw, EECD, *eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); +} + +/****************************************************************************** + * Shift data bits out to the EEPROM.
+ * + * hw - Struct containing variables accessed by shared code + * data - data to send to the EEPROM + * count - number of bits to shift out + *****************************************************************************/ +static void +e1000_shift_out_ee_bits(struct e1000_hw *hw, + uint16_t data, + uint16_t count) +{ + uint32_t eecd; + uint32_t mask; + + /* We need to shift "count" bits out to the EEPROM. So, value in the + * "data" parameter will be shifted out to the EEPROM one bit at a time. + * In order to do this, "data" must be broken down into bits. The mask + * starts at bit (count - 1), so the highest of the "count" bits goes + * out first. + */ + mask = 0x01 << (count - 1); + eecd = E1000_READ_REG(hw, EECD); + eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); + do { + /* A "1" is shifted out to the EEPROM by setting bit "DI" to a "1", + * and then raising and then lowering the clock (the SK bit controls + * the clock input to the EEPROM). A "0" is shifted out to the EEPROM + * by setting "DI" to "0" and then raising and then lowering the clock. + */ + eecd &= ~E1000_EECD_DI; + + if(data & mask) + eecd |= E1000_EECD_DI; + + E1000_WRITE_REG(hw, EECD, eecd); + E1000_WRITE_FLUSH(hw); + + udelay(50); + + e1000_raise_ee_clk(hw, &eecd); + e1000_lower_ee_clk(hw, &eecd); + + mask = mask >> 1; + + } while(mask); + + /* We leave the "DI" bit set to "0" when we leave this routine. */ + eecd &= ~E1000_EECD_DI; + E1000_WRITE_REG(hw, EECD, eecd); +} + +/****************************************************************************** + * Shift data bits in from the EEPROM + * + * hw - Struct containing variables accessed by shared code + * + * Returns the 16 bits read, with the first bit received in the most + * significant position. + *****************************************************************************/ +static uint16_t +e1000_shift_in_ee_bits(struct e1000_hw *hw) +{ + uint32_t eecd; + uint32_t i; + uint16_t data; + + /* In order to read a register from the EEPROM, we need to shift 16 + * bits in from the EEPROM. Bits are "shifted in" by raising the clock + * input to the EEPROM (setting the SK bit), and then reading the value of + * the "DO" bit.
During this "shifting in" process the "DI" bit should + * always be clear. + */ + + eecd = E1000_READ_REG(hw, EECD); + + eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); + data = 0; + + for(i = 0; i < 16; i++) { + data = data << 1; + e1000_raise_ee_clk(hw, &eecd); + + eecd = E1000_READ_REG(hw, EECD); + + eecd &= ~(E1000_EECD_DI); + if(eecd & E1000_EECD_DO) + data |= 1; + + e1000_lower_ee_clk(hw, &eecd); + } + + return data; +} + +/****************************************************************************** + * Prepares EEPROM for access + * + * hw - Struct containing variables accessed by shared code + * + * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This + * function should be called before issuing a command to the EEPROM. + *****************************************************************************/ +static void +e1000_setup_eeprom(struct e1000_hw *hw) +{ + uint32_t eecd; + + eecd = E1000_READ_REG(hw, EECD); + + /* Clear SK and DI */ + eecd &= ~(E1000_EECD_SK | E1000_EECD_DI); + E1000_WRITE_REG(hw, EECD, eecd); + + /* Set CS */ + eecd |= E1000_EECD_CS; + E1000_WRITE_REG(hw, EECD, eecd); +} + +/****************************************************************************** + * Returns EEPROM to a "standby" state + * + * hw - Struct containing variables accessed by shared code + * + * Drops chip select and clock, then raises the clock, re-asserts chip + * select and lowers the clock again, waiting 50 microseconds after each step. + *****************************************************************************/ +static void +e1000_standby_eeprom(struct e1000_hw *hw) +{ + uint32_t eecd; + + eecd = E1000_READ_REG(hw, EECD); + + /* Deselect EEPROM */ + eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); + E1000_WRITE_REG(hw, EECD, eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); + + /* Clock high */ + eecd |= E1000_EECD_SK; + E1000_WRITE_REG(hw, EECD, eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); + + /* Select EEPROM */ + eecd |= E1000_EECD_CS; + E1000_WRITE_REG(hw, EECD, eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); + + /* Clock low */ + eecd &= ~E1000_EECD_SK; + E1000_WRITE_REG(hw, EECD, eecd); +
E1000_WRITE_FLUSH(hw); + udelay(50); +} + +/****************************************************************************** + * Raises then lowers the EEPROM's clock pin + * + * hw - Struct containing variables accessed by shared code + * + * Each edge is followed by a 50 microsecond delay. + *****************************************************************************/ +static void +e1000_clock_eeprom(struct e1000_hw *hw) +{ + uint32_t eecd; + + eecd = E1000_READ_REG(hw, EECD); + + /* Rising edge of clock */ + eecd |= E1000_EECD_SK; + E1000_WRITE_REG(hw, EECD, eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); + + /* Falling edge of clock */ + eecd &= ~E1000_EECD_SK; + E1000_WRITE_REG(hw, EECD, eecd); + E1000_WRITE_FLUSH(hw); + udelay(50); +} + +/****************************************************************************** + * Terminates a command by lowering the EEPROM's chip select pin + * + * hw - Struct containing variables accessed by shared code + * + * The data-in (DI) bit is cleared together with chip select, and the clock + * is then pulsed once via e1000_clock_eeprom(). + *****************************************************************************/ +static void +e1000_cleanup_eeprom(struct e1000_hw *hw) +{ + uint32_t eecd; + + eecd = E1000_READ_REG(hw, EECD); + + eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); + + E1000_WRITE_REG(hw, EECD, eecd); + + e1000_clock_eeprom(hw); +} + +/****************************************************************************** + * Reads a 16 bit word from the EEPROM.
+ * + * hw - Struct containing variables accessed by shared code + * offset - offset of word in the EEPROM to read + * data - word read from the EEPROM + *****************************************************************************/ +int32_t +e1000_read_eeprom(struct e1000_hw *hw, + uint16_t offset, + uint16_t *data) +{ + uint32_t eecd; + uint32_t i = 0; + boolean_t large_eeprom = FALSE; + + DEBUGFUNC("e1000_read_eeprom"); + + /* Request EEPROM Access. On MAC types newer than the 82544 the grant + * bit is polled up to 100 times, 5 microseconds apart; if it never + * appears the request is withdrawn and -E1000_ERR_EEPROM is returned. + */ + if(hw->mac_type > e1000_82544) { + eecd = E1000_READ_REG(hw, EECD); + if(eecd & E1000_EECD_SIZE) large_eeprom = TRUE; + eecd |= E1000_EECD_REQ; + E1000_WRITE_REG(hw, EECD, eecd); + eecd = E1000_READ_REG(hw, EECD); + while((!(eecd & E1000_EECD_GNT)) && (i < 100)) { + i++; + udelay(5); + eecd = E1000_READ_REG(hw, EECD); + } + if(!(eecd & E1000_EECD_GNT)) { + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, EECD, eecd); + DEBUGOUT("Could not acquire EEPROM grant\n"); + return -E1000_ERR_EEPROM; + } + } + + /* Prepare the EEPROM for reading */ + e1000_setup_eeprom(hw); + + /* Send the READ command (opcode + addr) */ + e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE, 3); + if(large_eeprom) { + /* If we have a 256 word EEPROM, there are 8 address bits */ + e1000_shift_out_ee_bits(hw, offset, 8); + } else { + /* If we have a 64 word EEPROM, there are 6 address bits */ + e1000_shift_out_ee_bits(hw, offset, 6); + } + + /* Read the data */ + *data = e1000_shift_in_ee_bits(hw); + + /* End this read operation */ + e1000_standby_eeprom(hw); + + /* Stop requesting EEPROM access */ + if(hw->mac_type > e1000_82544) { + eecd = E1000_READ_REG(hw, EECD); + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, EECD, eecd); + } + + return 0; +} + +/****************************************************************************** + * Verifies that the EEPROM has a valid checksum + * + * hw - Struct containing variables accessed by shared code + * + * Reads the first 64 16 bit words of the EEPROM and sums the values read.
+ * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is + * valid. The sum is accumulated in a 16 bit variable, so it wraps modulo + * 65536. + * + * Returns 0 if the checksum is valid, or -E1000_ERR_EEPROM if it is not or + * if an EEPROM read fails. + *****************************************************************************/ +int32_t +e1000_validate_eeprom_checksum(struct e1000_hw *hw) +{ + uint16_t checksum = 0; + uint16_t i, eeprom_data; + + DEBUGFUNC("e1000_validate_eeprom_checksum"); + + for(i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) { + if(e1000_read_eeprom(hw, i, &eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + checksum += eeprom_data; + } + + if(checksum == (uint16_t) EEPROM_SUM) { + return 0; + } else { + DEBUGOUT("EEPROM Checksum Invalid\n"); + return -E1000_ERR_EEPROM; + } +} + +/****************************************************************************** + * Calculates the EEPROM checksum and writes it to the EEPROM + * + * hw - Struct containing variables accessed by shared code + * + * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA. + * Writes the difference to word offset 63 of the EEPROM. + * + * Returns 0 on success or -E1000_ERR_EEPROM if an EEPROM read or the final + * write fails. + *****************************************************************************/ +int32_t +e1000_update_eeprom_checksum(struct e1000_hw *hw) +{ + uint16_t checksum = 0; + uint16_t i, eeprom_data; + + DEBUGFUNC("e1000_update_eeprom_checksum"); + + for(i = 0; i < EEPROM_CHECKSUM_REG; i++) { + if(e1000_read_eeprom(hw, i, &eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + checksum += eeprom_data; + } + checksum = (uint16_t) EEPROM_SUM - checksum; + if(e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, checksum) < 0) { + DEBUGOUT("EEPROM Write Error\n"); + return -E1000_ERR_EEPROM; + } + return 0; +} + +/****************************************************************************** + * Writes a 16 bit word to a given offset in the EEPROM.
+ * + * hw - Struct containing variables accessed by shared code + * offset - offset within the EEPROM to be written to + * data - 16 bit word to be written to the EEPROM + * + * If e1000_update_eeprom_checksum is not called after this function, the + * EEPROM will most likely contain an invalid checksum. + *****************************************************************************/ +int32_t +e1000_write_eeprom(struct e1000_hw *hw, + uint16_t offset, + uint16_t data) +{ + uint32_t eecd; + uint32_t i = 0; + int32_t status = 0; + boolean_t large_eeprom = FALSE; + + DEBUGFUNC("e1000_write_eeprom"); + + /* Request EEPROM Access. On MAC types newer than the 82544 the grant + * bit is polled up to 100 times, 5 microseconds apart; if it never + * appears the request is withdrawn and -E1000_ERR_EEPROM is returned. + */ + if(hw->mac_type > e1000_82544) { + eecd = E1000_READ_REG(hw, EECD); + if(eecd & E1000_EECD_SIZE) large_eeprom = TRUE; + eecd |= E1000_EECD_REQ; + E1000_WRITE_REG(hw, EECD, eecd); + eecd = E1000_READ_REG(hw, EECD); + while((!(eecd & E1000_EECD_GNT)) && (i < 100)) { + i++; + udelay(5); + eecd = E1000_READ_REG(hw, EECD); + } + if(!(eecd & E1000_EECD_GNT)) { + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, EECD, eecd); + DEBUGOUT("Could not acquire EEPROM grant\n"); + return -E1000_ERR_EEPROM; + } + } + + /* Prepare the EEPROM for writing */ + e1000_setup_eeprom(hw); + + /* Send the 9-bit (or 11-bit on large EEPROM) EWEN (write enable) command + * to the EEPROM (5-bit opcode plus 4/6-bit dummy). This puts the EEPROM + * into write/erase mode.
+ */ + e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE, 5); + if(large_eeprom) + e1000_shift_out_ee_bits(hw, 0, 6); + else + e1000_shift_out_ee_bits(hw, 0, 4); + + /* Prepare the EEPROM */ + e1000_standby_eeprom(hw); + + /* Send the Write command (3-bit opcode + addr) */ + e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE, 3); + if(large_eeprom) + /* If we have a 256 word EEPROM, there are 8 address bits */ + e1000_shift_out_ee_bits(hw, offset, 8); + else + /* If we have a 64 word EEPROM, there are 6 address bits */ + e1000_shift_out_ee_bits(hw, offset, 6); + + /* Send the data */ + e1000_shift_out_ee_bits(hw, data, 16); + + /* Toggle the CS line. This in effect tells the EEPROM to actually execute + * the command in question. + */ + e1000_standby_eeprom(hw); + + /* Now read DO repeatedly until it is high (equal to '1'). The EEPROM will + * signal that the command has been completed by raising the DO signal. + * If DO does not go high in 10 milliseconds (200 polls, 50 microseconds + * apart), then error out. The error is only recorded in "status" here so + * that the write-disable and cleanup steps below still run. + */ + for(i = 0; i < 200; i++) { + eecd = E1000_READ_REG(hw, EECD); + if(eecd & E1000_EECD_DO) break; + udelay(50); + } + if(i == 200) { + DEBUGOUT("EEPROM Write did not complete\n"); + status = -E1000_ERR_EEPROM; + } + + /* Recover from write */ + e1000_standby_eeprom(hw); + + /* Send the 9-bit (or 11-bit on large EEPROM) EWDS (write disable) command + * to the EEPROM (5-bit opcode plus 4/6-bit dummy). This takes the EEPROM + * out of write/erase mode.
+ */ + e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE, 5); + if(large_eeprom) + e1000_shift_out_ee_bits(hw, 0, 6); + else + e1000_shift_out_ee_bits(hw, 0, 4); + + /* Done with writing */ + e1000_cleanup_eeprom(hw); + + /* Stop requesting EEPROM access */ + if(hw->mac_type > e1000_82544) { + eecd = E1000_READ_REG(hw, EECD); + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, EECD, eecd); + } + + return status; +} + +/****************************************************************************** + * Reads the adapter's part number from the EEPROM + * + * hw - Struct containing variables accessed by shared code + * part_num - Adapter's part number + *****************************************************************************/ +int32_t +e1000_read_part_num(struct e1000_hw *hw, + uint32_t *part_num) +{ + uint16_t offset = EEPROM_PBA_BYTE_1; + uint16_t eeprom_data; + + DEBUGFUNC("e1000_read_part_num"); + + /* Get word 0 from EEPROM */ + if(e1000_read_eeprom(hw, offset, &eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + /* Save word 0 in upper half of part_num */ + *part_num = (uint32_t) (eeprom_data << 16); + + /* Get word 1 from EEPROM */ + if(e1000_read_eeprom(hw, ++offset, &eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + /* Save word 1 in lower half of part_num */ + *part_num |= eeprom_data; + + return 0; +} + +/****************************************************************************** + * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the + * second function of dual function devices + * + * hw - Struct containing variables accessed by shared code + *****************************************************************************/ +int32_t +e1000_read_mac_addr(struct e1000_hw * hw) +{ + uint16_t offset; + uint16_t eeprom_data, i; + + DEBUGFUNC("e1000_read_mac_addr"); + + for(i = 0; i < NODE_ADDRESS_SIZE; i += 2) { + offset = i >> 1; + if(e1000_read_eeprom(hw, offset, 
&eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF); + hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8); + } + if((hw->mac_type == e1000_82546) && + (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { + if(hw->perm_mac_addr[5] & 0x01) + hw->perm_mac_addr[5] &= ~(0x01); + else + hw->perm_mac_addr[5] |= 0x01; + } + for(i = 0; i < NODE_ADDRESS_SIZE; i++) + hw->mac_addr[i] = hw->perm_mac_addr[i]; + return 0; +} + +/****************************************************************************** + * Initializes receive address filters. + * + * hw - Struct containing variables accessed by shared code + * + * Places the MAC address in receive address register 0 and clears the rest + * of the receive addresss registers. Clears the multicast table. Assumes + * the receiver is in reset when the routine is called. + *****************************************************************************/ +void +e1000_init_rx_addrs(struct e1000_hw *hw) +{ + uint32_t i; + uint32_t addr_low; + uint32_t addr_high; + + DEBUGFUNC("e1000_init_rx_addrs"); + + /* Setup the receive address. */ + DEBUGOUT("Programming MAC Address into RAR[0]\n"); + addr_low = (hw->mac_addr[0] | + (hw->mac_addr[1] << 8) | + (hw->mac_addr[2] << 16) | (hw->mac_addr[3] << 24)); + + addr_high = (hw->mac_addr[4] | + (hw->mac_addr[5] << 8) | E1000_RAH_AV); + + E1000_WRITE_REG_ARRAY(hw, RA, 0, addr_low); + E1000_WRITE_REG_ARRAY(hw, RA, 1, addr_high); + + /* Zero out the other 15 receive addresses. */ + DEBUGOUT("Clearing RAR[1-15]\n"); + for(i = 1; i < E1000_RAR_ENTRIES; i++) { + E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); + E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); + } +} + +/****************************************************************************** + * Updates the MAC's list of multicast addresses. 
+ * + * hw - Struct containing variables accessed by shared code + * mc_addr_list - the list of new multicast addresses + * mc_addr_count - number of addresses + * pad - number of bytes between addresses in the list + * + * The given list replaces any existing list. Clears the last 15 receive + * address registers and the multicast table. Uses receive address registers + * for the first 15 multicast addresses, and hashes the rest into the + * multicast table. + *****************************************************************************/ +void +e1000_mc_addr_list_update(struct e1000_hw *hw, + uint8_t *mc_addr_list, + uint32_t mc_addr_count, + uint32_t pad) +{ + uint32_t hash_value; + uint32_t i; + uint32_t rar_used_count = 1; /* RAR[0] is used for our MAC address */ + + DEBUGFUNC("e1000_mc_addr_list_update"); + + /* Set the new number of MC addresses that we are being requested to use. */ + hw->num_mc_addrs = mc_addr_count; + + /* Clear RAR[1-15] */ + DEBUGOUT(" Clearing RAR[1-15]\n"); + for(i = rar_used_count; i < E1000_RAR_ENTRIES; i++) { + E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); + E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); + } + + /* Clear the MTA */ + DEBUGOUT(" Clearing MTA\n"); + for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++) { + E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); + } + + /* Add the new addresses. Entry i starts at byte offset + * i * (ETH_LENGTH_OF_ADDRESS + pad) in mc_addr_list. + */ + for(i = 0; i < mc_addr_count; i++) { + DEBUGOUT(" Adding the multicast addresses:\n"); + DEBUGOUT7(" MC Addr #%d =%.2X %.2X %.2X %.2X %.2X %.2X\n", i, + mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad)], + mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 1], + mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 2], + mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 3], + mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 4], + mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 5]); + + hash_value = e1000_hash_mc_addr(hw, + mc_addr_list + + (i * (ETH_LENGTH_OF_ADDRESS + pad))); + + DEBUGOUT1(" Hash value = 0x%03X\n", hash_value); + + /* Place this multicast
address in the RAR if there is room, + * else put it in the MTA + */ + if(rar_used_count < E1000_RAR_ENTRIES) { + e1000_rar_set(hw, + mc_addr_list + (i * (ETH_LENGTH_OF_ADDRESS + pad)), + rar_used_count); + rar_used_count++; + } else { + e1000_mta_set(hw, hash_value); + } + } + DEBUGOUT("MC Update Complete\n"); +} + +/****************************************************************************** + * Hashes an address to determine its location in the multicast table + * + * hw - Struct containing variables accessed by shared code + * mc_addr - the multicast address to hash + * + * Returns a 12-bit hash value built from bytes 4 and 5 of the address. An + * mc_filter_type outside 0-3 yields 0. + *****************************************************************************/ +uint32_t +e1000_hash_mc_addr(struct e1000_hw *hw, + uint8_t *mc_addr) +{ + uint32_t hash_value = 0; + + /* The portion of the address that is used for the hash table is + * determined by the mc_filter_type setting. + */ + switch (hw->mc_filter_type) { + /* [0] [1] [2] [3] [4] [5] + * 01 AA 00 12 34 56 + * LSB MSB + */ + case 0: + /* [47:36] i.e. 0x563 for above example address */ + hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4)); + break; + case 1: + /* [46:35] i.e. 0xAC6 for above example address */ + hash_value = ((mc_addr[4] >> 3) | (((uint16_t) mc_addr[5]) << 5)); + break; + case 2: + /* [45:34] i.e. 0x58D for above example address */ + hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6)); + break; + case 3: + /* [43:32] i.e. 0x634 for above example address */ + hash_value = ((mc_addr[4]) | (((uint16_t) mc_addr[5]) << 8)); + break; + } + + hash_value &= 0xFFF; + return hash_value; +} + +/****************************************************************************** + * Sets the bit in the multicast table corresponding to the hash value.
+ * + * hw - Struct containing variables accessed by shared code + * hash_value - Multicast address hash value + *****************************************************************************/ +void +e1000_mta_set(struct e1000_hw *hw, + uint32_t hash_value) +{ + uint32_t hash_bit, hash_reg; + uint32_t mta; + uint32_t temp; + + /* The MTA is a register array of 128 32-bit registers. + * It is treated like an array of 4096 bits. We want to set + * bit BitArray[hash_value]. So we figure out what register + * the bit is in, read it, OR in the new bit, then write + * back the new value. The register is determined by the + * upper 7 bits of the hash value and the bit within that + * register are determined by the lower 5 bits of the value. + */ + hash_reg = (hash_value >> 5) & 0x7F; + hash_bit = hash_value & 0x1F; + + mta = E1000_READ_REG_ARRAY(hw, MTA, hash_reg); + + /* hash_bit can be 31, so the shifted constant must be unsigned: + * shifting a signed 1 into the sign bit is not a defined operation. + */ + mta |= ((uint32_t) 1 << hash_bit); + + /* If we are on an 82544 and we are trying to write an odd offset + * in the MTA, save off the previous entry before writing and + * restore the old value after writing. + */ + if((hw->mac_type == e1000_82544) && ((hash_reg & 0x1) == 1)) { + temp = E1000_READ_REG_ARRAY(hw, MTA, (hash_reg - 1)); + E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta); + E1000_WRITE_REG_ARRAY(hw, MTA, (hash_reg - 1), temp); + } else { + E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta); + } +} + +/****************************************************************************** + * Puts an ethernet address into a receive address register.
+ * + * hw - Struct containing variables accessed by shared code + * addr - Address to put into receive address register + * index - Receive address register to write + *****************************************************************************/ +void +e1000_rar_set(struct e1000_hw *hw, + uint8_t *addr, + uint32_t index) +{ + uint32_t rar_low, rar_high; + + /* HW expects these in little endian so we reverse the byte order + * from network order (big endian) to little endian: addr[0] becomes the + * least significant byte of rar_low, and addr[4] and addr[5] fill the low + * two bytes of rar_high, which is written with the E1000_RAH_AV flag set. + */ + rar_low = ((uint32_t) addr[0] | + ((uint32_t) addr[1] << 8) | + ((uint32_t) addr[2] << 16) | ((uint32_t) addr[3] << 24)); + + rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8) | E1000_RAH_AV); + + E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low); /* low word goes to RA slot 2 * index */ + E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high); /* high word goes to the following slot */ +} + +/****************************************************************************** + * Writes a value to the specified offset in the VLAN filter table. + * + * On the 82544, a write to an odd offset is bracketed by a read of the + * preceding entry (offset - 1) and a write-back of that saved value. + * + * hw - Struct containing variables accessed by shared code + * offset - Offset in VLAN filter table to write + * value - Value to write into VLAN filter table + *****************************************************************************/ +void +e1000_write_vfta(struct e1000_hw *hw, + uint32_t offset, + uint32_t value) +{ + uint32_t temp; + + if((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) { + temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1)); + E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); + E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp); + } else { + E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); + } +} + +/****************************************************************************** + * Clears the VLAN filter table by writing 0 to each of its + * E1000_VLAN_FILTER_TBL_SIZE entries + * + * hw - Struct containing variables accessed by shared code + *****************************************************************************/ +void +e1000_clear_vfta(struct e1000_hw *hw) +{ + uint32_t offset; + + for(offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; 
offset++) + E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0); +} + +static int32_t +e1000_id_led_init(struct e1000_hw * hw) +{ /* Derives hw->ledctl_mode1 and hw->ledctl_mode2 from the current LEDCTL + * register value and the EEPROM_ID_LED_SETTINGS word. Returns 0 on + * success (including the early return for MAC types older than the + * 82540) and -E1000_ERR_EEPROM when the EEPROM read fails. + */ + uint32_t ledctl; + const uint32_t ledctl_mask = 0x000000FF; /* one 8-bit field of LEDCTL */ + const uint32_t ledctl_on = E1000_LEDCTL_MODE_LED_ON; + const uint32_t ledctl_off = E1000_LEDCTL_MODE_LED_OFF; + uint16_t eeprom_data, i, temp; + const uint16_t led_mask = 0x0F; /* one 4-bit field of the EEPROM word */ + + DEBUGFUNC("e1000_id_led_init"); + + if(hw->mac_type < e1000_82540) { + /* Nothing to do */ + return 0; + } + + ledctl = E1000_READ_REG(hw, LEDCTL); + hw->ledctl_default = ledctl; + hw->ledctl_mode1 = hw->ledctl_default; /* both modes start from the current register value */ + hw->ledctl_mode2 = hw->ledctl_default; + + if(e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, &eeprom_data) < 0) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } + if((eeprom_data== ID_LED_RESERVED_0000) || + (eeprom_data == ID_LED_RESERVED_FFFF)) eeprom_data = ID_LED_DEFAULT; /* reserved words fall back to the default setting */ + for(i = 0; i < 4; i++) { /* field i: EEPROM bits start at i * 4, LEDCTL bits at i * 8 */ + temp = (eeprom_data >> (i << 2)) & led_mask; + switch(temp) { /* settings that force this field on or off in ledctl_mode1 */ + case ID_LED_ON1_DEF2: + case ID_LED_ON1_ON2: + case ID_LED_ON1_OFF2: + hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); + hw->ledctl_mode1 |= ledctl_on << (i << 3); + break; + case ID_LED_OFF1_DEF2: + case ID_LED_OFF1_ON2: + case ID_LED_OFF1_OFF2: + hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); + hw->ledctl_mode1 |= ledctl_off << (i << 3); + break; + default: + /* Do nothing: this field keeps the value read from LEDCTL */ + break; + } + switch(temp) { /* settings that force this field on or off in ledctl_mode2 */ + case ID_LED_DEF1_ON2: + case ID_LED_ON1_ON2: + case ID_LED_OFF1_ON2: + hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); + hw->ledctl_mode2 |= ledctl_on << (i << 3); + break; + case ID_LED_DEF1_OFF2: + case ID_LED_ON1_OFF2: + case ID_LED_OFF1_OFF2: + hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); + hw->ledctl_mode2 |= ledctl_off << (i << 3); + break; + default: + /* Do nothing: this field keeps the value read from LEDCTL */ + break; + } + } + return 0; +} + +/****************************************************************************** + * Prepares SW controllable LED for use and saves the current state of the LED. 
+ * + * hw - Struct containing variables accessed by shared code + * + * Returns 0, or -E1000_ERR_CONFIG when hw->device_id is not one of the IDs + * handled in the switch below. + *****************************************************************************/ +int32_t +e1000_setup_led(struct e1000_hw *hw) +{ + uint32_t ledctl; + + DEBUGFUNC("e1000_setup_led"); + + switch(hw->device_id) { + case E1000_DEV_ID_82542: + case E1000_DEV_ID_82543GC_FIBER: + case E1000_DEV_ID_82543GC_COPPER: + case E1000_DEV_ID_82544EI_COPPER: + case E1000_DEV_ID_82544EI_FIBER: + case E1000_DEV_ID_82544GC_COPPER: + case E1000_DEV_ID_82544GC_LOM: + /* No setup necessary */ + break; + case E1000_DEV_ID_82545EM_FIBER: + case E1000_DEV_ID_82546EB_FIBER: + ledctl = E1000_READ_REG(hw, LEDCTL); + /* Save current LEDCTL settings */ + hw->ledctl_default = ledctl; + /* Turn off LED0 */ + ledctl &= ~(E1000_LEDCTL_LED0_IVRT | + E1000_LEDCTL_LED0_BLINK | + E1000_LEDCTL_LED0_MODE_MASK); + ledctl |= (E1000_LEDCTL_MODE_LED_OFF << E1000_LEDCTL_LED0_MODE_SHIFT); + E1000_WRITE_REG(hw, LEDCTL, ledctl); + break; + case E1000_DEV_ID_82540EP: + case E1000_DEV_ID_82540EP_LOM: + case E1000_DEV_ID_82540EP_LP: + case E1000_DEV_ID_82540EM: + case E1000_DEV_ID_82540EM_LOM: + case E1000_DEV_ID_82545EM_COPPER: + case E1000_DEV_ID_82546EB_COPPER: + E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1); /* ledctl_mode1 is filled in by e1000_id_led_init; LEDCTL is not saved on this path */ + break; + default: + DEBUGOUT("Invalid device ID\n"); + return -E1000_ERR_CONFIG; + } + return 0; +} + +/****************************************************************************** + * Restores the saved state of the SW controllable LED. 
+ * + * hw - Struct containing variables accessed by shared code + * + * Returns 0, or -E1000_ERR_CONFIG when hw->device_id is not one of the IDs + * handled in the switch below. + *****************************************************************************/ +int32_t +e1000_cleanup_led(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_cleanup_led"); + + switch(hw->device_id) { + case E1000_DEV_ID_82542: + case E1000_DEV_ID_82543GC_FIBER: + case E1000_DEV_ID_82543GC_COPPER: + case E1000_DEV_ID_82544EI_COPPER: + case E1000_DEV_ID_82544EI_FIBER: + case E1000_DEV_ID_82544GC_COPPER: + case E1000_DEV_ID_82544GC_LOM: + /* No cleanup necessary */ + break; + case E1000_DEV_ID_82540EP: + case E1000_DEV_ID_82540EP_LOM: + case E1000_DEV_ID_82540EP_LP: + case E1000_DEV_ID_82540EM: + case E1000_DEV_ID_82540EM_LOM: + case E1000_DEV_ID_82545EM_COPPER: + case E1000_DEV_ID_82545EM_FIBER: + case E1000_DEV_ID_82546EB_COPPER: + case E1000_DEV_ID_82546EB_FIBER: + /* Restore LEDCTL settings */ + E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_default); + break; + default: + DEBUGOUT("Invalid device ID\n"); + return -E1000_ERR_CONFIG; + } + return 0; +} + +/****************************************************************************** + * Turns on the software controllable LED + * + * hw - Struct containing variables accessed by shared code + * + * Returns 0, or -E1000_ERR_CONFIG when hw->device_id is not one of the IDs + * handled in the switch below. + *****************************************************************************/ +int32_t +e1000_led_on(struct e1000_hw *hw) +{ + uint32_t ctrl; + + DEBUGFUNC("e1000_led_on"); + + switch(hw->device_id) { + case E1000_DEV_ID_82542: + case E1000_DEV_ID_82543GC_FIBER: + case E1000_DEV_ID_82543GC_COPPER: + case E1000_DEV_ID_82544EI_FIBER: + ctrl = E1000_READ_REG(hw, CTRL); + /* Set SW Definable Pin 0 to turn on the LED */ + ctrl |= E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, CTRL, ctrl); + break; + case E1000_DEV_ID_82544EI_COPPER: + case E1000_DEV_ID_82544GC_COPPER: + case E1000_DEV_ID_82544GC_LOM: + case E1000_DEV_ID_82545EM_FIBER: + case E1000_DEV_ID_82546EB_FIBER: + ctrl = E1000_READ_REG(hw, CTRL); + /* Clear SW Definable Pin 0 to turn on the LED (opposite pin level to the group above) */ 
+ ctrl &= ~E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, CTRL, ctrl); + break; + case E1000_DEV_ID_82540EP: + case E1000_DEV_ID_82540EP_LOM: + case E1000_DEV_ID_82540EP_LP: + case E1000_DEV_ID_82540EM: + case E1000_DEV_ID_82540EM_LOM: + case E1000_DEV_ID_82545EM_COPPER: + case E1000_DEV_ID_82546EB_COPPER: + E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode2); /* these devices are driven through LEDCTL: ledctl_mode2 is the "on" value */ + break; + default: + DEBUGOUT("Invalid device ID\n"); + return -E1000_ERR_CONFIG; + } + return 0; +} + +/****************************************************************************** + * Turns off the software controllable LED + * + * hw - Struct containing variables accessed by shared code + * + * Returns 0, or -E1000_ERR_CONFIG when hw->device_id is not one of the IDs + * handled in the switch below. + *****************************************************************************/ +int32_t +e1000_led_off(struct e1000_hw *hw) +{ + uint32_t ctrl; + + DEBUGFUNC("e1000_led_off"); + + switch(hw->device_id) { + case E1000_DEV_ID_82542: + case E1000_DEV_ID_82543GC_FIBER: + case E1000_DEV_ID_82543GC_COPPER: + case E1000_DEV_ID_82544EI_FIBER: + ctrl = E1000_READ_REG(hw, CTRL); + /* Clear SW Definable Pin 0 to turn off the LED */ + ctrl &= ~E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, CTRL, ctrl); + break; + case E1000_DEV_ID_82544EI_COPPER: + case E1000_DEV_ID_82544GC_COPPER: + case E1000_DEV_ID_82544GC_LOM: + case E1000_DEV_ID_82545EM_FIBER: + case E1000_DEV_ID_82546EB_FIBER: + ctrl = E1000_READ_REG(hw, CTRL); + /* Set SW Definable Pin 0 to turn off the LED (opposite pin level to the group above) */ + ctrl |= E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, CTRL, ctrl); + break; + case E1000_DEV_ID_82540EP: + case E1000_DEV_ID_82540EP_LOM: + case E1000_DEV_ID_82540EP_LP: + case E1000_DEV_ID_82540EM: + case E1000_DEV_ID_82540EM_LOM: + case E1000_DEV_ID_82545EM_COPPER: + case E1000_DEV_ID_82546EB_COPPER: + E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1); /* these devices are driven through LEDCTL: ledctl_mode1 is the "off" value */ + break; + default: + DEBUGOUT("Invalid device ID\n"); + return -E1000_ERR_CONFIG; + } + return 0; +} + 
+/****************************************************************************** + * Clears all hardware statistics counters. + * + * The function only reads each counter register and discards the value; + * presumably the statistics registers clear on read (confirm against the + * controller datasheet). Registers that are only read on newer MAC types + * are skipped by the two mac_type checks below. + * + * hw - Struct containing variables accessed by shared code + *****************************************************************************/ +void +e1000_clear_hw_cntrs(struct e1000_hw *hw) +{ + volatile uint32_t temp; /* sink for the reads; the values are never used */ + + temp = E1000_READ_REG(hw, CRCERRS); + temp = E1000_READ_REG(hw, SYMERRS); + temp = E1000_READ_REG(hw, MPC); + temp = E1000_READ_REG(hw, SCC); + temp = E1000_READ_REG(hw, ECOL); + temp = E1000_READ_REG(hw, MCC); + temp = E1000_READ_REG(hw, LATECOL); + temp = E1000_READ_REG(hw, COLC); + temp = E1000_READ_REG(hw, DC); + temp = E1000_READ_REG(hw, SEC); + temp = E1000_READ_REG(hw, RLEC); + temp = E1000_READ_REG(hw, XONRXC); + temp = E1000_READ_REG(hw, XONTXC); + temp = E1000_READ_REG(hw, XOFFRXC); + temp = E1000_READ_REG(hw, XOFFTXC); + temp = E1000_READ_REG(hw, FCRUC); + temp = E1000_READ_REG(hw, PRC64); + temp = E1000_READ_REG(hw, PRC127); + temp = E1000_READ_REG(hw, PRC255); + temp = E1000_READ_REG(hw, PRC511); + temp = E1000_READ_REG(hw, PRC1023); + temp = E1000_READ_REG(hw, PRC1522); + temp = E1000_READ_REG(hw, GPRC); + temp = E1000_READ_REG(hw, BPRC); + temp = E1000_READ_REG(hw, MPRC); + temp = E1000_READ_REG(hw, GPTC); + temp = E1000_READ_REG(hw, GORCL); + temp = E1000_READ_REG(hw, GORCH); + temp = E1000_READ_REG(hw, GOTCL); + temp = E1000_READ_REG(hw, GOTCH); + temp = E1000_READ_REG(hw, RNBC); + temp = E1000_READ_REG(hw, RUC); + temp = E1000_READ_REG(hw, RFC); + temp = E1000_READ_REG(hw, ROC); + temp = E1000_READ_REG(hw, RJC); + temp = E1000_READ_REG(hw, TORL); + temp = E1000_READ_REG(hw, TORH); + temp = E1000_READ_REG(hw, TOTL); + temp = E1000_READ_REG(hw, TOTH); + temp = E1000_READ_REG(hw, TPR); + temp = E1000_READ_REG(hw, TPT); + temp = E1000_READ_REG(hw, PTC64); + temp = E1000_READ_REG(hw, PTC127); + temp = E1000_READ_REG(hw, PTC255); + temp = E1000_READ_REG(hw, PTC511); + temp = E1000_READ_REG(hw, PTC1023); + 
temp = E1000_READ_REG(hw, PTC1522); + temp = E1000_READ_REG(hw, MPTC); + temp = E1000_READ_REG(hw, BPTC); + + if(hw->mac_type < e1000_82543) return; /* the registers below are only read on the 82543 and later */ + + temp = E1000_READ_REG(hw, ALGNERRC); + temp = E1000_READ_REG(hw, RXERRC); + temp = E1000_READ_REG(hw, TNCRS); + temp = E1000_READ_REG(hw, CEXTERR); + temp = E1000_READ_REG(hw, TSCTC); + temp = E1000_READ_REG(hw, TSCTFC); + + if(hw->mac_type <= e1000_82544) return; /* the MGTP* registers are only read on MAC types after the 82544 */ + + temp = E1000_READ_REG(hw, MGTPRC); + temp = E1000_READ_REG(hw, MGTPDC); + temp = E1000_READ_REG(hw, MGTPTC); +} + +/****************************************************************************** + * Resets Adaptive IFS to its default state. + * + * hw - Struct containing variables accessed by shared code + * + * Call this after e1000_init_hw. You may override the IFS defaults by setting + * hw->ifs_params_forced to TRUE. If you do, you must initialize hw-> + * current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and ifs_ratio + * before calling this function. + * + * When hw->adaptive_ifs is not set, no register is written and only a debug + * message is emitted. + *****************************************************************************/ +void +e1000_reset_adaptive(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_reset_adaptive"); + + if(hw->adaptive_ifs) { + if(!hw->ifs_params_forced) { + hw->current_ifs_val = 0; + hw->ifs_min_val = IFS_MIN; + hw->ifs_max_val = IFS_MAX; + hw->ifs_step_size = IFS_STEP; + hw->ifs_ratio = IFS_RATIO; + } + hw->in_ifs_mode = FALSE; + E1000_WRITE_REG(hw, AIT, 0); /* AIT is cleared even when the parameters are forced */ + } else { + DEBUGOUT("Not in Adaptive IFS mode!\n"); + } +} + +/****************************************************************************** + * Called during the callback/watchdog routine to update IFS value based on + * the ratio of transmits to collisions. 
+ * + * hw - Struct containing variables accessed by shared code + * tx_packets - Number of transmits since last callback + * total_collisions - Number of collisions since last callback + *****************************************************************************/ +void +e1000_update_adaptive(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_update_adaptive"); + + if(hw->adaptive_ifs) { + if((hw->collision_delta * hw->ifs_ratio) > + hw->tx_packet_delta) { + if(hw->tx_packet_delta > MIN_NUM_XMITS) { + hw->in_ifs_mode = TRUE; + if(hw->current_ifs_val < hw->ifs_max_val) { + if(hw->current_ifs_val == 0) + hw->current_ifs_val = hw->ifs_min_val; + else + hw->current_ifs_val += hw->ifs_step_size; + E1000_WRITE_REG(hw, AIT, hw->current_ifs_val); + } + } + } else { + if((hw->in_ifs_mode == TRUE) && + (hw->tx_packet_delta <= MIN_NUM_XMITS)) { + hw->current_ifs_val = 0; + hw->in_ifs_mode = FALSE; + E1000_WRITE_REG(hw, AIT, 0); + } + } + } else { + DEBUGOUT("Not in Adaptive IFS mode!\n"); + } +} + +/****************************************************************************** + * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT + * + * hw - Struct containing variables accessed by shared code + * frame_len - The length of the frame in question + * mac_addr - The Ethernet destination address of the frame in question + *****************************************************************************/ +void +e1000_tbi_adjust_stats(struct e1000_hw *hw, + struct e1000_hw_stats *stats, + uint32_t frame_len, + uint8_t *mac_addr) +{ + uint64_t carry_bit; + + /* First adjust the frame length. */ + frame_len--; + /* We need to adjust the statistics counters, since the hardware + * counters overcount this packet as a CRC error and undercount + * the packet as a good packet + */ + /* This packet should not be counted as a CRC error. */ + stats->crcerrs--; + /* This packet does count as a Good Packet Received. 
*/ + stats->gprc++; + + /* Adjust the Good Octets received counters */ + carry_bit = 0x80000000 & stats->gorcl; + stats->gorcl += frame_len; + /* If the high bit of Gorcl (the low 32 bits of the Good Octets + * Received Count) was one before the addition, + * AND it is zero after, then we lost the carry out, + * need to add one to Gorch (Good Octets Received Count High). + * This could be simplified if all environments supported + * 64-bit integers. + */ + if(carry_bit && ((stats->gorcl & 0x80000000) == 0)) + stats->gorch++; + /* Is this a broadcast or multicast? Check broadcast first, + * since the test for a multicast frame will test positive on + * a broadcast frame. + */ + if((mac_addr[0] == (uint8_t) 0xff) && (mac_addr[1] == (uint8_t) 0xff)) + /* Broadcast packet */ + stats->bprc++; + else if(*mac_addr & 0x01) + /* Multicast packet */ + stats->mprc++; + + if(frame_len == hw->max_frame_size) { + /* In this case, the hardware has overcounted the number of + * oversize frames. + */ + if(stats->roc > 0) + stats->roc--; + } + + /* Adjust the bin counters when the extra byte put the frame in the + * wrong bin. Remember that the frame_len was adjusted above. 
+ */ + if(frame_len == 64) { + stats->prc64++; + stats->prc127--; + } else if(frame_len == 127) { + stats->prc127++; + stats->prc255--; + } else if(frame_len == 255) { + stats->prc255++; + stats->prc511--; + } else if(frame_len == 511) { + stats->prc511++; + stats->prc1023--; + } else if(frame_len == 1023) { + stats->prc1023++; + stats->prc1522--; + } else if(frame_len == 1522) { + stats->prc1522++; + } +} + +/****************************************************************************** + * Gets the current PCI bus type, speed, and width of the hardware + * + * hw - Struct containing variables accessed by shared code + *****************************************************************************/ +void +e1000_get_bus_info(struct e1000_hw *hw) +{ + uint32_t status; + + if(hw->mac_type < e1000_82543) { + hw->bus_type = e1000_bus_type_unknown; + hw->bus_speed = e1000_bus_speed_unknown; + hw->bus_width = e1000_bus_width_unknown; + return; + } + + status = E1000_READ_REG(hw, STATUS); + hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ? + e1000_bus_type_pcix : e1000_bus_type_pci; + if(hw->bus_type == e1000_bus_type_pci) { + hw->bus_speed = (status & E1000_STATUS_PCI66) ? + e1000_bus_speed_66 : e1000_bus_speed_33; + } else { + switch (status & E1000_STATUS_PCIX_SPEED) { + case E1000_STATUS_PCIX_SPEED_66: + hw->bus_speed = e1000_bus_speed_66; + break; + case E1000_STATUS_PCIX_SPEED_100: + hw->bus_speed = e1000_bus_speed_100; + break; + case E1000_STATUS_PCIX_SPEED_133: + hw->bus_speed = e1000_bus_speed_133; + break; + default: + hw->bus_speed = e1000_bus_speed_reserved; + break; + } + } + hw->bus_width = (status & E1000_STATUS_BUS64) ? + e1000_bus_width_64 : e1000_bus_width_32; +} +/****************************************************************************** + * Reads a value from one of the devices registers using port I/O (as opposed + * memory mapped I/O). Only 82544 and newer devices support port I/O. 
+ * + * hw - Struct containing variables accessed by shared code + * offset - offset to read from + *****************************************************************************/ +uint32_t +e1000_read_reg_io(struct e1000_hw *hw, + uint32_t offset) +{ + uint32_t io_addr = hw->io_base; /* port that receives the register offset */ + uint32_t io_data = hw->io_base + 4; /* port the register value is read from */ + + e1000_io_write(hw, io_addr, offset); + return e1000_io_read(hw, io_data); +} + +/****************************************************************************** + * Writes a value to one of the devices registers using port I/O (as opposed to + * memory mapped I/O). Only 82544 and newer devices support port I/O. + * + * The register offset is written to the port at hw->io_base first, then the + * value is written to the port at hw->io_base + 4. + * + * hw - Struct containing variables accessed by shared code + * offset - offset to write to + * value - value to write + *****************************************************************************/ +void +e1000_write_reg_io(struct e1000_hw *hw, + uint32_t offset, + uint32_t value) +{ + uint32_t io_addr = hw->io_base; /* port that receives the register offset */ + uint32_t io_data = hw->io_base + 4; /* port that receives the register value */ + + e1000_io_write(hw, io_addr, offset); + e1000_io_write(hw, io_data, value); +} + diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_hw.h linux-2.4.19/drivers/net/e1000/e1000_hw.h --- linux-2.4.19.orig/drivers/net/e1000/e1000_hw.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/e1000_hw.h Wed Feb 12 12:09:01 2003 @@ -0,0 +1,1789 @@ +/******************************************************************************* + + + Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* e1000_hw.h + * Structures, enums, and macros for the MAC + */ + +#ifndef _E1000_HW_H_ +#define _E1000_HW_H_ + +#include "e1000_osdep.h" + +/* Forward declarations of structures used by the shared code */ +struct e1000_hw; +struct e1000_hw_stats; + +/* Enumerated types specific to the e1000 hardware */ +/* Media Access Controllers. + * The order of the enumerators matters: the shared code compares mac_type + * with < and <= (for example mac_type < e1000_82540 and + * mac_type <= e1000_82544). Note that e1000_82540 is listed after + * e1000_82544. + */ +typedef enum { + e1000_undefined = 0, + e1000_82542_rev2_0, + e1000_82542_rev2_1, + e1000_82543, + e1000_82544, + e1000_82540, + e1000_82545, + e1000_82546, + e1000_num_macs +} e1000_mac_type; + +/* Media Types */ +typedef enum { + e1000_media_type_copper = 0, + e1000_media_type_fiber = 1, + e1000_num_media_types +} e1000_media_type; + +typedef enum { + e1000_10_half = 0, + e1000_10_full = 1, + e1000_100_half = 2, + e1000_100_full = 3 +} e1000_speed_duplex_type; + +/* Flow Control Settings */ +typedef enum { + e1000_fc_none = 0, + e1000_fc_rx_pause = 1, + e1000_fc_tx_pause = 2, + e1000_fc_full = 3, + e1000_fc_default = 0xFF +} e1000_fc_type; + +/* PCI bus types (stored in hw->bus_type by e1000_get_bus_info) */ +typedef enum { + e1000_bus_type_unknown = 0, + e1000_bus_type_pci, + e1000_bus_type_pcix +} e1000_bus_type; + +/* PCI bus speeds (stored in hw->bus_speed by e1000_get_bus_info) */ +typedef enum { + e1000_bus_speed_unknown = 0, + e1000_bus_speed_33, + e1000_bus_speed_66, + e1000_bus_speed_100, + e1000_bus_speed_133, + e1000_bus_speed_reserved +} e1000_bus_speed; + +/* PCI bus widths (stored in hw->bus_width by e1000_get_bus_info) */ +typedef enum { + e1000_bus_width_unknown = 0, + e1000_bus_width_32, + e1000_bus_width_64 +} e1000_bus_width; + +/* PHY status info 
structure and supporting enums */ +typedef enum { + e1000_cable_length_50 = 0, + e1000_cable_length_50_80, + e1000_cable_length_80_110, + e1000_cable_length_110_140, + e1000_cable_length_140, + e1000_cable_length_undefined = 0xFF +} e1000_cable_length; + +typedef enum { + e1000_10bt_ext_dist_enable_normal = 0, + e1000_10bt_ext_dist_enable_lower, + e1000_10bt_ext_dist_enable_undefined = 0xFF +} e1000_10bt_ext_dist_enable; + +typedef enum { + e1000_rev_polarity_normal = 0, + e1000_rev_polarity_reversed, + e1000_rev_polarity_undefined = 0xFF +} e1000_rev_polarity; + +typedef enum { + e1000_polarity_reversal_enabled = 0, + e1000_polarity_reversal_disabled, + e1000_polarity_reversal_undefined = 0xFF +} e1000_polarity_reversal; + +typedef enum { + e1000_auto_x_mode_manual_mdi = 0, + e1000_auto_x_mode_manual_mdix, + e1000_auto_x_mode_auto1, + e1000_auto_x_mode_auto2, + e1000_auto_x_mode_undefined = 0xFF +} e1000_auto_x_mode; + +typedef enum { + e1000_1000t_rx_status_not_ok = 0, + e1000_1000t_rx_status_ok, + e1000_1000t_rx_status_undefined = 0xFF +} e1000_1000t_rx_status; + +struct e1000_phy_info { + e1000_cable_length cable_length; + e1000_10bt_ext_dist_enable extended_10bt_distance; + e1000_rev_polarity cable_polarity; + e1000_polarity_reversal polarity_correction; + e1000_auto_x_mode mdix_mode; + e1000_1000t_rx_status local_rx; + e1000_1000t_rx_status remote_rx; +}; + +struct e1000_phy_stats { + uint32_t idle_errors; + uint32_t receive_errors; +}; + + + +/* Error Codes. + * These are positive values; failing functions return them negated (for + * example e1000_setup_led returns -E1000_ERR_CONFIG for an unhandled + * device ID). + */ +#define E1000_SUCCESS 0 +#define E1000_ERR_EEPROM 1 +#define E1000_ERR_PHY 2 +#define E1000_ERR_CONFIG 3 +#define E1000_ERR_PARAM 4 +#define E1000_ERR_MAC_TYPE 5 + +/* Function prototypes */ +/* Initialization */ +void e1000_reset_hw(struct e1000_hw *hw); +int32_t e1000_init_hw(struct e1000_hw *hw); +int32_t e1000_set_mac_type(struct e1000_hw *hw); + +/* Link Configuration */ +int32_t e1000_setup_link(struct e1000_hw *hw); +int32_t e1000_phy_setup_autoneg(struct e1000_hw *hw); +void 
e1000_config_collision_dist(struct e1000_hw *hw); +int32_t e1000_config_fc_after_link_up(struct e1000_hw *hw); +int32_t e1000_check_for_link(struct e1000_hw *hw); +void e1000_get_speed_and_duplex(struct e1000_hw *hw, uint16_t * speed, uint16_t * duplex); +int32_t e1000_wait_autoneg(struct e1000_hw *hw); + +/* PHY */ +int32_t e1000_read_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *phy_data); +int32_t e1000_write_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data); +void e1000_phy_hw_reset(struct e1000_hw *hw); +int32_t e1000_phy_reset(struct e1000_hw *hw); +int32_t e1000_detect_gig_phy(struct e1000_hw *hw); +int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); +int32_t e1000_validate_mdi_setting(struct e1000_hw *hw); + +/* EEPROM Functions */ +int32_t e1000_read_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t *data); +int32_t e1000_validate_eeprom_checksum(struct e1000_hw *hw); +int32_t e1000_update_eeprom_checksum(struct e1000_hw *hw); +int32_t e1000_write_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t data); +int32_t e1000_read_part_num(struct e1000_hw *hw, uint32_t * part_num); +int32_t e1000_read_mac_addr(struct e1000_hw * hw); + +/* Filters (multicast, vlan, receive) */ +void e1000_init_rx_addrs(struct e1000_hw *hw); +void e1000_mc_addr_list_update(struct e1000_hw *hw, uint8_t * mc_addr_list, uint32_t mc_addr_count, uint32_t pad); +uint32_t e1000_hash_mc_addr(struct e1000_hw *hw, uint8_t * mc_addr); /* returns a 12-bit hash (0..0xFFF) */ +void e1000_mta_set(struct e1000_hw *hw, uint32_t hash_value); /* sets bit (hash_value & 0xFFF) of the 4096-bit MTA */ +void e1000_rar_set(struct e1000_hw *hw, uint8_t * mc_addr, uint32_t rar_index); /* the definition names these parameters addr and index */ +void e1000_write_vfta(struct e1000_hw *hw, uint32_t offset, uint32_t value); +void e1000_clear_vfta(struct e1000_hw *hw); + +/* LED functions: each returns 0 on success or -E1000_ERR_CONFIG when + * hw->device_id is not one of the IDs the function handles + */ +int32_t e1000_setup_led(struct e1000_hw *hw); +int32_t e1000_cleanup_led(struct e1000_hw *hw); +int32_t e1000_led_on(struct e1000_hw *hw); +int32_t e1000_led_off(struct e1000_hw *hw); + +/* Adaptive IFS Functions (none are declared under this heading) */ + +/* 
Everything else */ +void e1000_clear_hw_cntrs(struct e1000_hw *hw); +void e1000_reset_adaptive(struct e1000_hw *hw); +void e1000_update_adaptive(struct e1000_hw *hw); +void e1000_tbi_adjust_stats(struct e1000_hw *hw, struct e1000_hw_stats *stats, uint32_t frame_len, uint8_t * mac_addr); +void e1000_get_bus_info(struct e1000_hw *hw); +void e1000_pci_set_mwi(struct e1000_hw *hw); +void e1000_pci_clear_mwi(struct e1000_hw *hw); +void e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value); +void e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value); +/* Port I/O is only supported on 82544 and newer. + * e1000_read_reg_io and e1000_write_reg_io write the register offset to the + * port at hw->io_base and then transfer the value through the port at + * hw->io_base + 4, using e1000_io_write and e1000_io_read. + */ +uint32_t e1000_io_read(struct e1000_hw *hw, uint32_t port); +uint32_t e1000_read_reg_io(struct e1000_hw *hw, uint32_t offset); +void e1000_io_write(struct e1000_hw *hw, uint32_t port, uint32_t value); +void e1000_write_reg_io(struct e1000_hw *hw, uint32_t offset, uint32_t value); +#define E1000_READ_REG_IO(a, reg) \ + e1000_read_reg_io((a), E1000_##reg) +#define E1000_WRITE_REG_IO(a, reg, val) \ + e1000_write_reg_io((a), E1000_##reg, val) + +/* PCI Device IDs (16 are listed below; NUM_DEV_IDS records that count) */ +#define E1000_DEV_ID_82542 0x1000 +#define E1000_DEV_ID_82543GC_FIBER 0x1001 +#define E1000_DEV_ID_82543GC_COPPER 0x1004 +#define E1000_DEV_ID_82544EI_COPPER 0x1008 +#define E1000_DEV_ID_82544EI_FIBER 0x1009 +#define E1000_DEV_ID_82544GC_COPPER 0x100C +#define E1000_DEV_ID_82544GC_LOM 0x100D +#define E1000_DEV_ID_82540EM 0x100E +#define E1000_DEV_ID_82540EM_LOM 0x1015 +#define E1000_DEV_ID_82540EP_LOM 0x1016 +#define E1000_DEV_ID_82540EP 0x1017 +#define E1000_DEV_ID_82540EP_LP 0x101E +#define E1000_DEV_ID_82545EM_COPPER 0x100F +#define E1000_DEV_ID_82545EM_FIBER 0x1011 +#define E1000_DEV_ID_82546EB_COPPER 0x1010 +#define E1000_DEV_ID_82546EB_FIBER 0x1012 +#define NUM_DEV_IDS 16 + +#define NODE_ADDRESS_SIZE 6 +#define ETH_LENGTH_OF_ADDRESS 6 + +/* MAC decode size is 128K - This is the size of BAR0 */ +#define MAC_DECODE_SIZE (128 * 1024) + +#define E1000_82542_2_0_REV_ID 2 
+#define E1000_82542_2_1_REV_ID 3 + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + +/* The sizes (in bytes) of an Ethernet packet. + * The *_FRAME_SIZE values include the 4-byte FCS; the *_PACKET_SIZE values + * are the same sizes with the FCS subtracted (1514 and 60). + */ +#define ENET_HEADER_SIZE 14 +#define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* With FCS */ +#define MINIMUM_ETHERNET_FRAME_SIZE 64 /* With FCS */ +#define ETHERNET_FCS_SIZE 4 +#define MAXIMUM_ETHERNET_PACKET_SIZE \ + (MAXIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE) +#define MINIMUM_ETHERNET_PACKET_SIZE \ + (MINIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE) +#define CRC_LENGTH ETHERNET_FCS_SIZE +#define MAX_JUMBO_FRAME_SIZE 0x3F00 /* 16128 bytes */ + + +/* 802.1q VLAN Packet Sizes */ +#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMAed) */ + +/* Ethertype field values */ +#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ +#define ETHERNET_IP_TYPE 0x0800 /* IP packets */ +#define ETHERNET_ARP_TYPE 0x0806 /* Address Resolution Protocol (ARP) */ + +/* Packet Header defines */ +#define IP_PROTOCOL_TCP 6 +#define IP_PROTOCOL_UDP 0x11 /* 17 decimal */ + +/* This defines the bits that are set in the Interrupt Mask + * Set/Read Register. Each bit is documented below: + * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) + * o RXSEQ = Receive Sequence Error + */ +#define POLL_IMS_ENABLE_MASK ( \ + E1000_IMS_RXDMT0 | \ + E1000_IMS_RXSEQ) + +/* This defines the bits that are set in the Interrupt Mask + * Set/Read Register. Each bit is documented below: + * o RXT0 = Receiver Timer Interrupt (ring 0) + * o TXDW = Transmit Descriptor Written Back + * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) + * o RXSEQ = Receive Sequence Error + * o LSC = Link Status Change + * This is POLL_IMS_ENABLE_MASK plus RXT0, TXDW and LSC. + */ +#define IMS_ENABLE_MASK ( \ + E1000_IMS_RXT0 | \ + E1000_IMS_TXDW | \ + E1000_IMS_RXDMT0 | \ + E1000_IMS_RXSEQ | \ + E1000_IMS_LSC) + +/* The number of high/low register pairs in the RAR. The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. 
We + * reserve one of these spots for our directed address, allowing us room for + * E1000_RAR_ENTRIES - 1 multicast addresses. + */ +#define E1000_RAR_ENTRIES 16 + +#define MIN_NUMBER_OF_DESCRIPTORS 8 +#define MAX_NUMBER_OF_DESCRIPTORS 0xFFF8 + +/* Receive Descriptor */ +struct e1000_rx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + uint16_t length; /* Length of data DMAed into data buffer */ + uint16_t csum; /* Packet checksum */ + uint8_t status; /* Descriptor status */ + uint8_t errors; /* Descriptor Errors */ + uint16_t special; /* presumably the field the E1000_RXD_SPC_* masks below apply to - TODO confirm */ +}; + +/* Receive Descriptor bit definitions. + * NOTE(review): by their names the E1000_RXD_STAT_* bits look like they + * apply to the status byte and the E1000_RXD_ERR_* bits to the errors byte + * of struct e1000_rx_desc - confirm against the users of these macros. + */ +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ +#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ +#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ +#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ +#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ +#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ +#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ +#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ +#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ +#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ +#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ +#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ +#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ +#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ +#define E1000_RXD_SPC_PRI_SHIFT 0x000D /* Priority is in upper 3 of 16 */ +#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ +#define E1000_RXD_SPC_CFI_SHIFT 0x000C /* CFI is bit 12 */ + +/* mask to determine if packets should be dropped due to frame errors + * (every E1000_RXD_ERR_* bit except the two checksum errors TCPE and IPE) + */ +#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ + E1000_RXD_ERR_CE | \ + E1000_RXD_ERR_SE | \ + E1000_RXD_ERR_SEQ | \ + E1000_RXD_ERR_CXE | \ + E1000_RXD_ERR_RXE) + +/* Transmit 
Descriptor */ +struct e1000_tx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t cso; /* Checksum offset */ + uint8_t cmd; /* Descriptor control */ + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t css; /* Checksum start */ + uint16_t special; + } fields; + } upper; +}; + +/* Transmit Descriptor bit definitions */ +#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ +#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ +#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ +#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ +#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ +#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ +#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ +#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ +#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ +#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ +#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ +#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ +#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ +#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ +#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ +#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ +#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ + +/* Offload Context Descriptor */ +struct e1000_context_desc { + union { + uint32_t ip_config; + struct { + uint8_t ipcss; /* IP checksum start */ + uint8_t ipcso; /* IP checksum offset */ + uint16_t ipcse; /* IP checksum end */ + } ip_fields; + } lower_setup; + union { + 
uint32_t tcp_config; + struct { + uint8_t tucss; /* TCP checksum start */ + uint8_t tucso; /* TCP checksum offset */ + uint16_t tucse; /* TCP checksum end */ + } tcp_fields; + } upper_setup; + uint32_t cmd_and_length; /* */ + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t hdr_len; /* Header length */ + uint16_t mss; /* Maximum segment size */ + } fields; + } tcp_seg_setup; +}; + +/* Offload data descriptor */ +struct e1000_data_desc { + uint64_t buffer_addr; /* Address of the descriptor's buffer address */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t typ_len_ext; /* */ + uint8_t cmd; /* */ + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t popts; /* Packet Options */ + uint16_t special; /* */ + } fields; + } upper; +}; + +/* Filters */ +#define E1000_NUM_UNICAST 16 /* Unicast filter entries */ +#define E1000_MC_TBL_SIZE 128 /* Multicast Filter Table (4096 bits) */ +#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ + + +/* Receive Address Register */ +struct e1000_rar { + volatile uint32_t low; /* receive address low */ + volatile uint32_t high; /* receive address high */ +}; + +/* The number of entries in the Multicast Table Array (MTA). 
*/ +#define E1000_NUM_MTA_REGISTERS 128 + +/* IPv4 Address Table Entry */ +struct e1000_ipv4_at_entry { + volatile uint32_t ipv4_addr; /* IP Address (RW) */ + volatile uint32_t reserved; +}; + +/* Four wakeup IP addresses are supported */ +#define E1000_WAKEUP_IP_ADDRESS_COUNT_MAX 4 +#define E1000_IP4AT_SIZE E1000_WAKEUP_IP_ADDRESS_COUNT_MAX +#define E1000_IP6AT_SIZE 1 + +/* IPv6 Address Table Entry */ +struct e1000_ipv6_at_entry { + volatile uint8_t ipv6_addr[16]; +}; + +/* Flexible Filter Length Table Entry */ +struct e1000_fflt_entry { + volatile uint32_t length; /* Flexible Filter Length (RW) */ + volatile uint32_t reserved; +}; + +/* Flexible Filter Mask Table Entry */ +struct e1000_ffmt_entry { + volatile uint32_t mask; /* Flexible Filter Mask (RW) */ + volatile uint32_t reserved; +}; + +/* Flexible Filter Value Table Entry */ +struct e1000_ffvt_entry { + volatile uint32_t value; /* Flexible Filter Value (RW) */ + volatile uint32_t reserved; +}; + +/* Four Flexible Filters are supported */ +#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4 + +/* Each Flexible Filter is at most 128 (0x80) bytes in length */ +#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128 + +#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX +#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX +#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX + +/* Register Set. (82543, 82544) + * + * Registers are defined to be 32 bits and should be accessed as 32 bit values. + * These registers are physically located on the NIC, but are mapped into the + * host memory address space. 
+ * + * RW - register is both readable and writable + * RO - register is read only + * WO - register is write only + * R/clr - register is read only and is cleared when read + * A - register array + */ +#define E1000_CTRL 0x00000 /* Device Control - RW */ +#define E1000_STATUS 0x00008 /* Device Status - RO */ +#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define E1000_EERD 0x00014 /* EEPROM Read - RW */ +#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define E1000_MDIC 0x00020 /* MDI Control - RW */ +#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ +#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ +#define E1000_FCT 0x00030 /* Flow Control Type - RW */ +#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ +#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ +#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ +#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ +#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ +#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ +#define E1000_RCTL 0x00100 /* RX Control - RW */ +#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ +#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ +#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ +#define E1000_TCTL 0x00400 /* TX Control - RW */ +#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ +#define E1000_TBT 0x00448 /* TX Burst Timer - RW */ +#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ +#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ +#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ +#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ +#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ +#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ +#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ +#define E1000_RDLEN 0x02808 
/* RX Descriptor Length - RW */ +#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ +#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ +#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ +#define E1000_RXDCTL 0x02828 /* RX Descriptor Control - RW */ +#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */ +#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */ +#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ +#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ +#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ +#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ +#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ +#define E1000_TDT 0x03818 /* TX Descriptor Tail - RW */ +#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ +#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ +#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ +#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ +#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ +#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ +#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define E1000_COLC 0x04028 /* Collision Count - R/clr */ +#define E1000_DC 0x04030 /* Defer Count - R/clr */ +#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ +#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ +#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */
+#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ +#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ +#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ +#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ +#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count - R/clr */ +#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ +#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ +#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ +#define E1000_PRC511 0x04068 /* Packets RX (256-511 bytes) - R/clr */ +#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ +#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ +#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ +#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ +#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ +#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ +#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ +#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ +#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ +#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ +#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ +#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ +#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ +#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ +#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ +#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ +#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ +#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ +#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ +#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ +#define E1000_TOTH
0x040CC /* Total Octets TX High - R/clr */ +#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ +#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ +#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ +#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ +#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ +#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ +#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ +#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ +#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ +#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ +#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ +#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ +#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ +#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define E1000_RA 0x05400 /* Receive Address - RW Array */ +#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ +#define E1000_WUC 0x05800 /* Wakeup Control - RW */ +#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ +#define E1000_WUS 0x05810 /* Wakeup Status - RO */ +#define E1000_MANC 0x05820 /* Management Control - RW */ +#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ +#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ +#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ +#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ +#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ +#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ +#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ + +/* Register Set (82542) + * + * Some of the 82542 registers are located at different offsets than they are + * in more 
current versions of the 8254x. Despite the difference in location, + * the registers function in the same manner. + */ +#define E1000_82542_CTRL E1000_CTRL +#define E1000_82542_STATUS E1000_STATUS +#define E1000_82542_EECD E1000_EECD +#define E1000_82542_EERD E1000_EERD +#define E1000_82542_CTRL_EXT E1000_CTRL_EXT +#define E1000_82542_MDIC E1000_MDIC +#define E1000_82542_FCAL E1000_FCAL +#define E1000_82542_FCAH E1000_FCAH +#define E1000_82542_FCT E1000_FCT +#define E1000_82542_VET E1000_VET +#define E1000_82542_RA 0x00040 +#define E1000_82542_ICR E1000_ICR +#define E1000_82542_ITR E1000_ITR +#define E1000_82542_ICS E1000_ICS +#define E1000_82542_IMS E1000_IMS +#define E1000_82542_IMC E1000_IMC +#define E1000_82542_RCTL E1000_RCTL +#define E1000_82542_RDTR 0x00108 +#define E1000_82542_RDBAL 0x00110 +#define E1000_82542_RDBAH 0x00114 +#define E1000_82542_RDLEN 0x00118 +#define E1000_82542_RDH 0x00120 +#define E1000_82542_RDT 0x00128 +#define E1000_82542_FCRTH 0x00160 +#define E1000_82542_FCRTL 0x00168 +#define E1000_82542_FCTTV E1000_FCTTV +#define E1000_82542_TXCW E1000_TXCW +#define E1000_82542_RXCW E1000_RXCW +#define E1000_82542_MTA 0x00200 +#define E1000_82542_TCTL E1000_TCTL +#define E1000_82542_TIPG E1000_TIPG +#define E1000_82542_TDBAL 0x00420 +#define E1000_82542_TDBAH 0x00424 +#define E1000_82542_TDLEN 0x00428 +#define E1000_82542_TDH 0x00430 +#define E1000_82542_TDT 0x00438 +#define E1000_82542_TIDV 0x00440 +#define E1000_82542_TBT E1000_TBT +#define E1000_82542_AIT E1000_AIT +#define E1000_82542_VFTA 0x00600 +#define E1000_82542_LEDCTL E1000_LEDCTL +#define E1000_82542_PBA E1000_PBA +#define E1000_82542_RXDCTL E1000_RXDCTL +#define E1000_82542_RADV E1000_RADV +#define E1000_82542_RSRPD E1000_RSRPD +#define E1000_82542_TXDMAC E1000_TXDMAC +#define E1000_82542_TXDCTL E1000_TXDCTL +#define E1000_82542_TADV E1000_TADV +#define E1000_82542_TSPMT E1000_TSPMT +#define E1000_82542_CRCERRS E1000_CRCERRS +#define E1000_82542_ALGNERRC E1000_ALGNERRC +#define 
E1000_82542_SYMERRS E1000_SYMERRS +#define E1000_82542_RXERRC E1000_RXERRC +#define E1000_82542_MPC E1000_MPC +#define E1000_82542_SCC E1000_SCC +#define E1000_82542_ECOL E1000_ECOL +#define E1000_82542_MCC E1000_MCC +#define E1000_82542_LATECOL E1000_LATECOL +#define E1000_82542_COLC E1000_COLC +#define E1000_82542_DC E1000_DC +#define E1000_82542_TNCRS E1000_TNCRS +#define E1000_82542_SEC E1000_SEC +#define E1000_82542_CEXTERR E1000_CEXTERR +#define E1000_82542_RLEC E1000_RLEC +#define E1000_82542_XONRXC E1000_XONRXC +#define E1000_82542_XONTXC E1000_XONTXC +#define E1000_82542_XOFFRXC E1000_XOFFRXC +#define E1000_82542_XOFFTXC E1000_XOFFTXC +#define E1000_82542_FCRUC E1000_FCRUC +#define E1000_82542_PRC64 E1000_PRC64 +#define E1000_82542_PRC127 E1000_PRC127 +#define E1000_82542_PRC255 E1000_PRC255 +#define E1000_82542_PRC511 E1000_PRC511 +#define E1000_82542_PRC1023 E1000_PRC1023 +#define E1000_82542_PRC1522 E1000_PRC1522 +#define E1000_82542_GPRC E1000_GPRC +#define E1000_82542_BPRC E1000_BPRC +#define E1000_82542_MPRC E1000_MPRC +#define E1000_82542_GPTC E1000_GPTC +#define E1000_82542_GORCL E1000_GORCL +#define E1000_82542_GORCH E1000_GORCH +#define E1000_82542_GOTCL E1000_GOTCL +#define E1000_82542_GOTCH E1000_GOTCH +#define E1000_82542_RNBC E1000_RNBC +#define E1000_82542_RUC E1000_RUC +#define E1000_82542_RFC E1000_RFC +#define E1000_82542_ROC E1000_ROC +#define E1000_82542_RJC E1000_RJC +#define E1000_82542_MGTPRC E1000_MGTPRC +#define E1000_82542_MGTPDC E1000_MGTPDC +#define E1000_82542_MGTPTC E1000_MGTPTC +#define E1000_82542_TORL E1000_TORL +#define E1000_82542_TORH E1000_TORH +#define E1000_82542_TOTL E1000_TOTL +#define E1000_82542_TOTH E1000_TOTH +#define E1000_82542_TPR E1000_TPR +#define E1000_82542_TPT E1000_TPT +#define E1000_82542_PTC64 E1000_PTC64 +#define E1000_82542_PTC127 E1000_PTC127 +#define E1000_82542_PTC255 E1000_PTC255 +#define E1000_82542_PTC511 E1000_PTC511 +#define E1000_82542_PTC1023 E1000_PTC1023 +#define E1000_82542_PTC1522 
E1000_PTC1522 +#define E1000_82542_MPTC E1000_MPTC +#define E1000_82542_BPTC E1000_BPTC +#define E1000_82542_TSCTC E1000_TSCTC +#define E1000_82542_TSCTFC E1000_TSCTFC +#define E1000_82542_RXCSUM E1000_RXCSUM +#define E1000_82542_WUC E1000_WUC +#define E1000_82542_WUFC E1000_WUFC +#define E1000_82542_WUS E1000_WUS +#define E1000_82542_MANC E1000_MANC +#define E1000_82542_IPAV E1000_IPAV +#define E1000_82542_IP4AT E1000_IP4AT +#define E1000_82542_IP6AT E1000_IP6AT +#define E1000_82542_WUPL E1000_WUPL +#define E1000_82542_WUPM E1000_WUPM +#define E1000_82542_FFLT E1000_FFLT +#define E1000_82542_FFMT E1000_FFMT +#define E1000_82542_FFVT E1000_FFVT + +/* Statistics counters collected by the MAC */ +struct e1000_hw_stats { + uint64_t crcerrs; + uint64_t algnerrc; + uint64_t symerrs; + uint64_t rxerrc; + uint64_t mpc; + uint64_t scc; + uint64_t ecol; + uint64_t mcc; + uint64_t latecol; + uint64_t colc; + uint64_t dc; + uint64_t tncrs; + uint64_t sec; + uint64_t cexterr; + uint64_t rlec; + uint64_t xonrxc; + uint64_t xontxc; + uint64_t xoffrxc; + uint64_t xofftxc; + uint64_t fcruc; + uint64_t prc64; + uint64_t prc127; + uint64_t prc255; + uint64_t prc511; + uint64_t prc1023; + uint64_t prc1522; + uint64_t gprc; + uint64_t bprc; + uint64_t mprc; + uint64_t gptc; + uint64_t gorcl; + uint64_t gorch; + uint64_t gotcl; + uint64_t gotch; + uint64_t rnbc; + uint64_t ruc; + uint64_t rfc; + uint64_t roc; + uint64_t rjc; + uint64_t mgprc; + uint64_t mgpdc; + uint64_t mgptc; + uint64_t torl; + uint64_t torh; + uint64_t totl; + uint64_t toth; + uint64_t tpr; + uint64_t tpt; + uint64_t ptc64; + uint64_t ptc127; + uint64_t ptc255; + uint64_t ptc511; + uint64_t ptc1023; + uint64_t ptc1522; + uint64_t mptc; + uint64_t bptc; + uint64_t tsctc; + uint64_t tsctfc; +}; + +/* Structure containing variables used by the shared code (e1000_hw.c) */ +struct e1000_hw { + uint8_t *hw_addr; + e1000_mac_type mac_type; + e1000_media_type media_type; + void *back; + e1000_fc_type fc; + e1000_bus_speed 
bus_speed; + e1000_bus_width bus_width; + e1000_bus_type bus_type; + uint32_t io_base; + uint32_t phy_id; + uint32_t phy_revision; + uint32_t phy_addr; + uint32_t original_fc; + uint32_t txcw; + uint32_t autoneg_failed; + uint32_t max_frame_size; + uint32_t min_frame_size; + uint32_t mc_filter_type; + uint32_t num_mc_addrs; + uint32_t collision_delta; + uint32_t tx_packet_delta; + uint32_t ledctl_default; + uint32_t ledctl_mode1; + uint32_t ledctl_mode2; + uint16_t autoneg_advertised; + uint16_t pci_cmd_word; + uint16_t fc_high_water; + uint16_t fc_low_water; + uint16_t fc_pause_time; + uint16_t current_ifs_val; + uint16_t ifs_min_val; + uint16_t ifs_max_val; + uint16_t ifs_step_size; + uint16_t ifs_ratio; + uint16_t device_id; + uint16_t vendor_id; + uint16_t subsystem_id; + uint16_t subsystem_vendor_id; + uint8_t revision_id; + uint8_t autoneg; + uint8_t mdix; + uint8_t forced_speed_duplex; + uint8_t wait_autoneg_complete; + uint8_t dma_fairness; + uint8_t mac_addr[NODE_ADDRESS_SIZE]; + uint8_t perm_mac_addr[NODE_ADDRESS_SIZE]; + boolean_t disable_polarity_correction; + boolean_t get_link_status; + boolean_t tbi_compatibility_en; + boolean_t tbi_compatibility_on; + boolean_t fc_send_xon; + boolean_t report_tx_early; + boolean_t adaptive_ifs; + boolean_t ifs_params_forced; + boolean_t in_ifs_mode; +}; + + +#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */ +#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */ + +/* Register Bit Masks */ +/* Device Control */ +#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ +#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ +#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ +#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ +#define E1000_CTRL_TME 0x00000010 /* Test mode. 
0=normal,1=test */ +#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ +#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ +#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ +#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ +#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ +#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ +#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ +#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ +#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ +#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ +#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ +#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ +#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ +#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ +#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ +#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ +#define E1000_CTRL_RST 0x04000000 /* Global reset */ +#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ +#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ +#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ +#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ +#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ + +/* Device Status */ +#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ +#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ +#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ +#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define 
E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ +#define E1000_STATUS_SPEED_MASK 0x000000C0 +#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ +#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ +#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ +#define E1000_STATUS_PCI66 0x00000800 /* In 66MHz slot */ +#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ +#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ +#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ + +/* Constants used to interpret the masked PCI-X bus speed. */ +#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */ +#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */ +#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */ + +/* EEPROM/Flash Control */ +#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ +#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */ +#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */ +#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */ +#define E1000_EECD_FWE_MASK 0x00000030 +#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ +#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ +#define E1000_EECD_FWE_SHIFT 4 +#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */ +#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */ +#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */ +#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */ + +/* EEPROM Read */ +#define E1000_EERD_START 0x00000001 /* Start Read */ +#define E1000_EERD_DONE 0x00000010 /* Read Done */ +#define E1000_EERD_ADDR_SHIFT 8 +#define E1000_EERD_ADDR_MASK 0x0000FF00 /* Read Address */ +#define E1000_EERD_DATA_SHIFT 16 +#define E1000_EERD_DATA_MASK 0xFFFF0000 /* Read Data */ + +/*
Extended Device Control */ +#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */ +#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */ +#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN +#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */ +#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */ +#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Definable Pin 4 */ +#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Definable Pin 5 */ +#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA +#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Definable Pin 6 */ +#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Definable Pin 7 */ +#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */ +#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */ +#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ +#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */ +#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */ +#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ +#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */ +#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ +#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 +#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 +#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000 +#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000 +#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000 +#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000 +#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000 + +/* MDI Control */ +#define E1000_MDIC_DATA_MASK 0x0000FFFF +#define E1000_MDIC_REG_MASK 0x001F0000 +#define E1000_MDIC_REG_SHIFT 16 +#define E1000_MDIC_PHY_MASK 0x03E00000 +#define E1000_MDIC_PHY_SHIFT 21 +#define E1000_MDIC_OP_WRITE 0x04000000 +#define E1000_MDIC_OP_READ
0x08000000 +#define E1000_MDIC_READY 0x10000000 +#define E1000_MDIC_INT_EN 0x20000000 +#define E1000_MDIC_ERROR 0x40000000 + +/* LED Control */ +#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F +#define E1000_LEDCTL_LED0_MODE_SHIFT 0 +#define E1000_LEDCTL_LED0_IVRT 0x00000040 +#define E1000_LEDCTL_LED0_BLINK 0x00000080 +#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00 +#define E1000_LEDCTL_LED1_MODE_SHIFT 8 +#define E1000_LEDCTL_LED1_IVRT 0x00004000 +#define E1000_LEDCTL_LED1_BLINK 0x00008000 +#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000 +#define E1000_LEDCTL_LED2_MODE_SHIFT 16 +#define E1000_LEDCTL_LED2_IVRT 0x00400000 +#define E1000_LEDCTL_LED2_BLINK 0x00800000 +#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000 +#define E1000_LEDCTL_LED3_MODE_SHIFT 24 +#define E1000_LEDCTL_LED3_IVRT 0x40000000 +#define E1000_LEDCTL_LED3_BLINK 0x80000000 + +#define E1000_LEDCTL_MODE_LINK_10_1000 0x0 +#define E1000_LEDCTL_MODE_LINK_100_1000 0x1 +#define E1000_LEDCTL_MODE_LINK_UP 0x2 +#define E1000_LEDCTL_MODE_ACTIVITY 0x3 +#define E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4 +#define E1000_LEDCTL_MODE_LINK_10 0x5 +#define E1000_LEDCTL_MODE_LINK_100 0x6 +#define E1000_LEDCTL_MODE_LINK_1000 0x7 +#define E1000_LEDCTL_MODE_PCIX_MODE 0x8 +#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9 +#define E1000_LEDCTL_MODE_COLLISION 0xA +#define E1000_LEDCTL_MODE_BUS_SPEED 0xB +#define E1000_LEDCTL_MODE_BUS_SIZE 0xC +#define E1000_LEDCTL_MODE_PAUSED 0xD +#define E1000_LEDCTL_MODE_LED_ON 0xE +#define E1000_LEDCTL_MODE_LED_OFF 0xF + +/* Receive Address */ +#define E1000_RAH_AV 0x80000000 /* Receive address valid */ + +/* Interrupt Cause Read */ +#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ +#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ +#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ +#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ +#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min.
threshold (0) */ +#define E1000_ICR_RXO 0x00000040 /* rx overrun */ +#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ +#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ +#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ +#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ +#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ +#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ +#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ +#define E1000_ICR_TXD_LOW 0x00008000 +#define E1000_ICR_SRPD 0x00010000 + +/* Interrupt Cause Set */ +#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ +#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ +#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ +#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ +#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ +#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ +#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ +#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ +#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ +#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ +#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_ICS_SRPD E1000_ICR_SRPD + +/* Interrupt Mask Set */ +#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ +#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ +#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ +#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ +#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ +#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ +#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ +#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ +#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ +#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_IMS_SRPD E1000_ICR_SRPD + +/* Interrupt Mask Clear */ +#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */ +#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ +#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ +#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ +#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ +#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ +#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ +#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ +#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ +#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ +#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_IMC_SRPD E1000_ICR_SRPD + +/* Receive Control */ +#define E1000_RCTL_RST 0x00000001 /* Software reset */ +#define E1000_RCTL_EN 0x00000002 /* enable */ +#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ +#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ +#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ +#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ +#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ +#define E1000_RCTL_LBM_MAC 0x00000040 
/* MAC loopback mode */ +#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ +#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ +#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ +#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ +#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ +#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ +#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ +#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ +#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ +#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ +#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ +#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ +#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ + +/* Receive Descriptor */ +#define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */ +#define E1000_RDT_FPDB 0x80000000 /* Flush descriptor block */ +#define E1000_RDLEN_LEN 0x0007ff80 /* descriptor length */ 
+#define E1000_RDH_RDH 0x0000ffff /* receive descriptor head */ +#define E1000_RDT_RDT 0x0000ffff /* receive descriptor tail */ + +/* Flow Control */ +#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ +#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */ +#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ +#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ + +/* Receive Descriptor Control */ +#define E1000_RXDCTL_PTHRESH 0x0000003F /* RXDCTL Prefetch Threshold */ +#define E1000_RXDCTL_HTHRESH 0x00003F00 /* RXDCTL Host Threshold */ +#define E1000_RXDCTL_WTHRESH 0x003F0000 /* RXDCTL Writeback Threshold */ +#define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */ + +/* Transmit Descriptor Control */ +#define E1000_TXDCTL_PTHRESH 0x000000FF /* TXDCTL Prefetch Threshold */ +#define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */ +#define E1000_TXDCTL_WTHRESH 0x00FF0000 /* TXDCTL Writeback Threshold */ +#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ +#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */ +#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ + +/* Transmit Configuration Word */ +#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ +#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */ +#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ +#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW asym pause direction */ +#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ +#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */ +#define E1000_TXCW_NP 0x00008000 /* TXCW next page */ +#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */ +#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */ +#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ + +/* Receive Configuration Word */ +#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ +#define E1000_RXCW_NC 0x04000000 /* Receive config 
no carrier */ +#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ +#define E1000_RXCW_CC 0x10000000 /* Receive config change */ +#define E1000_RXCW_C 0x20000000 /* Receive config */ +#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ +#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */ + +/* Transmit Control */ +#define E1000_TCTL_RST 0x00000001 /* software reset */ +#define E1000_TCTL_EN 0x00000002 /* enable tx */ +#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ +#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ +#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ +#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ +#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ +#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ +#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ +#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ + +/* Receive Checksum Control */ +#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */ +#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ +#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ +#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */ + +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define E1000_WUC_APME 0x00000001 /* APM Enable */ +#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ +#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ +#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ + +/* Wake Up Filter Control */ +#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define E1000_WUFC_ARP 
0x00000020 /* ARP Request Packet Wakeup Enable */ +#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ +#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ +#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ +#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ +#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ +#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ +#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */ +#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ +#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ + +/* Wake Up Status */ +#define E1000_WUS_LNKC 0x00000001 /* Link Status Changed */ +#define E1000_WUS_MAG 0x00000002 /* Magic Packet Received */ +#define E1000_WUS_EX 0x00000004 /* Directed Exact Received */ +#define E1000_WUS_MC 0x00000008 /* Directed Multicast Received */ +#define E1000_WUS_BC 0x00000010 /* Broadcast Received */ +#define E1000_WUS_ARP 0x00000020 /* ARP Request Packet Received */ +#define E1000_WUS_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Received */ +#define E1000_WUS_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Received */ +#define E1000_WUS_FLX0 0x00010000 /* Flexible Filter 0 Match */ +#define E1000_WUS_FLX1 0x00020000 /* Flexible Filter 1 Match */ +#define E1000_WUS_FLX2 0x00040000 /* Flexible Filter 2 Match */ +#define E1000_WUS_FLX3 0x00080000 /* Flexible Filter 3 Match */ +#define E1000_WUS_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ + +/* Management Control */ +#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ +#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ +#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ +#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RMCP 026Fh Filtering */ +#define E1000_MANC_0298_EN 0x00000200 /* Enable RMCP 0298h Filtering */ +#define 
E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ +#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ +#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ +#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ +#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery + * Filtering */ +#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ +#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ +#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ +#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ +#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ +#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ +#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ +#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ +#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ + +#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ +#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ + +/* Wake Up Packet Length */ +#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */ + +#define E1000_MDALIGN 4096 + +/* EEPROM Commands */ +#define EEPROM_READ_OPCODE 0x6 /* EEPROM read opcode */ +#define EEPROM_WRITE_OPCODE 0x5 /* EEPROM write opcode */ +#define EEPROM_ERASE_OPCODE 0x7 /* EEPROM erase opcode */ +#define EEPROM_EWEN_OPCODE 0x13 /* EEPROM erase/write enable */ +#define EEPROM_EWDS_OPCODE 0x10 /* EEPROM erase/write disable */ + +/* EEPROM Word Offsets */ +#define EEPROM_COMPAT 0x0003 +#define EEPROM_ID_LED_SETTINGS 0x0004 +#define EEPROM_INIT_CONTROL1_REG 0x000A +#define EEPROM_INIT_CONTROL2_REG 0x000F +#define EEPROM_FLASH_VERSION 0x0032 +#define EEPROM_CHECKSUM_REG 0x003F + +/* Word definitions for ID LED Settings */ +#define ID_LED_RESERVED_0000 0x0000 +#define ID_LED_RESERVED_FFFF 0xFFFF +#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ + (ID_LED_OFF1_OFF2 << 8) | \ + 
(ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_DEF1_DEF2)) +#define ID_LED_DEF1_DEF2 0x1 +#define ID_LED_DEF1_ON2 0x2 +#define ID_LED_DEF1_OFF2 0x3 +#define ID_LED_ON1_DEF2 0x4 +#define ID_LED_ON1_ON2 0x5 +#define ID_LED_ON1_OFF2 0x6 +#define ID_LED_OFF1_DEF2 0x7 +#define ID_LED_OFF1_ON2 0x8 +#define ID_LED_OFF1_OFF2 0x9 + +/* Mask bits for fields in Word 0x03 of the EEPROM */ +#define EEPROM_COMPAT_SERVER 0x0400 +#define EEPROM_COMPAT_CLIENT 0x0200 + +/* Mask bits for fields in Word 0x0a of the EEPROM */ +#define EEPROM_WORD0A_ILOS 0x0010 +#define EEPROM_WORD0A_SWDPIO 0x01E0 +#define EEPROM_WORD0A_LRST 0x0200 +#define EEPROM_WORD0A_FD 0x0400 +#define EEPROM_WORD0A_66MHZ 0x0800 + +/* Mask bits for fields in Word 0x0f of the EEPROM */ +#define EEPROM_WORD0F_PAUSE_MASK 0x3000 +#define EEPROM_WORD0F_PAUSE 0x1000 +#define EEPROM_WORD0F_ASM_DIR 0x2000 +#define EEPROM_WORD0F_ANE 0x0800 +#define EEPROM_WORD0F_SWPDIO_EXT 0x00F0 + +/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. 
*/ +#define EEPROM_SUM 0xBABA + +/* EEPROM Map defines (WORD OFFSETS)*/ +#define EEPROM_NODE_ADDRESS_BYTE_0 0 +#define EEPROM_PBA_BYTE_1 8 + +/* EEPROM Map Sizes (Byte Counts) */ +#define PBA_SIZE 4 + +/* Collision related configuration parameters */ +#define E1000_COLLISION_THRESHOLD 16 +#define E1000_CT_SHIFT 4 +#define E1000_COLLISION_DISTANCE 64 +#define E1000_FDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE +#define E1000_HDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE +#define E1000_GB_HDX_COLLISION_DISTANCE 512 +#define E1000_COLD_SHIFT 12 + +/* The number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define REQ_RX_DESCRIPTOR_MULTIPLE 8 + +/* Default values for the transmit IPG register */ +#define DEFAULT_82542_TIPG_IPGT 10 +#define DEFAULT_82543_TIPG_IPGT_FIBER 9 +#define DEFAULT_82543_TIPG_IPGT_COPPER 8 + +#define E1000_TIPG_IPGT_MASK 0x000003FF +#define E1000_TIPG_IPGR1_MASK 0x000FFC00 +#define E1000_TIPG_IPGR2_MASK 0x3FF00000 + +#define DEFAULT_82542_TIPG_IPGR1 2 +#define DEFAULT_82543_TIPG_IPGR1 8 +#define E1000_TIPG_IPGR1_SHIFT 10 + +#define DEFAULT_82542_TIPG_IPGR2 10 +#define DEFAULT_82543_TIPG_IPGR2 6 +#define E1000_TIPG_IPGR2_SHIFT 20 + +#define E1000_TXDMAC_DPP 0x00000001 + +/* Adaptive IFS defines */ +#define TX_THRESHOLD_START 8 +#define TX_THRESHOLD_INCREMENT 10 +#define TX_THRESHOLD_DECREMENT 1 +#define TX_THRESHOLD_STOP 190 +#define TX_THRESHOLD_DISABLE 0 +#define TX_THRESHOLD_TIMER_MS 10000 +#define MIN_NUM_XMITS 1000 +#define IFS_MAX 80 +#define IFS_STEP 10 +#define IFS_MIN 40 +#define IFS_RATIO 4 + +/* PBA constants */ +#define E1000_PBA_16K 0x0010 /* 16KB, default TX allocation */ +#define E1000_PBA_24K 0x0018 +#define E1000_PBA_40K 0x0028 +#define E1000_PBA_48K 0x0030 /* 48KB, default RX allocation */ + +/* Flow Control Constants */ +#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 +#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 +#define FLOW_CONTROL_TYPE 0x8808 + +/* The historical 
defaults for the flow control values are given below. */ +#define FC_DEFAULT_HI_THRESH (0x8000) /* 32KB */ +#define FC_DEFAULT_LO_THRESH (0x4000) /* 16KB */ +#define FC_DEFAULT_TX_TIMER (0x100) /* ~130 us */ + +/* PCIX Config space */ +#define PCIX_COMMAND_REGISTER 0xE6 +#define PCIX_STATUS_REGISTER_LO 0xE8 +#define PCIX_STATUS_REGISTER_HI 0xEA + +#define PCIX_COMMAND_MMRBC_MASK 0x000C +#define PCIX_COMMAND_MMRBC_SHIFT 0x2 +#define PCIX_STATUS_HI_MMRBC_MASK 0x0060 +#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 +#define PCIX_STATUS_HI_MMRBC_4K 0x3 +#define PCIX_STATUS_HI_MMRBC_2K 0x2 + + +/* The number of bits that we need to shift right to move the "pause" + * bits from the EEPROM (bits 13:12) to the "pause" (bits 8:7) field + * in the TXCW register + */ +#define PAUSE_SHIFT 5 + +/* The number of bits that we need to shift left to move the "SWDPIO" + * bits from the EEPROM (bits 8:5) to the "SWDPIO" (bits 25:22) field + * in the CTRL register + */ +#define SWDPIO_SHIFT 17 + +/* The number of bits that we need to shift left to move the "SWDPIO_EXT" + * bits from the EEPROM word F (bits 7:4) to bits 11:8 of the + * Extended CTRL register (this field is not + * in the CTRL register). + */ +#define SWDPIO__EXT_SHIFT 4 + +/* The number of bits that we need to shift left to move the "ILOS" + * bit from the EEPROM (bit 4) to the "ILOS" (bit 7) field + * in the CTRL register + */ +#define ILOS_SHIFT 3 + + +#define RECEIVE_BUFFER_ALIGN_SIZE (256) + +/* The number of milliseconds we wait for auto-negotiation to complete */ +#define LINK_UP_TIMEOUT 500 + +#define E1000_TX_BUFFER_SIZE ((uint32_t)1514) + +/* The carrier extension symbol, as received by the NIC. 
*/ +#define CARRIER_EXTENSION 0x0F + +/* TBI_ACCEPT macro definition: + * + * This macro requires: + * adapter = a pointer to struct e1000_hw + * status = the 8 bit status field of the RX descriptor with EOP set + * errors = the 8 bit error field of the RX descriptor with EOP set + * length = the sum of all the length fields of the RX descriptors that + * make up the current frame + * last_byte = the last byte of the frame DMAed by the hardware + * adapter->max_frame_size = the maximum frame length we want to accept. + * adapter->min_frame_size = the minimum frame length we want to accept. + * + * This macro is a conditional that should be used in the interrupt + * handler's Rx processing routine when RxErrors have been detected. + * + * Typical use: + * ... + * if (TBI_ACCEPT(adapter, status, errors, length, last_byte)) { + * accept_frame = TRUE; + * e1000_tbi_adjust_stats(adapter, MacAddress); + * frame_length--; + * } else { + * accept_frame = FALSE; + * } + * ... + */ + +#define TBI_ACCEPT(adapter, status, errors, length, last_byte) \ + ((adapter)->tbi_compatibility_on && \ + (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ + ((last_byte) == CARRIER_EXTENSION) && \ + (((status) & E1000_RXD_STAT_VP) ? \ + (((length) > ((adapter)->min_frame_size - VLAN_TAG_SIZE)) && \ + ((length) <= ((adapter)->max_frame_size + 1))) : \ + (((length) > (adapter)->min_frame_size) && \ + ((length) <= ((adapter)->max_frame_size + VLAN_TAG_SIZE + 1))))) + + +/* Structures, enums, and macros for the PHY */ + +/* Bit definitions for the Management Data IO (MDIO) and Management Data + * Clock (MDC) pins in the Device Control Register. 
+ */ +#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0 +#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0 +#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2 +#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2 +#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3 +#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3 +#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR +#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA + +/* PHY 1000 MII Register/Bit Definitions */ +/* PHY Registers defined by IEEE */ +#define PHY_CTRL 0x00 /* Control Register */ +#define PHY_STATUS 0x01 /* Status Register */ +#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ +#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ +#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ +#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ +#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ +#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */ +#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ +#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ +#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ +#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ + +/* M88E1000 Specific Registers */ +#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ +#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ +#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ +#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ +#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ +#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ + +#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ + +/* PHY Control Register */ +#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ +#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ +#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ +#define 
MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ +#define MII_CR_POWER_DOWN 0x0800 /* Power down */ +#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ +#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ +#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ + +/* PHY Status Register */ +#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ +#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ +#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ +#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ +#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ +#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ +#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ +#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ +#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ +#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ +#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ + +/* Autoneg Advertisement Register */ +#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ +#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ +#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ +#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ +#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ +#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ +#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ +#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ +#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault 
detected */ +#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ + +/* Link Partner Ability Register (Base Page) */ +#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ +#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */ +#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */ +#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */ +#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */ +#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ +#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ +#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ +#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */ +#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */ +#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ + +/* Autoneg Expansion Register */ +#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ +#define NWAY_ER_PAGE_RXD 0x0002 /* New page received */ +#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* Local device is Next Page able */ +#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is Next Page able */ +#define NWAY_ER_PAR_DETECT_FAULT 0x0100 /* Parallel Detection Fault; NOTE(review): IEEE 802.3 reg 6 puts this at bit 4 (0x0010) - verify value */ + +/* Next Page TX Register */ +#define NPTX_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */ +#define NPTX_TOGGLE 0x0800 /* Toggles between exchanges + * of different NP + */ +#define NPTX_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg + * 0 = cannot comply with msg + */ +#define NPTX_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */ +#define NPTX_NEXT_PAGE 0x8000 /* 1 = additional NP will follow + * 0 = sending last NP + */ + +/* Link Partner Next Page Register */ +#define LP_RNPR_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */ +#define LP_RNPR_TOGGLE 0x0800 /* Toggles between exchanges + * of different NP + */ +#define 
LP_RNPR_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg + * 0 = cannot comply with msg + */ +#define LP_RNPR_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */ +#define LP_RNPR_ACKNOWLDGE 0x4000 /* 1 = ACK / 0 = NO ACK */ +#define LP_RNPR_NEXT_PAGE 0x8000 /* 1 = additional NP will follow + * 0 = sending last NP + */ + +/* 1000BASE-T Control Register */ +#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ +#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ +#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ +#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */ + /* 0=DTE device */ +#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ + /* 0=Configure PHY as Slave */ +#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ + /* 0=Automatic Master/Slave config */ +#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ +#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ +#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ +#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ +#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ + +/* 1000BASE-T Status Register */ +#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */ +#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */ +#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ +#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ +#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ +#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ +#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */ +#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ +#define SR_1000T_REMOTE_RX_STATUS_SHIFT 12 +#define SR_1000T_LOCAL_RX_STATUS_SHIFT 13 + +/* Extended Status Register */ +#define IEEE_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD 
capable */ +#define IEEE_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */ +#define IEEE_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */ +#define IEEE_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */ + +#define PHY_TX_POLARITY_MASK 0x0100 /* register 10h bit 8 (polarity bit) */ +#define PHY_TX_NORMAL_POLARITY 0 /* register 10h bit 8 (normal polarity) */ + +#define AUTO_POLARITY_DISABLE 0x0010 /* register 11h bit 4 */ + /* (0=enable, 1=disable) */ + +/* M88E1000 PHY Specific Control Register */ +#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ +#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ +#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */ +#define M88E1000_PSCR_CLK125_DISABLE 0x0010 /* 1=CLK125 low, + * 0=CLK125 toggling + */ +#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ + /* Manual MDI configuration */ +#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ +#define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* 1000BASE-T: Auto crossover, + * 100BASE-TX/10BASE-T: + * MDI Mode + */ +#define M88E1000_PSCR_AUTO_X_MODE 0x0060 /* Auto crossover enabled + * all speeds. 
+ */ +#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE 0x0080 + /* 1=Enable Extended 10BASE-T distance + * (Lower 10BASE-T RX Threshold) + * 0=Normal 10BASE-T RX Threshold */ +#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100 + /* 1=5-Bit interface in 100BASE-TX + * 0=MII interface in 100BASE-TX */ +#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */ +#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */ +#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ + +#define M88E1000_PSCR_POLARITY_REVERSAL_SHIFT 1 +#define M88E1000_PSCR_AUTO_X_MODE_SHIFT 5 +#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT 7 + +/* M88E1000 PHY Specific Status Register */ +#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */ +#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ +#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ +#define M88E1000_PSSR_CABLE_LENGTH 0x0380 /* 0=<50M;1=50-80M;2=80-110M; + * 3=110-140M;4=>140M */ +#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ +#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ +#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */ +#define M88E1000_PSSR_DPLX 0x2000 /* 1=Full Duplex, 0=Half Duplex */ +#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ +#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */ +#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */ +#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ + +#define M88E1000_PSSR_REV_POLARITY_SHIFT 1 +#define M88E1000_PSSR_MDIX_SHIFT 6 +#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 + +/* M88E1000 Extended PHY Specific Control Register */ +#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */ +#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 /* 1=Lost lock detect enabled. 
+ * Will assert lost lock and bring + * link down if idle not seen + * within 1ms in 1000BASE-T + */ +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the master */ +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400 +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800 +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00 +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the slave */ +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000 +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200 +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300 +#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */ +#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ +#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */ + +/* Bit definitions for valid PHY IDs. */ +#define M88E1000_E_PHY_ID 0x01410C50 +#define M88E1000_I_PHY_ID 0x01410C30 +#define M88E1011_I_PHY_ID 0x01410C20 +#define M88E1000_12_PHY_ID M88E1000_E_PHY_ID +#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID +#define M88E1011_I_REV_4 0x04 + +/* Miscellaneous PHY bit definitions. 
*/ +#define PHY_PREAMBLE 0xFFFFFFFF +#define PHY_SOF 0x01 +#define PHY_OP_READ 0x02 +#define PHY_OP_WRITE 0x01 +#define PHY_TURNAROUND 0x02 +#define PHY_PREAMBLE_SIZE 32 +#define MII_CR_SPEED_1000 0x0040 +#define MII_CR_SPEED_100 0x2000 +#define MII_CR_SPEED_10 0x0000 +#define E1000_PHY_ADDRESS 0x01 +#define PHY_AUTO_NEG_TIME 45 /* 4.5 Seconds */ +#define PHY_FORCE_TIME 20 /* 2.0 Seconds */ +#define PHY_REVISION_MASK 0xFFFFFFF0 +#define DEVICE_SPEED_MASK 0x00000300 /* Device Ctrl Reg Speed Mask */ +#define REG4_SPEED_MASK 0x01E0 +#define REG9_SPEED_MASK 0x0300 +#define ADVERTISE_10_HALF 0x0001 +#define ADVERTISE_10_FULL 0x0002 +#define ADVERTISE_100_HALF 0x0004 +#define ADVERTISE_100_FULL 0x0008 +#define ADVERTISE_1000_HALF 0x0010 +#define ADVERTISE_1000_FULL 0x0020 +#define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x002F /* Everything but 1000-Half */ + +#endif /* _E1000_HW_H_ */ diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_main.c linux-2.4.19/drivers/net/e1000/e1000_main.c --- linux-2.4.19.orig/drivers/net/e1000/e1000_main.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/e1000_main.c Wed Feb 12 12:09:01 2003 @@ -0,0 +1,2287 @@ +/******************************************************************************* + + + Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#define __E1000_MAIN__ +#include "e1000.h" + +/* Change Log + * + * 4.4.12 10/15/02 + * o Clean up: use members of pci_device rather than direct calls to + * pci_read_config_word. + * o Bug fix: changed default flow control settings. + * o Clean up: ethtool file now has an inclusive list for adapters in the + * Wake-On-LAN capabilities instead of an exclusive list. + * o Bug fix: miscellaneous WoL bug fixes. + * o Added software interrupt for clearing rx ring + * o Bug fix: easier to undo "forcing" of 1000/fd using ethtool. + * o Now setting netdev->mem_end in e1000_probe. + * o Clean up: Moved tx_timeout from interrupt context to process context + * using schedule_task. + * + * 4.3.15 8/9/02 + * o Converted from Dual BSD/GPL license to GPL license. + * o Clean up: use pci_[clear|set]_mwi rather than direct calls to + * pci_write_config_word. + * o Bug fix: added read-behind-write calls to post writes before delays. + * o Bug fix: removed mdelay busy-waits in interrupt context. + * o Clean up: direct clear of descriptor bits rather than using memset. + * o Bug fix: added wmb() for ia-64 between descriptor writes and advancing + * descriptor tail. + * o Feature: added locking mechanism for asf functionality. + * o Feature: exposed two Tx and one Rx interrupt delay knobs for finer + * control over interrupt rate tuning. + * o Misc ethtool bug fixes. 
+ *
+ * 4.3.2 7/5/02
+ */
+
+char e1000_driver_name[] = "e1000";
+char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
+char e1000_driver_version[] = "4.4.12-k1";
+char e1000_copyright[] = "Copyright (c) 1999-2002 Intel Corporation.";
+
+/* e1000_pci_tbl - PCI Device ID Table
+ *
+ * Private driver_data field (last one) stores an index into e1000_strings
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, String Index }
+ *
+ * The String Index values used below are 0, 1 and 2, matching the three
+ * entries of e1000_strings; e1000_probe uses the index without a range
+ * check, so a new index needs a new string.
+ */
+static struct pci_device_id e1000_pci_tbl[] __devinitdata = {
+	/* Intel(R) PRO/1000 Network Connection */
+	{0x8086, 0x1000, 0x8086, 0x1000, 0, 0, 0},
+	{0x8086, 0x1001, 0x8086, 0x1003, 0, 0, 0},
+	{0x8086, 0x1004, 0x8086, 0x1004, 0, 0, 0},
+	{0x8086, 0x1008, 0x8086, 0x1107, 0, 0, 0},
+	{0x8086, 0x1009, 0x8086, 0x1109, 0, 0, 0},
+	{0x8086, 0x100C, 0x8086, 0x1112, 0, 0, 0},
+	{0x8086, 0x100E, 0x8086, 0x001E, 0, 0, 0},
+	/* Compaq Gigabit Ethernet Server Adapter */
+	{0x8086, 0x1000, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+	{0x8086, 0x1001, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+	{0x8086, 0x1004, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+	/* IBM Mobile, Desktop & Server Adapters */
+	{0x8086, 0x1000, 0x1014, PCI_ANY_ID, 0, 0, 2},
+	{0x8086, 0x1001, 0x1014, PCI_ANY_ID, 0, 0, 2},
+	{0x8086, 0x1004, 0x1014, PCI_ANY_ID, 0, 0, 2},
+	/* Generic (any subsystem vendor/device; uses string index 0) */
+	{0x8086, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x100C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x100D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x100E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x100F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1011, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1010, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1016, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x1017, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x8086, 0x101E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	/* required last entry */
+	{0,}
+};
+
+MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
+
+static char *e1000_strings[] = {	/* indexed by pci_device_id.driver_data */
+	"Intel(R) PRO/1000 Network Connection",
+	"Compaq Gigabit Ethernet Server Adapter",
+	"IBM Mobile, Desktop & Server Adapters"
+};
+
+/* Local Function Prototypes */
+
+int e1000_up(struct e1000_adapter *adapter);
+void e1000_down(struct e1000_adapter *adapter);
+void e1000_reset(struct e1000_adapter *adapter);
+
+static int e1000_init_module(void);
+static void e1000_exit_module(void);
+static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void e1000_remove(struct pci_dev *pdev);
+static int e1000_sw_init(struct e1000_adapter *adapter);
+static int e1000_open(struct net_device *netdev);
+static int e1000_close(struct net_device *netdev);
+static int e1000_setup_tx_resources(struct e1000_adapter *adapter);
+static int e1000_setup_rx_resources(struct e1000_adapter *adapter);
+static void e1000_configure_tx(struct e1000_adapter *adapter);
+static void e1000_configure_rx(struct e1000_adapter *adapter);
+static void e1000_setup_rctl(struct e1000_adapter *adapter);
+static void e1000_clean_tx_ring(struct e1000_adapter *adapter);
+static void e1000_clean_rx_ring(struct e1000_adapter *adapter);
+static void e1000_free_tx_resources(struct e1000_adapter *adapter);
+static void e1000_free_rx_resources(struct e1000_adapter *adapter);
+static void e1000_set_multi(struct net_device *netdev);
+static void e1000_update_phy_info(unsigned long data);
+static void e1000_watchdog(unsigned long data);
+static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
+static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
+static int e1000_set_mac(struct net_device *netdev, void *p);
+static void e1000_update_stats(struct e1000_adapter *adapter);
+static inline void e1000_irq_disable(struct e1000_adapter *adapter);
+static inline void e1000_irq_enable(struct e1000_adapter *adapter);
+static void e1000_intr(int irq, void *data, struct pt_regs *regs);
+static void e1000_clean_tx_irq(struct e1000_adapter *adapter);
+static void e1000_clean_rx_irq(struct e1000_adapter *adapter);
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
+static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
+static inline void e1000_rx_checksum(struct e1000_adapter *adapter,
+                                     struct e1000_rx_desc *rx_desc,
+                                     struct sk_buff *skb);
+static void e1000_tx_timeout(struct net_device *dev);
+static void e1000_tx_timeout_task(struct net_device *dev);
+
+static void e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp);
+static void e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid);
+static void e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid);
+
+static int e1000_notify_reboot(struct notifier_block *, unsigned long event, void *ptr);
+static int e1000_notify_netdev(struct notifier_block *, unsigned long event, void *ptr);
+static int e1000_suspend(struct pci_dev *pdev, uint32_t state);
+#ifdef CONFIG_PM
+static int e1000_resume(struct pci_dev *pdev);
+#endif
+
+struct notifier_block e1000_notifier_reboot = {	/* registered in e1000_init_module */
+	.notifier_call	= e1000_notify_reboot,
+	.next		= NULL,
+	.priority	= 0
+};
+
+struct notifier_block e1000_notifier_netdev = {	/* registered in e1000_init_module */
+	.notifier_call	= e1000_notify_netdev,
+	.next		= NULL,
+	.priority	= 0
+};
+
+/* Exported from other modules */
+
+extern void e1000_check_options(struct e1000_adapter *adapter);
+extern void e1000_proc_dev_setup(struct e1000_adapter *adapter);
+extern void e1000_proc_dev_free(struct e1000_adapter *adapter);
+extern int e1000_ethtool_ioctl(struct net_device *netdev, struct ifreq *ifr);
+
+static struct pci_driver e1000_driver = {
+	.name     = e1000_driver_name,
+	.id_table = e1000_pci_tbl,
+	.probe    = e1000_probe,
+	.remove   = __devexit_p(e1000_remove),
+	/* Power Management Hooks */
+#ifdef CONFIG_PM
+	.suspend  = e1000_suspend,
+	.resume   = e1000_resume
+#endif
+};
+
+MODULE_AUTHOR("Intel Corporation, ");
+MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
+MODULE_LICENSE("GPL");
+
+/**
+ * e1000_init_module - Driver Registration Routine
+ *
+ * e1000_init_module is the first routine called when the driver is
+ * loaded. It prints the driver banner and copyright, registers with the
+ * PCI subsystem and, only when that registration did not fail, installs
+ * the reboot and netdevice notifiers.
+ *
+ * Returns the result of pci_module_init().
+ **/
+
+static int __init
+e1000_init_module(void)
+{
+	int ret;
+	printk(KERN_INFO "%s - version %s\n",
+	       e1000_driver_string, e1000_driver_version);
+
+	printk(KERN_INFO "%s\n", e1000_copyright);
+
+	ret = pci_module_init(&e1000_driver);
+	if(ret >= 0) {	/* notifiers are only installed once the PCI driver is registered */
+		register_reboot_notifier(&e1000_notifier_reboot);
+		register_netdevice_notifier(&e1000_notifier_netdev);
+	}
+	return ret;
+}
+
+module_init(e1000_init_module);
+
+/**
+ * e1000_exit_module - Driver Exit Cleanup Routine
+ *
+ * e1000_exit_module is called just before the driver is removed
+ * from memory.
+ **/ + +static void __exit +e1000_exit_module(void) +{ + unregister_reboot_notifier(&e1000_notifier_reboot); + unregister_netdevice_notifier(&e1000_notifier_netdev); + pci_unregister_driver(&e1000_driver); +} + +module_exit(e1000_exit_module); + + +int +e1000_up(struct e1000_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + if(request_irq(netdev->irq, &e1000_intr, SA_SHIRQ | SA_SAMPLE_RANDOM, + netdev->name, netdev)) + return -1; + + /* hardware has been reset, we need to reload some things */ + + e1000_set_multi(netdev); + + e1000_configure_tx(adapter); + e1000_setup_rctl(adapter); + e1000_configure_rx(adapter); + e1000_alloc_rx_buffers(adapter); + + mod_timer(&adapter->watchdog_timer, jiffies); + e1000_irq_enable(adapter); + + return 0; +} + +void +e1000_down(struct e1000_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + e1000_irq_disable(adapter); + free_irq(netdev->irq, netdev); + del_timer_sync(&adapter->watchdog_timer); + del_timer_sync(&adapter->phy_info_timer); + adapter->link_speed = 0; + adapter->link_duplex = 0; + netif_carrier_off(netdev); + netif_stop_queue(netdev); + + e1000_reset(adapter); + e1000_clean_tx_ring(adapter); + e1000_clean_rx_ring(adapter); +} + +void +e1000_reset(struct e1000_adapter *adapter) +{ + /* Repartition Pba for greater than 9k mtu + * To take effect CTRL.RST is required. 
+ */ + + if(adapter->rx_buffer_len > E1000_RXBUFFER_8192) + E1000_WRITE_REG(&adapter->hw, PBA, E1000_JUMBO_PBA); + else + E1000_WRITE_REG(&adapter->hw, PBA, E1000_DEFAULT_PBA); + + adapter->hw.fc = adapter->hw.original_fc; + e1000_reset_hw(&adapter->hw); + if(adapter->hw.mac_type >= e1000_82544) + E1000_WRITE_REG(&adapter->hw, WUC, 0); + e1000_init_hw(&adapter->hw); + e1000_reset_adaptive(&adapter->hw); + e1000_phy_get_info(&adapter->hw, &adapter->phy_info); +} + +/** + * e1000_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in e1000_pci_tbl + * + * Returns 0 on success, negative on failure + * + * e1000_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ + +static int __devinit +e1000_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *netdev; + struct e1000_adapter *adapter; + static int cards_found = 0; + unsigned long mmio_start; + int mmio_len; + int pci_using_dac; + int i; + + if((i = pci_enable_device(pdev))) + return i; + + if(!(i = pci_set_dma_mask(pdev, PCI_DMA_64BIT))) { + pci_using_dac = 1; + } else { + if((i = pci_set_dma_mask(pdev, PCI_DMA_32BIT))) { + E1000_ERR("No usable DMA configuration, aborting\n"); + return i; + } + pci_using_dac = 0; + } + + if((i = pci_request_regions(pdev, e1000_driver_name))) + return i; + + pci_set_master(pdev); + + netdev = alloc_etherdev(sizeof(struct e1000_adapter)); + if(!netdev) + goto err_alloc_etherdev; + + SET_MODULE_OWNER(netdev); + + pci_set_drvdata(pdev, netdev); + adapter = netdev->priv; + adapter->netdev = netdev; + adapter->pdev = pdev; + adapter->hw.back = adapter; + + mmio_start = pci_resource_start(pdev, BAR_0); + mmio_len = pci_resource_len(pdev, BAR_0); + + adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); + if(!adapter->hw.hw_addr) + goto err_ioremap; + + for(i = BAR_1; i <= BAR_5; i++) { + 
if(pci_resource_len(pdev, i) == 0) + continue; + if(pci_resource_flags(pdev, i) & IORESOURCE_IO) { + adapter->hw.io_base = pci_resource_start(pdev, i); + break; + } + } + + netdev->open = &e1000_open; + netdev->stop = &e1000_close; + netdev->hard_start_xmit = &e1000_xmit_frame; + netdev->get_stats = &e1000_get_stats; + netdev->set_multicast_list = &e1000_set_multi; + netdev->set_mac_address = &e1000_set_mac; + netdev->change_mtu = &e1000_change_mtu; + netdev->do_ioctl = &e1000_ioctl; + netdev->tx_timeout = &e1000_tx_timeout; + netdev->watchdog_timeo = HZ; + netdev->vlan_rx_register = e1000_vlan_rx_register; + netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; + netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid; + + netdev->irq = pdev->irq; + netdev->mem_start = mmio_start; + netdev->mem_end = mmio_start + mmio_len; + netdev->base_addr = adapter->hw.io_base; + + adapter->bd_number = cards_found; + adapter->id_string = e1000_strings[ent->driver_data]; + + /* setup the private structure */ + + if(e1000_sw_init(adapter)) + goto err_sw_init; + + if(adapter->hw.mac_type >= e1000_82543) { + netdev->features = NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER; + } else { + netdev->features = NETIF_F_SG; + } + + if(pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + /* make sure the EEPROM is good */ + + if(e1000_validate_eeprom_checksum(&adapter->hw) < 0) { + printk(KERN_ERR "The EEPROM Checksum Is Not Valid\n"); + goto err_eeprom; + } + + /* copy the MAC address out of the EEPROM */ + + e1000_read_mac_addr(&adapter->hw); + memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len); + + if(!is_valid_ether_addr(netdev->dev_addr)) + goto err_eeprom; + + e1000_read_part_num(&adapter->hw, &(adapter->part_num)); + + e1000_get_bus_info(&adapter->hw); + + if((adapter->hw.mac_type == e1000_82544) && + (adapter->hw.bus_type == e1000_bus_type_pcix)) + + adapter->max_data_per_txd = 4096; + else + 
adapter->max_data_per_txd = MAX_JUMBO_FRAME_SIZE; + + + init_timer(&adapter->watchdog_timer); + adapter->watchdog_timer.function = &e1000_watchdog; + adapter->watchdog_timer.data = (unsigned long) adapter; + + init_timer(&adapter->phy_info_timer); + adapter->phy_info_timer.function = &e1000_update_phy_info; + adapter->phy_info_timer.data = (unsigned long) adapter; + + INIT_TQUEUE(&adapter->tx_timeout_task, + (void (*)(void *))e1000_tx_timeout_task, netdev); + + register_netdev(netdev); + memcpy(adapter->ifname, netdev->name, IFNAMSIZ); + adapter->ifname[IFNAMSIZ-1] = 0; + + /* we're going to reset, so assume we have no link for now */ + + netif_carrier_off(netdev); + netif_stop_queue(netdev); + + printk(KERN_INFO "%s: %s\n", netdev->name, adapter->id_string); + e1000_check_options(adapter); + e1000_proc_dev_setup(adapter); + + /* Initial Wake on LAN setting + * If APM wake is enabled in the EEPROM, + * enable the ACPI Magic Packet filter + */ + + if((adapter->hw.mac_type >= e1000_82544) && + (E1000_READ_REG(&adapter->hw, WUC) & E1000_WUC_APME)) + adapter->wol |= E1000_WUFC_MAG; + + /* reset the hardware with the new settings */ + + e1000_reset(adapter); + + cards_found++; + return 0; + +err_sw_init: +err_eeprom: + iounmap(adapter->hw.hw_addr); +err_ioremap: + pci_release_regions(pdev); + kfree(netdev); +err_alloc_etherdev: + return -ENOMEM; +} + +/** + * e1000_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * e1000_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. 
+ **/
+
+static void __devexit
+e1000_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev->priv;
+	uint32_t manc;
+
+	if(adapter->hw.mac_type >= e1000_82540) {	/* MANC is only touched on 82540 and later */
+		manc = E1000_READ_REG(&adapter->hw, MANC);
+		if(manc & E1000_MANC_SMBUS_EN) {
+			manc |= E1000_MANC_ARP_EN;	/* NOTE(review): presumably hands ARP back to the manageability firmware - confirm */
+			E1000_WRITE_REG(&adapter->hw, MANC, manc);
+		}
+	}
+
+	unregister_netdev(netdev);
+
+	e1000_phy_hw_reset(&adapter->hw);
+
+	e1000_proc_dev_free(adapter);
+
+	iounmap(adapter->hw.hw_addr);	/* undo the ioremap done in e1000_probe */
+	pci_release_regions(pdev);
+
+	kfree(netdev);	/* adapter is netdev->priv, so it must not be used after this */
+}
+
+/**
+ * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * e1000_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ *
+ * Returns 0 on success, -1 when e1000_set_mac_type() reports an error
+ * (logged as "Unknown MAC Type").
+ **/
+
+static int __devinit
+e1000_sw_init(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* PCI config space info */
+
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_id = pdev->subsystem_device;
+
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);
+
+	adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+	hw->max_frame_size = netdev->mtu +
+	                     ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+	hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE;
+
+	/* identify the MAC */
+
+	if (e1000_set_mac_type(hw)) {
+		E1000_ERR("Unknown MAC Type\n");
+		return -1;
+	}
+
+	/* flow control settings */
+
+	hw->fc_high_water = E1000_FC_HIGH_THRESH;
+	hw->fc_low_water = E1000_FC_LOW_THRESH;
+	hw->fc_pause_time = E1000_FC_PAUSE_TIME;
+	hw->fc_send_xon = 1;
+
+	/* Media type - copper or fiber; MACs older than the 82543 are
+	 * always treated as fiber, newer ones are read from STATUS.TBIMODE */
+
+	if(hw->mac_type >= e1000_82543) {
+		uint32_t status = E1000_READ_REG(hw, STATUS);
+
+		if(status & E1000_STATUS_TBIMODE)
+			hw->media_type = e1000_media_type_fiber;
+		else
+			hw->media_type = e1000_media_type_copper;
+	} else {
+		hw->media_type = e1000_media_type_fiber;
+	}
+
+	if(hw->mac_type < e1000_82543)	/* consumed by e1000_configure_tx to pick the descriptor report bit */
+		hw->report_tx_early = 0;
+	else
+		hw->report_tx_early = 1;
+
+	hw->wait_autoneg_complete = FALSE;
+	hw->tbi_compatibility_en = TRUE;
+	hw->adaptive_ifs = TRUE;
+
+	/* Copper options */
+
+	if(hw->media_type == e1000_media_type_copper) {
+		hw->mdix = AUTO_ALL_MODES;
+		hw->disable_polarity_correction = FALSE;
+	}
+
+	atomic_set(&adapter->irq_sem, 1);
+	spin_lock_init(&adapter->stats_lock);
+
+	return 0;
+}
+
+/**
+ * e1000_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure.  Every failure path
+ * resets the adapter and returns -EBUSY, whichever step failed.
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+
+static int
+e1000_open(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev->priv;
+
+	/* allocate transmit descriptors */
+
+	if(e1000_setup_tx_resources(adapter))
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+
+	if(e1000_setup_rx_resources(adapter))
+		goto err_setup_rx;
+
+	if(e1000_up(adapter))
+		goto err_up;
+
+	return 0;
+
+err_up:	/* labels fall through, releasing in reverse order of setup */
+	e1000_free_rx_resources(adapter);
+err_setup_rx:
+	e1000_free_tx_resources(adapter);
+err_setup_tx:
+	e1000_reset(adapter);
+
+	return -EBUSY;
+}
+
+/**
+ * e1000_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.
The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + **/ + +static int +e1000_close(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev->priv; + + e1000_down(adapter); + + e1000_free_tx_resources(adapter); + e1000_free_rx_resources(adapter); + + return 0; +} + +/** + * e1000_setup_tx_resources - allocate Tx resources (Descriptors) + * @adapter: board private structure + * + * Return 0 on success, negative on failure + **/ + +static int +e1000_setup_tx_resources(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *txdr = &adapter->tx_ring; + struct pci_dev *pdev = adapter->pdev; + int size; + + size = sizeof(struct e1000_buffer) * txdr->count; + txdr->buffer_info = kmalloc(size, GFP_KERNEL); + if(!txdr->buffer_info) { + return -ENOMEM; + } + memset(txdr->buffer_info, 0, size); + + /* round up to nearest 4K */ + + txdr->size = txdr->count * sizeof(struct e1000_tx_desc); + E1000_ROUNDUP(txdr->size, 4096); + + txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); + if(!txdr->desc) { + kfree(txdr->buffer_info); + return -ENOMEM; + } + memset(txdr->desc, 0, txdr->size); + + txdr->next_to_use = 0; + txdr->next_to_clean = 0; + + return 0; +} + +/** + * e1000_configure_tx - Configure 8254x Transmit Unit after Reset + * @adapter: board private structure + * + * Configure the Tx unit of the MAC after a reset. 
+ **/
+
+static void
+e1000_configure_tx(struct e1000_adapter *adapter)
+{
+	uint64_t tdba = adapter->tx_ring.dma;	/* bus address of the descriptor ring */
+	uint32_t tdlen = adapter->tx_ring.count * sizeof(struct e1000_tx_desc);	/* ring length in bytes */
+	uint32_t tctl, tipg;
+
+	E1000_WRITE_REG(&adapter->hw, TDBAL, (tdba & 0x00000000ffffffffULL));	/* low 32 bits */
+	E1000_WRITE_REG(&adapter->hw, TDBAH, (tdba >> 32));	/* high 32 bits */
+
+	E1000_WRITE_REG(&adapter->hw, TDLEN, tdlen);
+
+	/* Setup the HW Tx Head and Tail descriptor pointers (both start at
+	 * descriptor 0) */
+
+	E1000_WRITE_REG(&adapter->hw, TDH, 0);
+	E1000_WRITE_REG(&adapter->hw, TDT, 0);
+
+	/* Set the default values for the Tx Inter Packet Gap timer: the
+	 * 82542 revisions have their own defaults, everything else uses the
+	 * 82543 defaults with IPGT chosen by media type */
+
+	switch (adapter->hw.mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+		tipg = DEFAULT_82542_TIPG_IPGT;
+		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
+		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
+		break;
+	default:
+		if(adapter->hw.media_type == e1000_media_type_fiber)
+			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
+		else
+			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
+		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
+		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
+	}
+	E1000_WRITE_REG(&adapter->hw, TIPG, tipg);
+
+	/* Set the Tx Interrupt Delay register; the absolute delay register
+	 * (TADV) is only written on 82540 and later */
+
+	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay);
+	if(adapter->hw.mac_type >= e1000_82540)
+		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay);
+
+	/* Program the Transmit Control Register: replace the collision
+	 * threshold field, set the enable and PSP bits */
+
+	tctl = E1000_READ_REG(&adapter->hw, TCTL);
+
+	tctl &= ~E1000_TCTL_CT;
+	tctl |= E1000_TCTL_EN | E1000_TCTL_PSP |
+	       (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+	E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
+
+	e1000_config_collision_dist(&adapter->hw);
+
+	/* Setup Transmit Descriptor Settings for this adapter; txd_cmd is
+	 * the command template later OR-ed into descriptors */
+	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_IDE;
+
+	if(adapter->hw.report_tx_early == 1)	/* set in e1000_sw_init for 82543 and later */
+		adapter->txd_cmd |= E1000_TXD_CMD_RS;
+	else
+		adapter->txd_cmd |= E1000_TXD_CMD_RPS;
+}
+
+/**
+ * 
e1000_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Allocates the zeroed per-descriptor bookkeeping array and the
+ * DMA-consistent descriptor ring (rounded up to a 4K multiple), then
+ * rewinds the ring indices.
+ *
+ * Returns 0 on success, -ENOMEM if either allocation fails
+ **/
+
+static int
+e1000_setup_rx_resources(struct e1000_adapter *adapter)
+{
+	struct e1000_desc_ring *ring = &adapter->rx_ring;
+	int info_bytes = sizeof(struct e1000_buffer) * ring->count;
+
+	/* software state, one e1000_buffer per descriptor */
+	ring->buffer_info = kmalloc(info_bytes, GFP_KERNEL);
+	if(ring->buffer_info == NULL)
+		return -ENOMEM;
+	memset(ring->buffer_info, 0, info_bytes);
+
+	/* descriptor memory, sized to the next 4K boundary */
+	ring->size = ring->count * sizeof(struct e1000_rx_desc);
+	E1000_ROUNDUP(ring->size, 4096);
+
+	ring->desc = pci_alloc_consistent(adapter->pdev, ring->size,
+	                                  &ring->dma);
+	if(ring->desc == NULL) {
+		kfree(ring->buffer_info);
+		return -ENOMEM;
+	}
+	memset(ring->desc, 0, ring->size);
+
+	ring->next_to_use = 0;
+	ring->next_to_clean = 0;
+
+	return 0;
+}
+
+/**
+ * e1000_setup_rctl - configure the receive control register
+ * @adapter: Board private structure
+ *
+ * Read-modify-write of RCTL: multicast offset, enable/broadcast/loopback/
+ * threshold bits, the store-bad-packet bit (driven by
+ * hw.tbi_compatibility_on) and the buffer size bits selected by
+ * adapter->rx_buffer_len.
+ **/
+
+static void
+e1000_setup_rctl(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	uint32_t rctl = E1000_READ_REG(hw, RCTL);
+
+	/* replace the multicast offset field and set the fixed bits */
+	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+	        E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+	        (hw->mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+	rctl = (hw->tbi_compatibility_on == 1) ?
+	       (rctl | E1000_RCTL_SBP) : (rctl & ~E1000_RCTL_SBP);
+
+	/* clear the size field, then encode the receive buffer length */
+	rctl &= ~(E1000_RCTL_SZ_4096);
+	switch (adapter->rx_buffer_len) {
+	case E1000_RXBUFFER_16384:
+		rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+		break;
+	case E1000_RXBUFFER_8192:
+		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+		break;
+	case E1000_RXBUFFER_4096:
+		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+		break;
+	default:
+		/* E1000_RXBUFFER_2048 and any unrecognized length */
+		rctl |= E1000_RCTL_SZ_2048;
+		rctl &= ~(E1000_RCTL_BSEX | E1000_RCTL_LPE);
+		break;
+	}
+
+	E1000_WRITE_REG(hw, RCTL, rctl);
+}
+
+/** + * e1000_configure_rx - Configure 8254x Receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + **/ + +static void +e1000_configure_rx(struct e1000_adapter *adapter) +{ + uint64_t rdba = adapter->rx_ring.dma; + uint32_t rdlen = adapter->rx_ring.count * sizeof(struct e1000_rx_desc); + uint32_t rctl; + uint32_t rxcsum; + + /* make sure receives are disabled while setting up the descriptors */ + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + E1000_WRITE_REG(&adapter->hw, RCTL, rctl & ~E1000_RCTL_EN); + + /* set the Receive Delay Timer Register */ + + E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay); + + if(adapter->hw.mac_type >= e1000_82540) { + E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay); + + /* Set the interrupt throttling rate. Value is calculated + * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ +#define MAX_INTS_PER_SEC 8000 +#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) + E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR); + } + + /* Setup the Base and Length of the Rx Descriptor Ring */ + + E1000_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL)); + E1000_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32)); + + E1000_WRITE_REG(&adapter->hw, RDLEN, rdlen); + + /* Setup the HW Rx Head and Tail Descriptor Pointers */ + E1000_WRITE_REG(&adapter->hw, RDH, 0); + E1000_WRITE_REG(&adapter->hw, RDT, 0); + + /* Enable 82543 Receive Checksum Offload for TCP and UDP */ + if((adapter->hw.mac_type >= e1000_82543) && + (adapter->rx_csum == TRUE)) { + rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM); + rxcsum |= E1000_RXCSUM_TUOFL; + E1000_WRITE_REG(&adapter->hw, RXCSUM, rxcsum); + } + + /* Enable Receives */ + + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); +} + +/** + * e1000_free_tx_resources - Free Tx Resources + * @adapter: board private structure + * + * Free all transmit software resources + **/ + +static void +e1000_free_tx_resources(struct e1000_adapter 
*adapter) +{ + struct pci_dev *pdev = adapter->pdev; + + e1000_clean_tx_ring(adapter); + + kfree(adapter->tx_ring.buffer_info); + adapter->tx_ring.buffer_info = NULL; + + pci_free_consistent(pdev, adapter->tx_ring.size, + adapter->tx_ring.desc, adapter->tx_ring.dma); + + adapter->tx_ring.desc = NULL; +} + +/** + * e1000_clean_tx_ring - Free Tx Buffers + * @adapter: board private structure + **/ + +static void +e1000_clean_tx_ring(struct e1000_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + unsigned long size; + int i; + + /* Free all the Tx ring sk_buffs */ + + for(i = 0; i < adapter->tx_ring.count; i++) { + if(adapter->tx_ring.buffer_info[i].skb) { + + pci_unmap_page(pdev, + adapter->tx_ring.buffer_info[i].dma, + adapter->tx_ring.buffer_info[i].length, + PCI_DMA_TODEVICE); + + dev_kfree_skb(adapter->tx_ring.buffer_info[i].skb); + + adapter->tx_ring.buffer_info[i].skb = NULL; + } + } + + size = sizeof(struct e1000_buffer) * adapter->tx_ring.count; + memset(adapter->tx_ring.buffer_info, 0, size); + + /* Zero out the descriptor ring */ + + memset(adapter->tx_ring.desc, 0, adapter->tx_ring.size); + + adapter->tx_ring.next_to_use = 0; + adapter->tx_ring.next_to_clean = 0; + + E1000_WRITE_REG(&adapter->hw, TDH, 0); + E1000_WRITE_REG(&adapter->hw, TDT, 0); +} + +/** + * e1000_free_rx_resources - Free Rx Resources + * @adapter: board private structure + * + * Free all receive software resources + **/ + +static void +e1000_free_rx_resources(struct e1000_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + + e1000_clean_rx_ring(adapter); + + kfree(adapter->rx_ring.buffer_info); + adapter->rx_ring.buffer_info = NULL; + + pci_free_consistent(pdev, adapter->rx_ring.size, + adapter->rx_ring.desc, adapter->rx_ring.dma); + + adapter->rx_ring.desc = NULL; +} + +/** + * e1000_clean_rx_ring - Free Rx Buffers + * @adapter: board private structure + **/ + +static void +e1000_clean_rx_ring(struct e1000_adapter *adapter) +{ + struct pci_dev *pdev = 
adapter->pdev; + unsigned long size; + int i; + + /* Free all the Rx ring sk_buffs */ + + for(i = 0; i < adapter->rx_ring.count; i++) { + if(adapter->rx_ring.buffer_info[i].skb) { + + pci_unmap_single(pdev, + adapter->rx_ring.buffer_info[i].dma, + adapter->rx_ring.buffer_info[i].length, + PCI_DMA_FROMDEVICE); + + dev_kfree_skb(adapter->rx_ring.buffer_info[i].skb); + + adapter->rx_ring.buffer_info[i].skb = NULL; + } + } + + size = sizeof(struct e1000_buffer) * adapter->rx_ring.count; + memset(adapter->rx_ring.buffer_info, 0, size); + + /* Zero out the descriptor ring */ + + memset(adapter->rx_ring.desc, 0, adapter->rx_ring.size); + + adapter->rx_ring.next_to_clean = 0; + adapter->rx_ring.next_to_use = 0; + + E1000_WRITE_REG(&adapter->hw, RDH, 0); + E1000_WRITE_REG(&adapter->hw, RDT, 0); +} + +/* The 82542 2.0 (revision 2) needs to have the receive unit in reset + * and memory write and invalidate disabled for certain operations + */ +static void +e1000_enter_82542_rst(struct e1000_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + uint32_t rctl; + + e1000_pci_clear_mwi(&adapter->hw); + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl |= E1000_RCTL_RST; + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + E1000_WRITE_FLUSH(&adapter->hw); + mdelay(5); + + if(netif_running(netdev)) + e1000_clean_rx_ring(adapter); +} + +static void +e1000_leave_82542_rst(struct e1000_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + uint32_t rctl; + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl &= ~E1000_RCTL_RST; + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + E1000_WRITE_FLUSH(&adapter->hw); + mdelay(5); + + if(adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE) + e1000_pci_set_mwi(&adapter->hw); + + if(netif_running(netdev)) { + e1000_configure_rx(adapter); + e1000_alloc_rx_buffers(adapter); + } +} + +/** + * e1000_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address 
structure + * + * Returns 0 on success, negative on failure + **/ + +static int +e1000_set_mac(struct net_device *netdev, void *p) +{ + struct e1000_adapter *adapter = netdev->priv; + struct sockaddr *addr = p; + + /* 82542 2.0 needs to be in reset to write receive address registers */ + + if(adapter->hw.mac_type == e1000_82542_rev2_0) + e1000_enter_82542_rst(adapter); + + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(adapter->hw.mac_addr, addr->sa_data, netdev->addr_len); + + e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0); + + if(adapter->hw.mac_type == e1000_82542_rev2_0) + e1000_leave_82542_rst(adapter); + + return 0; +} + +/** + * e1000_set_multi - Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_multi entry point is called whenever the multicast address + * list or the network interface flags are updated. This routine is + * resposible for configuring the hardware for proper multicast, + * promiscuous mode, and all-multi behavior. 
+ **/
+
+static void
+e1000_set_multi(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev->priv;
+	struct e1000_hw *hw = &adapter->hw;
+	struct dev_mc_list *mc_ptr;
+	uint32_t rctl;
+	uint32_t hash_value;
+	int i;
+
+	/* Check for Promiscuous and All Multicast modes: promiscuous sets
+	 * both UPE and MPE, all-multi sets MPE only, otherwise both are
+	 * cleared */
+
+	rctl = E1000_READ_REG(hw, RCTL);
+
+	if(netdev->flags & IFF_PROMISC) {
+		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+	} else if(netdev->flags & IFF_ALLMULTI) {
+		rctl |= E1000_RCTL_MPE;
+		rctl &= ~E1000_RCTL_UPE;
+	} else {
+		rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
+	}
+
+	E1000_WRITE_REG(hw, RCTL, rctl);
+
+	/* 82542 2.0 needs to be in reset to write receive address registers */
+
+	if(hw->mac_type == e1000_82542_rev2_0)
+		e1000_enter_82542_rst(adapter);
+
+	/* load the first 15 multicast address into the exact filters 1-15
+	 * RAR 0 is used for the station MAC address
+	 * if there are not 15 addresses, go ahead and clear the filters
+	 * (each filter occupies two consecutive RA registers)
+	 */
+	mc_ptr = netdev->mc_list;
+
+	for(i = 1; i < E1000_RAR_ENTRIES; i++) {
+		if(mc_ptr) {
+			e1000_rar_set(hw, mc_ptr->dmi_addr, i);
+			mc_ptr = mc_ptr->next;
+		} else {
+			E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0);
+			E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0);
+		}
+	}
+
+	/* clear the old settings from the multicast hash table */
+
+	for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++)
+		E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+
+	/* load any remaining addresses into the hash table; mc_ptr still
+	 * points at the first address that did not fit an exact filter */
+
+	for(; mc_ptr; mc_ptr = mc_ptr->next) {
+		hash_value = e1000_hash_mc_addr(hw, mc_ptr->dmi_addr);
+		e1000_mta_set(hw, hash_value);
+	}
+
+	if(hw->mac_type == e1000_82542_rev2_0)
+		e1000_leave_82542_rst(adapter);
+}
+
+
+/* need to wait a few seconds after link up to get diagnostic information from the phy;
+ * this is the phy_info_timer callback and its data argument is the adapter */
+
+static void
+e1000_update_phy_info(unsigned long data)
+{
+	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+	e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+}
+
+/**
+ * e1000_watchdog - Timer Call-back
+ * @data: pointer to adapter cast
into an unsigned long + **/ + +static void +e1000_watchdog(unsigned long data) +{ + struct e1000_adapter *adapter = (struct e1000_adapter *) data; + struct net_device *netdev = adapter->netdev; + struct e1000_desc_ring *txdr = &adapter->tx_ring; + int i; + + e1000_check_for_link(&adapter->hw); + + if(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) { + if(!netif_carrier_ok(netdev)) { + e1000_get_speed_and_duplex(&adapter->hw, + &adapter->link_speed, + &adapter->link_duplex); + + printk(KERN_INFO + "e1000: %s NIC Link is Up %d Mbps %s\n", + netdev->name, adapter->link_speed, + adapter->link_duplex == FULL_DUPLEX ? + "Full Duplex" : "Half Duplex"); + + netif_carrier_on(netdev); + netif_wake_queue(netdev); + mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ); + } + } else { + if(netif_carrier_ok(netdev)) { + adapter->link_speed = 0; + adapter->link_duplex = 0; + printk(KERN_INFO + "e1000: %s NIC Link is Down\n", + netdev->name); + netif_carrier_off(netdev); + netif_stop_queue(netdev); + mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ); + } + } + + e1000_update_stats(adapter); + e1000_update_adaptive(&adapter->hw); + + + /* Cause software interrupt to ensure rx ring is cleaned */ + E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0); + + /* Early detection of hung controller */ + i = txdr->next_to_clean; + if(txdr->buffer_info[i].dma && + time_after(jiffies, txdr->buffer_info[i].time_stamp + HZ) && + !(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF)) + netif_stop_queue(netdev); + + /* Reset the timer */ + mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ); +} + +#define E1000_TX_FLAGS_CSUM 0x00000001 +#define E1000_TX_FLAGS_VLAN 0x00000002 +#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 +#define E1000_TX_FLAGS_VLAN_SHIFT 16 + +static inline boolean_t +e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb) +{ + struct e1000_context_desc *context_desc; + int i; + uint8_t css, cso; + + if(skb->ip_summed == CHECKSUM_HW) { + css = 
skb->h.raw - skb->data; + cso = (skb->h.raw + skb->csum) - skb->data; + + i = adapter->tx_ring.next_to_use; + context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i); + + context_desc->upper_setup.tcp_fields.tucss = css; + context_desc->upper_setup.tcp_fields.tucso = cso; + context_desc->upper_setup.tcp_fields.tucse = 0; + context_desc->tcp_seg_setup.data = 0; + context_desc->cmd_and_length = + cpu_to_le32(adapter->txd_cmd | E1000_TXD_CMD_DEXT); + + i = (i + 1) % adapter->tx_ring.count; + adapter->tx_ring.next_to_use = i; + + return TRUE; + } + + return FALSE; +} + +static inline int +e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb) +{ + struct e1000_desc_ring *tx_ring = &adapter->tx_ring; + int len, offset, size, count, i; + + int f; + len = skb->len - skb->data_len; + + i = (tx_ring->next_to_use + tx_ring->count - 1) % tx_ring->count; + count = 0; + + offset = 0; + + while(len) { + i = (i + 1) % tx_ring->count; + size = min(len, adapter->max_data_per_txd); + tx_ring->buffer_info[i].length = size; + tx_ring->buffer_info[i].dma = + pci_map_single(adapter->pdev, + skb->data + offset, + size, + PCI_DMA_TODEVICE); + tx_ring->buffer_info[i].time_stamp = jiffies; + + len -= size; + offset += size; + count++; + } + + for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) { + struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[f]; + len = frag->size; + offset = 0; + + while(len) { + i = (i + 1) % tx_ring->count; + size = min(len, adapter->max_data_per_txd); + tx_ring->buffer_info[i].length = size; + tx_ring->buffer_info[i].dma = + pci_map_page(adapter->pdev, + frag->page, + frag->page_offset + offset, + size, + PCI_DMA_TODEVICE); + + len -= size; + offset += size; + count++; + } + } + tx_ring->buffer_info[i].skb = skb; + + return count; +} + +static inline void +e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags) +{ + struct e1000_desc_ring *tx_ring = &adapter->tx_ring; + struct e1000_tx_desc *tx_desc = NULL; + uint32_t txd_upper, 
txd_lower; + int i; + + txd_upper = 0; + txd_lower = adapter->txd_cmd; + + if(tx_flags & E1000_TX_FLAGS_CSUM) { + txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; + txd_upper |= E1000_TXD_POPTS_TXSM << 8; + } + + if(tx_flags & E1000_TX_FLAGS_VLAN) { + txd_lower |= E1000_TXD_CMD_VLE; + txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); + } + + i = tx_ring->next_to_use; + + while(count--) { + tx_desc = E1000_TX_DESC(*tx_ring, i); + tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma); + tx_desc->lower.data = + cpu_to_le32(txd_lower | tx_ring->buffer_info[i].length); + tx_desc->upper.data = cpu_to_le32(txd_upper); + i = (i + 1) % tx_ring->count; + } + + tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP); + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). */ + wmb(); + + tx_ring->next_to_use = i; + E1000_WRITE_REG(&adapter->hw, TDT, i); +} + +#define TXD_USE_COUNT(S, X) (((S) / (X)) + (((S) % (X)) ? 
1 : 0)) + +static int +e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev->priv; + int tx_flags = 0, count; + + int f; + + count = TXD_USE_COUNT(skb->len - skb->data_len, + adapter->max_data_per_txd); + for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, + adapter->max_data_per_txd); + + if(skb->ip_summed == CHECKSUM_HW) + count++; + + if(E1000_DESC_UNUSED(&adapter->tx_ring) < count) { + netif_stop_queue(netdev); + return 1; + } + + if(e1000_tx_csum(adapter, skb)) + tx_flags |= E1000_TX_FLAGS_CSUM; + + if(adapter->vlgrp && vlan_tx_tag_present(skb)) { + tx_flags |= E1000_TX_FLAGS_VLAN; + tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); + } + + count = e1000_tx_map(adapter, skb); + + e1000_tx_queue(adapter, count, tx_flags); + + netdev->trans_start = jiffies; + + return 0; +} + +/** + * e1000_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + **/ + +static void +e1000_tx_timeout(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev->priv; + + /* Do the reset outside of interrupt context */ + schedule_task(&adapter->tx_timeout_task); +} + +static void +e1000_tx_timeout_task(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev->priv; + + netif_device_detach(netdev); + e1000_down(adapter); + e1000_up(adapter); + netif_device_attach(netdev); +} + +/** + * e1000_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are actually updated from the timer callback. 
+ **/ + +static struct net_device_stats * +e1000_get_stats(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev->priv; + + return &adapter->net_stats; +} + +/** + * e1000_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + **/ + +static int +e1000_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct e1000_adapter *adapter = netdev->priv; + int old_mtu = adapter->rx_buffer_len; + int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; + + if((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || + (max_frame > MAX_JUMBO_FRAME_SIZE)) { + E1000_ERR("Invalid MTU setting\n"); + return -EINVAL; + } + + if(max_frame <= MAXIMUM_ETHERNET_FRAME_SIZE) { + adapter->rx_buffer_len = E1000_RXBUFFER_2048; + + } else if(adapter->hw.mac_type < e1000_82543) { + E1000_ERR("Jumbo Frames not supported on 82542\n"); + return -EINVAL; + + } else if(max_frame <= E1000_RXBUFFER_4096) { + adapter->rx_buffer_len = E1000_RXBUFFER_4096; + + } else if(max_frame <= E1000_RXBUFFER_8192) { + adapter->rx_buffer_len = E1000_RXBUFFER_8192; + + } else { + adapter->rx_buffer_len = E1000_RXBUFFER_16384; + } + + if(old_mtu != adapter->rx_buffer_len && netif_running(netdev)) { + + e1000_down(adapter); + e1000_up(adapter); + } + + netdev->mtu = new_mtu; + adapter->hw.max_frame_size = max_frame; + + return 0; +} + +/** + * e1000_update_stats - Update the board statistics counters + * @adapter: board private structure + **/ + +static void +e1000_update_stats(struct e1000_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + unsigned long flags; + uint16_t phy_tmp; + +#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF + + spin_lock_irqsave(&adapter->stats_lock, flags); + + /* these counters are modified from e1000_adjust_tbi_stats, + * called from the interrupt context, so they must only + * be written while holding adapter->stats_lock + */ + + 
adapter->stats.crcerrs += E1000_READ_REG(hw, CRCERRS); + adapter->stats.gprc += E1000_READ_REG(hw, GPRC); + adapter->stats.gorcl += E1000_READ_REG(hw, GORCL); + adapter->stats.gorch += E1000_READ_REG(hw, GORCH); + adapter->stats.bprc += E1000_READ_REG(hw, BPRC); + adapter->stats.mprc += E1000_READ_REG(hw, MPRC); + adapter->stats.roc += E1000_READ_REG(hw, ROC); + adapter->stats.prc64 += E1000_READ_REG(hw, PRC64); + adapter->stats.prc127 += E1000_READ_REG(hw, PRC127); + adapter->stats.prc255 += E1000_READ_REG(hw, PRC255); + adapter->stats.prc511 += E1000_READ_REG(hw, PRC511); + adapter->stats.prc1023 += E1000_READ_REG(hw, PRC1023); + adapter->stats.prc1522 += E1000_READ_REG(hw, PRC1522); + + spin_unlock_irqrestore(&adapter->stats_lock, flags); + + /* the rest of the counters are only modified here */ + + adapter->stats.symerrs += E1000_READ_REG(hw, SYMERRS); + adapter->stats.mpc += E1000_READ_REG(hw, MPC); + adapter->stats.scc += E1000_READ_REG(hw, SCC); + adapter->stats.ecol += E1000_READ_REG(hw, ECOL); + adapter->stats.mcc += E1000_READ_REG(hw, MCC); + adapter->stats.latecol += E1000_READ_REG(hw, LATECOL); + adapter->stats.dc += E1000_READ_REG(hw, DC); + adapter->stats.sec += E1000_READ_REG(hw, SEC); + adapter->stats.rlec += E1000_READ_REG(hw, RLEC); + adapter->stats.xonrxc += E1000_READ_REG(hw, XONRXC); + adapter->stats.xontxc += E1000_READ_REG(hw, XONTXC); + adapter->stats.xoffrxc += E1000_READ_REG(hw, XOFFRXC); + adapter->stats.xofftxc += E1000_READ_REG(hw, XOFFTXC); + adapter->stats.fcruc += E1000_READ_REG(hw, FCRUC); + adapter->stats.gptc += E1000_READ_REG(hw, GPTC); + adapter->stats.gotcl += E1000_READ_REG(hw, GOTCL); + adapter->stats.gotch += E1000_READ_REG(hw, GOTCH); + adapter->stats.rnbc += E1000_READ_REG(hw, RNBC); + adapter->stats.ruc += E1000_READ_REG(hw, RUC); + adapter->stats.rfc += E1000_READ_REG(hw, RFC); + adapter->stats.rjc += E1000_READ_REG(hw, RJC); + adapter->stats.torl += E1000_READ_REG(hw, TORL); + adapter->stats.torh += E1000_READ_REG(hw, 
TORH); + adapter->stats.totl += E1000_READ_REG(hw, TOTL); + adapter->stats.toth += E1000_READ_REG(hw, TOTH); + adapter->stats.tpr += E1000_READ_REG(hw, TPR); + adapter->stats.ptc64 += E1000_READ_REG(hw, PTC64); + adapter->stats.ptc127 += E1000_READ_REG(hw, PTC127); + adapter->stats.ptc255 += E1000_READ_REG(hw, PTC255); + adapter->stats.ptc511 += E1000_READ_REG(hw, PTC511); + adapter->stats.ptc1023 += E1000_READ_REG(hw, PTC1023); + adapter->stats.ptc1522 += E1000_READ_REG(hw, PTC1522); + adapter->stats.mptc += E1000_READ_REG(hw, MPTC); + adapter->stats.bptc += E1000_READ_REG(hw, BPTC); + + /* used for adaptive IFS */ + + hw->tx_packet_delta = E1000_READ_REG(hw, TPT); + adapter->stats.tpt += hw->tx_packet_delta; + hw->collision_delta = E1000_READ_REG(hw, COLC); + adapter->stats.colc += hw->collision_delta; + + if(hw->mac_type >= e1000_82543) { + adapter->stats.algnerrc += E1000_READ_REG(hw, ALGNERRC); + adapter->stats.rxerrc += E1000_READ_REG(hw, RXERRC); + adapter->stats.tncrs += E1000_READ_REG(hw, TNCRS); + adapter->stats.cexterr += E1000_READ_REG(hw, CEXTERR); + adapter->stats.tsctc += E1000_READ_REG(hw, TSCTC); + adapter->stats.tsctfc += E1000_READ_REG(hw, TSCTFC); + } + + /* Fill out the OS statistics structure */ + + adapter->net_stats.rx_packets = adapter->stats.gprc; + adapter->net_stats.tx_packets = adapter->stats.gptc; + adapter->net_stats.rx_bytes = adapter->stats.gorcl; + adapter->net_stats.tx_bytes = adapter->stats.gotcl; + adapter->net_stats.multicast = adapter->stats.mprc; + adapter->net_stats.collisions = adapter->stats.colc; + + /* Rx Errors */ + + adapter->net_stats.rx_errors = adapter->stats.rxerrc + + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.rlec + adapter->stats.rnbc + + adapter->stats.mpc + adapter->stats.cexterr; + adapter->net_stats.rx_dropped = adapter->stats.rnbc; + adapter->net_stats.rx_length_errors = adapter->stats.rlec; + adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs; + 
adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc; + adapter->net_stats.rx_fifo_errors = adapter->stats.mpc; + adapter->net_stats.rx_missed_errors = adapter->stats.mpc; + + /* Tx Errors */ + + adapter->net_stats.tx_errors = adapter->stats.ecol + + adapter->stats.latecol; + adapter->net_stats.tx_aborted_errors = adapter->stats.ecol; + adapter->net_stats.tx_window_errors = adapter->stats.latecol; + adapter->net_stats.tx_carrier_errors = adapter->stats.tncrs; + + /* Tx Dropped needs to be maintained elsewhere */ + + /* Phy Stats */ + + if(hw->media_type == e1000_media_type_copper) { + if((adapter->link_speed == SPEED_1000) && + (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { + phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; + adapter->phy_stats.idle_errors += phy_tmp; + } + + if((hw->mac_type <= e1000_82546) && + !e1000_read_phy_reg(hw, M88E1000_RX_ERR_CNTR, &phy_tmp)) + adapter->phy_stats.receive_errors += phy_tmp; + } +} + +/** + * e1000_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + **/ + +static inline void +e1000_irq_disable(struct e1000_adapter *adapter) +{ + atomic_inc(&adapter->irq_sem); + E1000_WRITE_REG(&adapter->hw, IMC, ~0); + E1000_WRITE_FLUSH(&adapter->hw); + synchronize_irq(); +} + +/** + * e1000_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + **/ + +static inline void +e1000_irq_enable(struct e1000_adapter *adapter) +{ + if(atomic_dec_and_test(&adapter->irq_sem)) { + E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK); + E1000_WRITE_FLUSH(&adapter->hw); + } +} + +/** + * e1000_intr - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + * @pt_regs: CPU registers structure + **/ + +static void +e1000_intr(int irq, void *data, struct pt_regs *regs) +{ + struct net_device *netdev = data; + struct e1000_adapter *adapter = netdev->priv; + uint32_t icr; + int i = E1000_MAX_INTR; + + while(i && (icr = 
E1000_READ_REG(&adapter->hw, ICR))) { + + if(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + adapter->hw.get_link_status = 1; + mod_timer(&adapter->watchdog_timer, jiffies); + } + + e1000_clean_rx_irq(adapter); + e1000_clean_tx_irq(adapter); + i--; + + } +} + +/** + * e1000_clean_tx_irq - Reclaim resources after transmit completes + * @adapter: board private structure + **/ + +static void +e1000_clean_tx_irq(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *tx_ring = &adapter->tx_ring; + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct e1000_tx_desc *tx_desc; + int i; + + i = tx_ring->next_to_clean; + tx_desc = E1000_TX_DESC(*tx_ring, i); + + while(tx_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { + + if(tx_ring->buffer_info[i].dma) { + + pci_unmap_page(pdev, + tx_ring->buffer_info[i].dma, + tx_ring->buffer_info[i].length, + PCI_DMA_TODEVICE); + + tx_ring->buffer_info[i].dma = 0; + } + + if(tx_ring->buffer_info[i].skb) { + + dev_kfree_skb_any(tx_ring->buffer_info[i].skb); + + tx_ring->buffer_info[i].skb = NULL; + } + + tx_desc->upper.data = 0; + + i = (i + 1) % tx_ring->count; + tx_desc = E1000_TX_DESC(*tx_ring, i); + } + + tx_ring->next_to_clean = i; + + if(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) && + (E1000_DESC_UNUSED(tx_ring) > E1000_TX_QUEUE_WAKE)) { + + netif_wake_queue(netdev); + } +} + +/** + * e1000_clean_rx_irq - Send received data up the network stack, + * @adapter: board private structure + **/ + +static void +e1000_clean_rx_irq(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *rx_ring = &adapter->rx_ring; + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct e1000_rx_desc *rx_desc; + struct sk_buff *skb; + unsigned long flags; + uint32_t length; + uint8_t last_byte; + int i; + + i = rx_ring->next_to_clean; + rx_desc = E1000_RX_DESC(*rx_ring, i); + + while(rx_desc->status & E1000_RXD_STAT_DD) { + + pci_unmap_single(pdev, + 
rx_ring->buffer_info[i].dma, + rx_ring->buffer_info[i].length, + PCI_DMA_FROMDEVICE); + + skb = rx_ring->buffer_info[i].skb; + length = le16_to_cpu(rx_desc->length); + + if(!(rx_desc->status & E1000_RXD_STAT_EOP)) { + + /* All receives must fit into a single buffer */ + + E1000_DBG("Receive packet consumed multiple buffers\n"); + + dev_kfree_skb_irq(skb); + rx_desc->status = 0; + rx_ring->buffer_info[i].skb = NULL; + + i = (i + 1) % rx_ring->count; + + rx_desc = E1000_RX_DESC(*rx_ring, i); + continue; + } + + if(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) { + + last_byte = *(skb->data + length - 1); + + if(TBI_ACCEPT(&adapter->hw, rx_desc->status, + rx_desc->errors, length, last_byte)) { + + spin_lock_irqsave(&adapter->stats_lock, flags); + + e1000_tbi_adjust_stats(&adapter->hw, + &adapter->stats, + length, skb->data); + + spin_unlock_irqrestore(&adapter->stats_lock, + flags); + length--; + } else { + + dev_kfree_skb_irq(skb); + rx_desc->status = 0; + rx_ring->buffer_info[i].skb = NULL; + + i = (i + 1) % rx_ring->count; + + rx_desc = E1000_RX_DESC(*rx_ring, i); + continue; + } + } + + /* Good Receive */ + skb_put(skb, length - ETHERNET_FCS_SIZE); + + /* Receive Checksum Offload */ + e1000_rx_checksum(adapter, rx_desc, skb); + + skb->protocol = eth_type_trans(skb, netdev); + if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) { + vlan_hwaccel_rx(skb, adapter->vlgrp, /* descriptor fields are little-endian, like rx_desc->length above */ + (le16_to_cpu(rx_desc->special) & E1000_RXD_SPC_VLAN_MASK)); + } else { + netif_rx(skb); + } + netdev->last_rx = jiffies; + + rx_desc->status = 0; + rx_ring->buffer_info[i].skb = NULL; + + i = (i + 1) % rx_ring->count; + + rx_desc = E1000_RX_DESC(*rx_ring, i); + } + + rx_ring->next_to_clean = i; + + e1000_alloc_rx_buffers(adapter); +} + +/** + * e1000_alloc_rx_buffers - Replace used receive buffers + * @adapter: address of board private structure + **/ + +static void +e1000_alloc_rx_buffers(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *rx_ring = &adapter->rx_ring; + struct net_device *netdev = 
adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct e1000_rx_desc *rx_desc; + struct sk_buff *skb; + int reserve_len; + int i; + + reserve_len = 2; + + i = rx_ring->next_to_use; + + while(!rx_ring->buffer_info[i].skb) { + rx_desc = E1000_RX_DESC(*rx_ring, i); + + skb = dev_alloc_skb(adapter->rx_buffer_len + reserve_len); + + if(!skb) { + /* Better luck next round */ + break; + } + + /* Make buffer alignment 2 beyond a 16 byte boundary + * this will result in a 16 byte aligned IP header after + * the 14 byte MAC header is removed + */ + skb_reserve(skb, reserve_len); + + skb->dev = netdev; + + rx_ring->buffer_info[i].skb = skb; + rx_ring->buffer_info[i].length = adapter->rx_buffer_len; + rx_ring->buffer_info[i].dma = + pci_map_single(pdev, + skb->data, + adapter->rx_buffer_len, + PCI_DMA_FROMDEVICE); + + rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma); + + if(!(i % E1000_RX_BUFFER_WRITE)) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). 
*/ + wmb(); + + E1000_WRITE_REG(&adapter->hw, RDT, i); + } + + i = (i + 1) % rx_ring->count; + } + + rx_ring->next_to_use = i; +} + +/** + * e1000_ioctl - Dispatch device ioctl requests + * @netdev: network interface device structure + * @ifr: ioctl request, passed through to e1000_ethtool_ioctl + * @cmd: ioctl command; only SIOCETHTOOL is handled, others return -EOPNOTSUPP + **/ + +static int +e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCETHTOOL: + return e1000_ethtool_ioctl(netdev, ifr); + default: + return -EOPNOTSUPP; + } +} + +/** + * e1000_rx_checksum - Receive Checksum Offload for 82543 + * @adapter: board private structure + * @rx_desc: receive descriptor + * @skb: socket buffer with received data + **/ + +static inline void +e1000_rx_checksum(struct e1000_adapter *adapter, + struct e1000_rx_desc *rx_desc, + struct sk_buff *skb) +{ + /* 82543 or newer only */ + if((adapter->hw.mac_type < e1000_82543) || + /* Ignore Checksum bit is set */ + (rx_desc->status & E1000_RXD_STAT_IXSM) || + /* TCP Checksum has not been calculated */ + (!(rx_desc->status & E1000_RXD_STAT_TCPCS))) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* At this point we know the hardware did the TCP checksum */ + /* now look at the TCP checksum error bit */ + if(rx_desc->errors & E1000_RXD_ERR_TCPE) { + /* let the stack verify checksum errors */ + skb->ip_summed = CHECKSUM_NONE; + adapter->hw_csum_err++; + } else { + /* TCP checksum is good */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + adapter->hw_csum_good++; + } +} + +void +e1000_pci_set_mwi(struct e1000_hw *hw) +{ + struct e1000_adapter *adapter = hw->back; + + pci_set_mwi(adapter->pdev); +} + +void +e1000_pci_clear_mwi(struct e1000_hw *hw) +{ + struct e1000_adapter *adapter = hw->back; + + pci_clear_mwi(adapter->pdev); +} + +void +e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) +{ + struct e1000_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void +e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) +{ + struct e1000_adapter *adapter = hw->back; + + 
pci_write_config_word(adapter->pdev, reg, *value); +} + +uint32_t +e1000_io_read(struct e1000_hw *hw, uint32_t port) +{ + return inl(port); +} + +void +e1000_io_write(struct e1000_hw *hw, uint32_t port, uint32_t value) +{ + outl(value, port); +} + +static void +e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) +{ + struct e1000_adapter *adapter = netdev->priv; + uint32_t ctrl, rctl; + + e1000_irq_disable(adapter); + adapter->vlgrp = grp; + + if(grp) { + /* enable VLAN tag insert/strip */ + + E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE); + + ctrl = E1000_READ_REG(&adapter->hw, CTRL); + ctrl |= E1000_CTRL_VME; + E1000_WRITE_REG(&adapter->hw, CTRL, ctrl); + + /* enable VLAN receive filtering */ + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl |= E1000_RCTL_VFE; + rctl &= ~E1000_RCTL_CFIEN; + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + } else { + /* disable VLAN tag insert/strip */ + + ctrl = E1000_READ_REG(&adapter->hw, CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(&adapter->hw, CTRL, ctrl); + + /* disable VLAN filtering */ + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl &= ~E1000_RCTL_VFE; + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + } + + e1000_irq_enable(adapter); +} + +static void +e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid) +{ + struct e1000_adapter *adapter = netdev->priv; + uint32_t vfta, index; + + /* add VID to filter table */ + + index = (vid >> 5) & 0x7F; + vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index); + vfta |= (1 << (vid & 0x1F)); + e1000_write_vfta(&adapter->hw, index, vfta); +} + +static void +e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid) +{ + struct e1000_adapter *adapter = netdev->priv; + uint32_t vfta, index; + + e1000_irq_disable(adapter); + + if(adapter->vlgrp) + adapter->vlgrp->vlan_devices[vid] = NULL; + + e1000_irq_enable(adapter); + + /* remove VID from filter table*/ + + index = (vid >> 5) & 0x7F; + vfta = 
E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index); + vfta &= ~(1 << (vid & 0x1F)); + e1000_write_vfta(&adapter->hw, index, vfta); +} + +static int +e1000_notify_reboot(struct notifier_block *nb, unsigned long event, void *p) +{ + struct pci_dev *pdev = NULL; + + switch(event) { + case SYS_DOWN: + case SYS_HALT: + case SYS_POWER_OFF: + pci_for_each_dev(pdev) { + if(pci_dev_driver(pdev) == &e1000_driver) + e1000_suspend(pdev, 3); + } + } + return NOTIFY_DONE; +} + +static int +e1000_notify_netdev(struct notifier_block *nb, unsigned long event, void *p) +{ + struct e1000_adapter *adapter; + struct net_device *netdev = p; + if(netdev == NULL) + return NOTIFY_DONE; + + switch(event) { + case NETDEV_CHANGENAME: + if(netdev->open == e1000_open) { + adapter = netdev->priv; + /* rename the proc nodes the easy way */ + e1000_proc_dev_free(adapter); + memcpy(adapter->ifname, netdev->name, IFNAMSIZ); + adapter->ifname[IFNAMSIZ-1] = 0; + e1000_proc_dev_setup(adapter); + } + break; + } + return NOTIFY_DONE; +} + +static int +e1000_suspend(struct pci_dev *pdev, uint32_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct e1000_adapter *adapter = netdev->priv; + uint32_t ctrl, ctrl_ext, rctl, manc; + + netif_device_detach(netdev); + + if(netif_running(netdev)) + e1000_down(adapter); + + if(adapter->wol) { + e1000_setup_rctl(adapter); + e1000_set_multi(netdev); + + /* turn on all-multi mode if wake on multicast is enabled */ + if(adapter->wol & E1000_WUFC_MC) { + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + } + + if(adapter->hw.mac_type >= e1000_82540) { + ctrl = E1000_READ_REG(&adapter->hw, CTRL); + /* advertise wake from D3Cold */ + #define E1000_CTRL_ADVD3WUC 0x00100000 + /* phy power management enable */ + #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 + ctrl |= E1000_CTRL_ADVD3WUC | + E1000_CTRL_EN_PHY_PWR_MGMT; + E1000_WRITE_REG(&adapter->hw, CTRL, ctrl); + } + + if(adapter->hw.media_type 
== e1000_media_type_fiber) { + /* keep the laser running in D3 */ + ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA; + E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ctrl_ext); + } + + E1000_WRITE_REG(&adapter->hw, WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(&adapter->hw, WUFC, adapter->wol); + pci_enable_wake(pdev, 3, 1); + pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */ + } else { + E1000_WRITE_REG(&adapter->hw, WUC, 0); + E1000_WRITE_REG(&adapter->hw, WUFC, 0); + pci_enable_wake(pdev, 3, 0); + pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */ + } + + pci_save_state(pdev, adapter->pci_state); + + if(adapter->hw.mac_type >= e1000_82540) { + manc = E1000_READ_REG(&adapter->hw, MANC); + if(manc & E1000_MANC_SMBUS_EN) { + manc |= E1000_MANC_ARP_EN; + E1000_WRITE_REG(&adapter->hw, MANC, manc); + state = 0; + } + } + + state = (state > 0) ? 3 : 0; + pci_set_power_state(pdev, state); + + return 0; +} + +#ifdef CONFIG_PM +static int +e1000_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct e1000_adapter *adapter = netdev->priv; + uint32_t manc; + + pci_set_power_state(pdev, 0); + pci_restore_state(pdev, adapter->pci_state); + + pci_enable_wake(pdev, 3, 0); + pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */ + + e1000_reset(adapter); + E1000_WRITE_REG(&adapter->hw, WUS, ~0); + + if(netif_running(netdev)) + e1000_up(adapter); + + netif_device_attach(netdev); + + if(adapter->hw.mac_type >= e1000_82540) { + manc = E1000_READ_REG(&adapter->hw, MANC); + manc &= ~(E1000_MANC_ARP_EN); + E1000_WRITE_REG(&adapter->hw, MANC, manc); + } + + return 0; +} +#endif + +/* e1000_main.c */ diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_osdep.h linux-2.4.19/drivers/net/e1000/e1000_osdep.h --- linux-2.4.19.orig/drivers/net/e1000/e1000_osdep.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/e1000_osdep.h Wed Feb 12 12:09:01 2003 @@ -0,0 +1,101 @@ 
+/******************************************************************************* + + + Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +/* glue for the OS independent part of e1000 + * includes register access macros + */ + +#ifndef _E1000_OSDEP_H_ +#define _E1000_OSDEP_H_ + +#include +#include +#include +#include +#include +#include + +#ifndef msec_delay +#define msec_delay(x) do { if(in_interrupt()) { \ + /* Don't mdelay in interrupt context! */ \ + BUG(); \ + } else { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + schedule_timeout((((x) * HZ) + 999) / 1000 + 1); /* round up, +1 for the partial first tick, so short delays never become 0 jiffies */ \ + } } while(0) +#endif + +#define PCI_COMMAND_REGISTER PCI_COMMAND +#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE + +typedef enum { + FALSE = 0, + TRUE = 1 +} boolean_t; + +#define ASSERT(x) do { if(!(x)) BUG(); } while(0) +#define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B) + +#if DBG +#define DEBUGOUT(S) printk(KERN_DEBUG S "\n") +#define DEBUGOUT1(S, A...) 
printk(KERN_DEBUG S "\n", A) +#else +#define DEBUGOUT(S) +#define DEBUGOUT1(S, A...) +#endif + +#define DEBUGFUNC(F) DEBUGOUT(F) +#define DEBUGOUT2 DEBUGOUT1 +#define DEBUGOUT3 DEBUGOUT2 +#define DEBUGOUT7 DEBUGOUT3 + + +#define E1000_WRITE_REG(a, reg, value) ( \ + ((a)->mac_type >= e1000_82543) ? \ + (writel((value), ((a)->hw_addr + E1000_##reg))) : \ + (writel((value), ((a)->hw_addr + E1000_82542_##reg)))) + +#define E1000_READ_REG(a, reg) ( \ + ((a)->mac_type >= e1000_82543) ? \ + readl((a)->hw_addr + E1000_##reg) : \ + readl((a)->hw_addr + E1000_82542_##reg)) + +#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ + ((a)->mac_type >= e1000_82543) ? \ + writel((value), ((a)->hw_addr + E1000_##reg + ((offset) << 2))) : \ + writel((value), ((a)->hw_addr + E1000_82542_##reg + ((offset) << 2)))) + +#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ + ((a)->mac_type >= e1000_82543) ? \ + readl((a)->hw_addr + E1000_##reg + ((offset) << 2)) : \ + readl((a)->hw_addr + E1000_82542_##reg + ((offset) << 2))) + +#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, STATUS) + +#endif /* _E1000_OSDEP_H_ */ diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_param.c linux-2.4.19/drivers/net/e1000/e1000_param.c --- linux-2.4.19.orig/drivers/net/e1000/e1000_param.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/e1000_param.c Wed Feb 12 12:09:01 2003 @@ -0,0 +1,654 @@ +/******************************************************************************* + + + Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000.h" + +/* This is the only thing that needs to be changed to adjust the + * maximum number of ports that the driver can manage. + */ + +#define E1000_MAX_NIC 32 + +#define OPTION_UNSET -1 +#define OPTION_DISABLED 0 +#define OPTION_ENABLED 1 + +/* Module Parameters are always initialized to -1, so that the driver + * can tell the difference between no user specified value or the + * user asking for the default value. + * The true default values are loaded in when e1000_check_options is called. + * + * This is a GCC extension to ANSI C. + * See the item "Labeled Elements in Initializers" in the section + * "Extensions to the C Language Family" of the GCC documentation. + */ + +#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET } + +/* All parameters are treated the same, as an integer array of values. + * This macro just reduces the need to repeat the same declaration code + * over and over (plus this helps to avoid typo bugs). 
+ */ + +#define E1000_PARAM(X, S) \ +static const int __devinitdata X[E1000_MAX_NIC + 1] = E1000_PARAM_INIT; \ +MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \ +MODULE_PARM_DESC(X, S); + +/* Transmit Descriptor Count + * + * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers + * Valid Range: 80-4096 for 82544 + * + * Default Value: 256 + */ + +E1000_PARAM(TxDescriptors, "Number of transmit descriptors"); + +/* Receive Descriptor Count + * + * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers + * Valid Range: 80-4096 for 82544 + * + * Default Value: 80 + */ + +E1000_PARAM(RxDescriptors, "Number of receive descriptors"); + +/* User Specified Speed Override + * + * Valid Range: 0, 10, 100, 1000 + * - 0 - auto-negotiate at all supported speeds + * - 10 - only link at 10 Mbps + * - 100 - only link at 100 Mbps + * - 1000 - only link at 1000 Mbps + * + * Default Value: 0 + */ + +E1000_PARAM(Speed, "Speed setting"); + +/* User Specified Duplex Override + * + * Valid Range: 0-2 + * - 0 - auto-negotiate for duplex + * - 1 - only link at half duplex + * - 2 - only link at full duplex + * + * Default Value: 0 + */ + +E1000_PARAM(Duplex, "Duplex setting"); + +/* Auto-negotiation Advertisement Override + * + * Valid Range: 0x01-0x0F, 0x20-0x2F + * + * The AutoNeg value is a bit mask describing which speed and duplex + * combinations should be advertised during auto-negotiation. 
/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds
 *
 * Valid Range: 0-65535
 *
 * Default Value: 64 (DEFAULT_TADV)
 */

E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
MAX_82544_RXD 4096 + +#define DEFAULT_RDTR 0 +#define MAX_RXDELAY 0xFFFF +#define MIN_RXDELAY 0 + +#define DEFAULT_RADV 128 +#define MAX_RXABSDELAY 0xFFFF +#define MIN_RXABSDELAY 0 + +#define DEFAULT_TIDV 64 +#define MAX_TXDELAY 0xFFFF +#define MIN_TXDELAY 0 + +#define DEFAULT_TADV 64 +#define MAX_TXABSDELAY 0xFFFF +#define MIN_TXABSDELAY 0 + +struct e1000_option { + enum { enable_option, range_option, list_option } type; + char *name; + char *err; + int def; + union { + struct { /* range_option info */ + int min; + int max; + } r; + struct { /* list_option info */ + int nr; + struct e1000_opt_list { int i; char *str; } *p; + } l; + } arg; +}; + + +static int __devinit +e1000_validate_option(int *value, struct e1000_option *opt) +{ + if(*value == OPTION_UNSET) { + *value = opt->def; + return 0; + } + + switch (opt->type) { + case enable_option: + switch (*value) { + case OPTION_ENABLED: + printk(KERN_INFO "%s Enabled\n", opt->name); + return 0; + case OPTION_DISABLED: + printk(KERN_INFO "%s Disabled\n", opt->name); + return 0; + } + break; + case range_option: + if(*value >= opt->arg.r.min && *value <= opt->arg.r.max) { + printk(KERN_INFO "%s set to %i\n", opt->name, *value); + return 0; + } + break; + case list_option: { + int i; + struct e1000_opt_list *ent; + + for(i = 0; i < opt->arg.l.nr; i++) { + ent = &opt->arg.l.p[i]; + if(*value == ent->i) { + if(ent->str[0] != '\0') + printk(KERN_INFO "%s\n", ent->str); + return 0; + } + } + } + break; + default: + BUG(); + } + + printk(KERN_INFO "Invalid %s specified (%i) %s\n", + opt->name, *value, opt->err); + *value = opt->def; + return -1; +} + +static void e1000_check_fiber_options(struct e1000_adapter *adapter); +static void e1000_check_copper_options(struct e1000_adapter *adapter); + +/** + * e1000_check_options - Range Checking for Command Line Parameters + * @adapter: board private structure + * + * This routine checks all command line paramters for valid user + * input. 
If an invalid value is given, or if no user specified + * value exists, a default value is used. The final value is stored + * in a variable in the adapter structure. + **/ + +void __devinit +e1000_check_options(struct e1000_adapter *adapter) +{ + int bd = adapter->bd_number; + if(bd >= E1000_MAX_NIC) { + printk(KERN_NOTICE + "Warning: no configuration for board #%i\n", bd); + printk(KERN_NOTICE "Using defaults for all values\n"); + bd = E1000_MAX_NIC; + } + + { /* Transmit Descriptor Count */ + struct e1000_option opt = { + .type = range_option, + .name = "Transmit Descriptors", + .err = "using default of " __MODULE_STRING(DEFAULT_TXD), + .def = DEFAULT_TXD, + .arg = { r: { min: MIN_TXD }} + }; + struct e1000_desc_ring *tx_ring = &adapter->tx_ring; + e1000_mac_type mac_type = adapter->hw.mac_type; + opt.arg.r.max = mac_type < e1000_82544 ? + MAX_TXD : MAX_82544_TXD; + + tx_ring->count = TxDescriptors[bd]; + e1000_validate_option(&tx_ring->count, &opt); + E1000_ROUNDUP(tx_ring->count, REQ_TX_DESCRIPTOR_MULTIPLE); + } + { /* Receive Descriptor Count */ + struct e1000_option opt = { + .type = range_option, + .name = "Receive Descriptors", + .err = "using default of " __MODULE_STRING(DEFAULT_RXD), + .def = DEFAULT_RXD, + .arg = { r: { min: MIN_RXD }} + }; + struct e1000_desc_ring *rx_ring = &adapter->rx_ring; + e1000_mac_type mac_type = adapter->hw.mac_type; + opt.arg.r.max = mac_type < e1000_82544 ? 
MAX_RXD : MAX_82544_RXD; + + rx_ring->count = RxDescriptors[bd]; + e1000_validate_option(&rx_ring->count, &opt); + E1000_ROUNDUP(rx_ring->count, REQ_RX_DESCRIPTOR_MULTIPLE); + } + { /* Checksum Offload Enable/Disable */ + struct e1000_option opt = { + .type = enable_option, + .name = "Checksum Offload", + .err = "defaulting to Enabled", + .def = OPTION_ENABLED + }; + + int rx_csum = XsumRX[bd]; + e1000_validate_option(&rx_csum, &opt); + adapter->rx_csum = rx_csum; + } + { /* Flow Control */ + + struct e1000_opt_list fc_list[] = + {{ e1000_fc_none, "Flow Control Disabled" }, + { e1000_fc_rx_pause,"Flow Control Receive Only" }, + { e1000_fc_tx_pause,"Flow Control Transmit Only" }, + { e1000_fc_full, "Flow Control Enabled" }, + { e1000_fc_default, "Flow Control Hardware Default" }}; + + struct e1000_option opt = { + .type = list_option, + .name = "Flow Control", + .err = "reading default settings from EEPROM", + .def = e1000_fc_default, + .arg = { l: { nr: ARRAY_SIZE(fc_list), p: fc_list }} + }; + + int fc = FlowControl[bd]; + e1000_validate_option(&fc, &opt); + adapter->hw.fc = adapter->hw.original_fc = fc; + } + { /* Transmit Interrupt Delay */ + char *tidv = "using default of " __MODULE_STRING(DEFAULT_TIDV); + struct e1000_option opt = { + .type = range_option, + .name = "Transmit Interrupt Delay", + .arg = { r: { min: MIN_TXDELAY, max: MAX_TXDELAY }} + }; + opt.def = DEFAULT_TIDV; + opt.err = tidv; + + adapter->tx_int_delay = TxIntDelay[bd]; + e1000_validate_option(&adapter->tx_int_delay, &opt); + } + { /* Transmit Absolute Interrupt Delay */ + char *tadv = "using default of " __MODULE_STRING(DEFAULT_TADV); + struct e1000_option opt = { + .type = range_option, + .name = "Transmit Absolute Interrupt Delay", + .arg = { r: { min: MIN_TXABSDELAY, max: MAX_TXABSDELAY }} + }; + opt.def = DEFAULT_TADV; + opt.err = tadv; + + adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; + e1000_validate_option(&adapter->tx_abs_int_delay, &opt); + } + { /* Receive Interrupt Delay */ + 
char *rdtr = "using default of " __MODULE_STRING(DEFAULT_RDTR); + struct e1000_option opt = { + .type = range_option, + .name = "Receive Interrupt Delay", + .arg = { r: { min: MIN_RXDELAY, max: MAX_RXDELAY }} + }; + opt.def = DEFAULT_RDTR; + opt.err = rdtr; + + adapter->rx_int_delay = RxIntDelay[bd]; + e1000_validate_option(&adapter->rx_int_delay, &opt); + } + { /* Receive Absolute Interrupt Delay */ + char *radv = "using default of " __MODULE_STRING(DEFAULT_RADV); + struct e1000_option opt = { + .type = range_option, + .name = "Receive Absolute Interrupt Delay", + .arg = { r: { min: MIN_RXABSDELAY, max: MAX_RXABSDELAY }} + }; + opt.def = DEFAULT_RADV; + opt.err = radv; + + adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; + e1000_validate_option(&adapter->rx_abs_int_delay, &opt); + } + + switch(adapter->hw.media_type) { + case e1000_media_type_fiber: + e1000_check_fiber_options(adapter); + break; + case e1000_media_type_copper: + e1000_check_copper_options(adapter); + break; + default: + BUG(); + } +} + +/** + * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version + * @adapter: board private structure + * + * Handles speed and duplex options on fiber adapters + **/ + +static void __devinit +e1000_check_fiber_options(struct e1000_adapter *adapter) +{ + int bd = adapter->bd_number; + bd = bd > E1000_MAX_NIC ? 
E1000_MAX_NIC : bd; + + if((Speed[bd] != OPTION_UNSET)) { + printk(KERN_INFO "Speed not valid for fiber adapters, " + "parameter ignored\n"); + } + if((Duplex[bd] != OPTION_UNSET)) { + printk(KERN_INFO "Duplex not valid for fiber adapters, " + "parameter ignored\n"); + } + if((AutoNeg[bd] != OPTION_UNSET)) { + printk(KERN_INFO "AutoNeg not valid for fiber adapters, " + "parameter ignored\n"); + } +} + +/** + * e1000_check_copper_options - Range Checking for Link Options, Copper Version + * @adapter: board private structure + * + * Handles speed and duplex options on copper adapters + **/ + +static void __devinit +e1000_check_copper_options(struct e1000_adapter *adapter) +{ + int speed, dplx; + int bd = adapter->bd_number; + bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; + + { /* Speed */ + struct e1000_opt_list speed_list[] = {{ 0, "" }, + { SPEED_10, "" }, + { SPEED_100, "" }, + { SPEED_1000, "" }}; + struct e1000_option opt = { + .type = list_option, + .name = "Speed", + .err = "parameter ignored", + .def = 0, + .arg = { l: { nr: ARRAY_SIZE(speed_list), p: speed_list }} + }; + + speed = Speed[bd]; + e1000_validate_option(&speed, &opt); + } + { /* Duplex */ + struct e1000_opt_list dplx_list[] = {{ 0, "" }, + { HALF_DUPLEX, "" }, + { FULL_DUPLEX, "" }}; + struct e1000_option opt = { + .type = list_option, + .name = "Duplex", + .err = "parameter ignored", + .def = 0, + .arg = { l: { nr: ARRAY_SIZE(dplx_list), p: dplx_list }} + }; + + dplx = Duplex[bd]; + e1000_validate_option(&dplx, &opt); + } + + if(AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) { + printk(KERN_INFO + "AutoNeg specified along with Speed or Duplex, " + "parameter ignored\n"); + adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT; + } else { /* Autoneg */ + struct e1000_opt_list an_list[] = + #define AA "AutoNeg advertising " + {{ 0x01, AA "10/HD" }, + { 0x02, AA "10/FD" }, + { 0x03, AA "10/FD, 10/HD" }, + { 0x04, AA "100/HD" }, + { 0x05, AA "100/HD, 10/HD" }, + { 0x06, AA "100/HD, 10/FD" 
}, + { 0x07, AA "100/HD, 10/FD, 10/HD" }, + { 0x08, AA "100/FD" }, + { 0x09, AA "100/FD, 10/HD" }, + { 0x0a, AA "100/FD, 10/FD" }, + { 0x0b, AA "100/FD, 10/FD, 10/HD" }, + { 0x0c, AA "100/FD, 100/HD" }, + { 0x0d, AA "100/FD, 100/HD, 10/HD" }, + { 0x0e, AA "100/FD, 100/HD, 10/FD" }, + { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" }, + { 0x20, AA "1000/FD" }, + { 0x21, AA "1000/FD, 10/HD" }, + { 0x22, AA "1000/FD, 10/FD" }, + { 0x23, AA "1000/FD, 10/FD, 10/HD" }, + { 0x24, AA "1000/FD, 100/HD" }, + { 0x25, AA "1000/FD, 100/HD, 10/HD" }, + { 0x26, AA "1000/FD, 100/HD, 10/FD" }, + { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" }, + { 0x28, AA "1000/FD, 100/FD" }, + { 0x29, AA "1000/FD, 100/FD, 10/HD" }, + { 0x2a, AA "1000/FD, 100/FD, 10/FD" }, + { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" }, + { 0x2c, AA "1000/FD, 100/FD, 100/HD" }, + { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" }, + { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" }, + { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }}; + + struct e1000_option opt = { + .type = list_option, + .name = "AutoNeg", + .err = "parameter ignored", + .def = AUTONEG_ADV_DEFAULT, + .arg = { l: { nr: ARRAY_SIZE(an_list), p: an_list }} + }; + + int an = AutoNeg[bd]; + e1000_validate_option(&an, &opt); + adapter->hw.autoneg_advertised = an; + } + + switch (speed + dplx) { + case 0: + adapter->hw.autoneg = 1; + if(Speed[bd] != OPTION_UNSET || Duplex[bd] != OPTION_UNSET) + printk(KERN_INFO + "Speed and duplex autonegotiation enabled\n"); + break; + case HALF_DUPLEX: + printk(KERN_INFO "Half Duplex specified without Speed\n"); + printk(KERN_INFO "Using Autonegotiation at Half Duplex only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_10_HALF | + ADVERTISE_100_HALF; + break; + case FULL_DUPLEX: + printk(KERN_INFO "Full Duplex specified without Speed\n"); + printk(KERN_INFO "Using Autonegotiation at Full Duplex only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_10_FULL | + 
ADVERTISE_100_FULL | + ADVERTISE_1000_FULL; + break; + case SPEED_10: + printk(KERN_INFO "10 Mbps Speed specified without Duplex\n"); + printk(KERN_INFO "Using Autonegotiation at 10 Mbps only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_10_HALF | + ADVERTISE_10_FULL; + break; + case SPEED_10 + HALF_DUPLEX: + printk(KERN_INFO "Forcing to 10 Mbps Half Duplex\n"); + adapter->hw.autoneg = 0; + adapter->hw.forced_speed_duplex = e1000_10_half; + adapter->hw.autoneg_advertised = 0; + break; + case SPEED_10 + FULL_DUPLEX: + printk(KERN_INFO "Forcing to 10 Mbps Full Duplex\n"); + adapter->hw.autoneg = 0; + adapter->hw.forced_speed_duplex = e1000_10_full; + adapter->hw.autoneg_advertised = 0; + break; + case SPEED_100: + printk(KERN_INFO "100 Mbps Speed specified without Duplex\n"); + printk(KERN_INFO "Using Autonegotiation at 100 Mbps only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_100_HALF | + ADVERTISE_100_FULL; + break; + case SPEED_100 + HALF_DUPLEX: + printk(KERN_INFO "Forcing to 100 Mbps Half Duplex\n"); + adapter->hw.autoneg = 0; + adapter->hw.forced_speed_duplex = e1000_100_half; + adapter->hw.autoneg_advertised = 0; + break; + case SPEED_100 + FULL_DUPLEX: + printk(KERN_INFO "Forcing to 100 Mbps Full Duplex\n"); + adapter->hw.autoneg = 0; + adapter->hw.forced_speed_duplex = e1000_100_full; + adapter->hw.autoneg_advertised = 0; + break; + case SPEED_1000: + printk(KERN_INFO "1000 Mbps Speed specified without Duplex\n"); + printk(KERN_INFO + "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case SPEED_1000 + HALF_DUPLEX: + printk(KERN_INFO "Half Duplex is not supported at 1000 Mbps\n"); + printk(KERN_INFO + "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case SPEED_1000 + FULL_DUPLEX: + 
printk(KERN_INFO + "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); + adapter->hw.autoneg = 1; + adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; + break; + default: + BUG(); + } + + /* Speed, AutoNeg and MDI/MDI-X must all play nice */ + if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) { + printk(KERN_INFO "Speed, AutoNeg and MDI-X specifications are " + "incompatible. Setting MDI-X to a compatible value.\n"); + } +} + diff -urP linux-2.4.19.orig/drivers/net/e1000/e1000_proc.c linux-2.4.19/drivers/net/e1000/e1000_proc.c --- linux-2.4.19.orig/drivers/net/e1000/e1000_proc.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/drivers/net/e1000/e1000_proc.c Wed Feb 12 12:09:01 2003 @@ -0,0 +1,699 @@ +/******************************************************************************* + + + Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + Linux NICS + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* + * Proc fs support. 
/*
 * e1000_proc_read - finish a proc read whose text is already in @page
 * @page: buffer holding a NUL-terminated string; its terminator is
 *        overwritten with a newline
 * @start: set to the first byte the caller should copy (page + off)
 * @off: offset into the text requested by the reader
 * @count: maximum number of bytes the reader wants
 * @eof: set to 1 when this read reaches the end of the text
 *
 * Returns the number of bytes available at *start, clamped to the
 * range 0..count.
 */
static int
e1000_proc_read(char *page, char **start, off_t off, int count, int *eof)
{
	int total = strlen(page);
	int remaining;

	/* Replace the terminator with a newline and count it as payload. */
	page[total] = '\n';
	total++;

	if(off + count >= total)
		*eof = 1;

	*start = page + off;

	remaining = total - off;
	if(remaining > count)
		remaining = count;

	return remaining < 0 ? 0 : remaining;
}
+ TAG_MAX_LENGTH, TAG_MAX_LENGTH, + elem->tag, FIELD_MAX_LENGTH, + elem->func(elem->data, elem->len, buf)); + } + + *p = '\0'; + + return e1000_proc_read(page, start, off, count, eof); +} + +static int +e1000_proc_single_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct proc_list *elem = data; + + sprintf(page, "%.*s", FIELD_MAX_LENGTH, elem->func(elem->data, + elem->len, page)); + + return e1000_proc_read(page, start, off, count, eof); +} + +static void +e1000_proc_dirs_free(char *name, struct list_head *proc_list_head) +{ + struct proc_dir_entry *intel_proc_dir, *proc_dir; + char info_name[strlen(name) + strlen(".info")]; + + for(intel_proc_dir = proc_net->subdir; intel_proc_dir; + intel_proc_dir = intel_proc_dir->next) { + if((intel_proc_dir->namelen == strlen(ADAPTERS_PROC_DIR)) && + !memcmp(intel_proc_dir->name, ADAPTERS_PROC_DIR, strlen(ADAPTERS_PROC_DIR))) + break; + } + + if(!intel_proc_dir) + return; + + for(proc_dir = intel_proc_dir->subdir; proc_dir; + proc_dir = proc_dir->next) { + if ((proc_dir->namelen == strlen(name)) && + !memcmp(proc_dir->name, name, strlen(name))) + break; + } + + if(proc_dir) { + struct list_head *curr; + struct proc_list *elem; + + list_for_each(curr, proc_list_head) { + elem = list_entry(curr, struct proc_list, list); + remove_proc_entry(elem->tag, proc_dir); + } + + strcpy(info_name, name); + strcat(info_name, ".info"); + + remove_proc_entry(info_name, intel_proc_dir); + remove_proc_entry(name, intel_proc_dir); + } + + /* If the intel dir is empty, remove it */ + + for(proc_dir = intel_proc_dir->subdir; proc_dir; + proc_dir = proc_dir->next) { + + /* ignore . and .. 
*/ + + if(*(proc_dir->name) == '.') + continue; + break; + } + + if(!proc_dir) + remove_proc_entry(ADAPTERS_PROC_DIR, proc_net); +} + + +static int +e1000_proc_singles_create(struct proc_dir_entry *parent, + struct list_head *proc_list_head) +{ + struct list_head *curr; + struct proc_list *elem; + + list_for_each(curr, proc_list_head) { + struct proc_dir_entry *proc_entry; + + elem = list_entry(curr, struct proc_list, list); + + if(!strlen(elem->tag)) + continue; + + if(!(proc_entry = + create_proc_entry(elem->tag, S_IFREG, parent))) + return 0; + + proc_entry->read_proc = e1000_proc_single_read; + proc_entry->data = elem; + SET_MODULE_OWNER(proc_entry); + } + + return 1; +} + +static void +e1000_proc_dirs_create(void *data, char *name, + struct list_head *proc_list_head) +{ + struct proc_dir_entry *intel_proc_dir, *proc_dir, *info_entry; + char info_name[strlen(name) + strlen(".info")]; + + for(intel_proc_dir = proc_net->subdir; intel_proc_dir; + intel_proc_dir = intel_proc_dir->next) { + if((intel_proc_dir->namelen == strlen(ADAPTERS_PROC_DIR)) && + !memcmp(intel_proc_dir->name, ADAPTERS_PROC_DIR, strlen(ADAPTERS_PROC_DIR))) + break; + } + + if(!intel_proc_dir) + if(!(intel_proc_dir = + create_proc_entry(ADAPTERS_PROC_DIR, + S_IFDIR, proc_net))) + return; + + if(!(proc_dir = + create_proc_entry(name, S_IFDIR, intel_proc_dir))) + return; + SET_MODULE_OWNER(proc_dir); + + if(!e1000_proc_singles_create(proc_dir, proc_list_head)) + return; + + strcpy(info_name, name); + strcat(info_name, ".info"); + + if(!(info_entry = + create_proc_entry(info_name, S_IFREG, intel_proc_dir))) + return; + SET_MODULE_OWNER(info_entry); + + info_entry->read_proc = e1000_proc_info_read; + info_entry->data = proc_list_head; +} + +static void +e1000_proc_list_add(struct list_head *proc_list_head, char *tag, + void *data, size_t len, + char *(*func)(void *, size_t, char *)) +{ + struct proc_list *new = (struct proc_list *) + kmalloc(sizeof(struct proc_list), GFP_KERNEL); + + if(!new) + 
return; + + strncpy(new->tag, tag, TAG_MAX_LENGTH); + new->data = data; + new->len = len; + new->func = func; + + list_add_tail(&new->list, proc_list_head); +} + +static void +e1000_proc_list_free(struct list_head *proc_list_head) +{ + struct proc_list *elem; + + while(!list_empty(proc_list_head)) { + elem = list_entry(proc_list_head->next, struct proc_list, list); + list_del(&elem->list); + kfree(elem); + } +} + +/* + * General purpose formating functions + */ + +static char * +e1000_proc_str(void *data, size_t len, char *buf) +{ + sprintf(buf, "%s", (char *)data); + return buf; +} + +static char * +e1000_proc_hex(void *data, size_t len, char *buf) +{ + switch(len) { + case sizeof(uint8_t): + sprintf(buf, "0x%02x", *(uint8_t *)data); + break; + case sizeof(uint16_t): + sprintf(buf, "0x%04x", *(uint16_t *)data); + break; + case sizeof(uint32_t): + sprintf(buf, "0x%08x", *(uint32_t *)data); + break; + case sizeof(uint64_t): + sprintf(buf, "0x%08Lx", (unsigned long long)*(uint64_t *)data); + break; + } + return buf; +} + +static char * +e1000_proc_unsigned(void *data, size_t len, char *buf) +{ + switch(len) { + case sizeof(uint8_t): + sprintf(buf, "%u", *(uint8_t *)data); + break; + case sizeof(uint16_t): + sprintf(buf, "%u", *(uint16_t *)data); + break; + case sizeof(uint32_t): + sprintf(buf, "%u", *(uint32_t *)data); + break; + case sizeof(uint64_t): + sprintf(buf, "%Lu", (unsigned long long)*(uint64_t *)data); + break; + } + return buf; +} + +/* + * Specific formating functions + */ + +static char * +e1000_proc_part_number(void *data, size_t len, char *buf) +{ + sprintf(buf, "%06x-%03x", *(uint32_t *)data >> 8, + *(uint32_t *)data & 0x000000FF); + return buf; +} + +static char * +e1000_proc_slot(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + sprintf(buf, "%u", PCI_SLOT(adapter->pdev->devfn)); + return buf; +} + +static char * +e1000_proc_bus_type(void *data, size_t len, char *buf) +{ + e1000_bus_type bus_type = *(e1000_bus_type 
/*
 * e1000_proc_hwaddr - format a 6-byte hardware address
 * @data: pointer to the six address bytes
 * @len: unused
 * @buf: output buffer, returned to the caller
 *
 * Writes the address as six colon-separated, upper-case hex octets.
 */
static char *
e1000_proc_hwaddr(void *data, size_t len, char *buf)
{
	const unsigned char *octet = data;

	sprintf(buf, "%02X:%02X:%02X:%02X:%02X:%02X",
	        octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);

	return buf;
}
"up" : "down"); + return buf; +} + +static char * +e1000_proc_media_type(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + sprintf(buf, + adapter->hw.media_type == e1000_media_type_copper ? + "Copper" : "Fiber"); + return buf; +} + +static char * +e1000_proc_cable_length(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + e1000_cable_length cable_length = adapter->phy_info.cable_length; + sprintf(buf, "%s%s", + cable_length == e1000_cable_length_50 ? "0-50" : + cable_length == e1000_cable_length_50_80 ? "50-80" : + cable_length == e1000_cable_length_80_110 ? "80-110" : + cable_length == e1000_cable_length_110_140 ? "110-140" : + cable_length == e1000_cable_length_140 ? "> 140" : + "Unknown", + cable_length != e1000_cable_length_undefined ? + " Meters (+/- 20 Meters)" : ""); + return buf; +} + +static char * +e1000_proc_extended(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + e1000_10bt_ext_dist_enable dist_enable = + adapter->phy_info.extended_10bt_distance; + sprintf(buf, + dist_enable == e1000_10bt_ext_dist_enable_normal ? "Disabled" : + dist_enable == e1000_10bt_ext_dist_enable_lower ? "Enabled" : + "Unknown"); + return buf; +} + +static char * +e1000_proc_cable_polarity(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + e1000_rev_polarity polarity = adapter->phy_info.cable_polarity; + sprintf(buf, + polarity == e1000_rev_polarity_normal ? "Normal" : + polarity == e1000_rev_polarity_reversed ? "Reversed" : + "Unknown"); + return buf; +} + +static char * +e1000_proc_polarity_correction(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + e1000_polarity_reversal correction = + adapter->phy_info.polarity_correction; + sprintf(buf, + correction == e1000_polarity_reversal_enabled ? "Disabled" : + correction == e1000_polarity_reversal_disabled ? 
"Enabled" : + "Unknown"); + return buf; +} + +static char * +e1000_proc_mdi_x_enabled(void *data, size_t len, char *buf) +{ + struct e1000_adapter *adapter = data; + e1000_auto_x_mode mdix_mode = adapter->phy_info.mdix_mode; + sprintf(buf, + mdix_mode == e1000_auto_x_mode_manual_mdi ? "MDI" : + mdix_mode == e1000_auto_x_mode_manual_mdix ? "MDI-X" : + "Unknown"); + return buf; +} + +static char * +e1000_proc_rx_status(void *data, size_t len, char *buf) +{ + e1000_1000t_rx_status rx_status = *(e1000_1000t_rx_status *)data; + sprintf(buf, + rx_status == e1000_1000t_rx_status_not_ok ? "NOT_OK" : + rx_status == e1000_1000t_rx_status_ok ? "OK" : + "Unknown"); + return buf; +} + +/* + * e1000_proc_list_setup - build link list of proc praramters + * @adapter: board private structure + * + * Order matters - ethx.info entries are ordered in the order links + * are added to list. + */ + +#define LIST_ADD_F(T,D,F) \ + e1000_proc_list_add(proc_list_head, (T), (D), sizeof(*(D)), (F)) +#define LIST_ADD_BLANK() LIST_ADD_F("", NULL, NULL) +#define LIST_ADD_S(T,D) LIST_ADD_F((T), (D), e1000_proc_str) +#define LIST_ADD_H(T,D) LIST_ADD_F((T), (D), e1000_proc_hex) +#define LIST_ADD_U(T,D) LIST_ADD_F((T), (D), e1000_proc_unsigned) + +static void +e1000_proc_list_setup(struct e1000_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct list_head *proc_list_head = &adapter->proc_list_head; + + INIT_LIST_HEAD(proc_list_head); + + LIST_ADD_S("Description", adapter->id_string); + LIST_ADD_F("Part_Number", &adapter->part_num, e1000_proc_part_number); + LIST_ADD_S("Driver_Name", e1000_driver_name); + LIST_ADD_S("Driver_Version", e1000_driver_version); + LIST_ADD_H("PCI_Vendor", &hw->vendor_id); + LIST_ADD_H("PCI_Device_ID", &hw->device_id); + LIST_ADD_H("PCI_Subsystem_Vendor", &hw->subsystem_vendor_id); + LIST_ADD_H("PCI_Subsystem_ID", &hw->subsystem_id); + LIST_ADD_H("PCI_Revision_ID", &hw->revision_id); + LIST_ADD_U("PCI_Bus", &adapter->pdev->bus->number); + 
LIST_ADD_F("PCI_Slot", adapter, e1000_proc_slot); + + if(adapter->hw.mac_type >= e1000_82543) { + LIST_ADD_F("PCI_Bus_Type", + &hw->bus_type, e1000_proc_bus_type); + LIST_ADD_F("PCI_Bus_Speed", + &hw->bus_speed, e1000_proc_bus_speed); + LIST_ADD_F("PCI_Bus_Width", + &hw->bus_width, e1000_proc_bus_width); + } + + LIST_ADD_U("IRQ", &adapter->pdev->irq); + LIST_ADD_S("System_Device_Name", adapter->ifname); + LIST_ADD_F("Current_HWaddr", + adapter->netdev->dev_addr, e1000_proc_hwaddr); + LIST_ADD_F("Permanent_HWaddr", + adapter->hw.perm_mac_addr, e1000_proc_hwaddr); + + LIST_ADD_BLANK(); + + LIST_ADD_F("Link", adapter, e1000_proc_link); + LIST_ADD_F("Speed", &adapter->link_speed, e1000_proc_link_speed); + LIST_ADD_F("Duplex", &adapter->link_duplex, e1000_proc_link_duplex); + LIST_ADD_F("State", adapter, e1000_proc_state); + + LIST_ADD_BLANK(); + + /* Standard net device stats */ + LIST_ADD_U("Rx_Packets", &adapter->net_stats.rx_packets); + LIST_ADD_U("Tx_Packets", &adapter->net_stats.tx_packets); + LIST_ADD_U("Rx_Bytes", &adapter->net_stats.rx_bytes); + LIST_ADD_U("Tx_Bytes", &adapter->net_stats.tx_bytes); + LIST_ADD_U("Rx_Errors", &adapter->net_stats.rx_errors); + LIST_ADD_U("Tx_Errors", &adapter->net_stats.tx_errors); + LIST_ADD_U("Rx_Dropped", &adapter->net_stats.rx_dropped); + LIST_ADD_U("Tx_Dropped", &adapter->net_stats.tx_dropped); + + LIST_ADD_U("Multicast", &adapter->net_stats.multicast); + LIST_ADD_U("Collisions", &adapter->net_stats.collisions); + + LIST_ADD_U("Rx_Length_Errors", &adapter->net_stats.rx_length_errors); + LIST_ADD_U("Rx_Over_Errors", &adapter->net_stats.rx_over_errors); + LIST_ADD_U("Rx_CRC_Errors", &adapter->net_stats.rx_crc_errors); + LIST_ADD_U("Rx_Frame_Errors", &adapter->net_stats.rx_frame_errors); + LIST_ADD_U("Rx_FIFO_Errors", &adapter->net_stats.rx_fifo_errors); + LIST_ADD_U("Rx_Missed_Errors", &adapter->net_stats.rx_missed_errors); + + LIST_ADD_U("Tx_Aborted_Errors", &adapter->net_stats.tx_aborted_errors); + 
LIST_ADD_U("Tx_Carrier_Errors", &adapter->net_stats.tx_carrier_errors); + LIST_ADD_U("Tx_FIFO_Errors", &adapter->net_stats.tx_fifo_errors); + LIST_ADD_U("Tx_Heartbeat_Errors", + &adapter->net_stats.tx_heartbeat_errors); + LIST_ADD_U("Tx_Window_Errors", &adapter->net_stats.tx_window_errors); + + /* 8254x-specific stats */ + LIST_ADD_U("Tx_Abort_Late_Coll", &adapter->stats.latecol); + LIST_ADD_U("Tx_Deferred_Ok", &adapter->stats.dc); + LIST_ADD_U("Tx_Single_Coll_Ok", &adapter->stats.scc); + LIST_ADD_U("Tx_Multi_Coll_Ok", &adapter->stats.mcc); + LIST_ADD_U("Rx_Long_Length_Errors", &adapter->stats.roc); + LIST_ADD_U("Rx_Short_Length_Errors", &adapter->stats.ruc); + + /* The 82542 does not have an alignment error count register */ + if(adapter->hw.mac_type >= e1000_82543) + LIST_ADD_U("Rx_Align_Errors", &adapter->stats.algnerrc); + + LIST_ADD_U("Rx_Flow_Control_XON", &adapter->stats.xonrxc); + LIST_ADD_U("Rx_Flow_Control_XOFF", &adapter->stats.xoffrxc); + LIST_ADD_U("Tx_Flow_Control_XON", &adapter->stats.xontxc); + LIST_ADD_U("Tx_Flow_Control_XOFF", &adapter->stats.xofftxc); + LIST_ADD_U("Rx_CSum_Offload_Good", &adapter->hw_csum_good); + LIST_ADD_U("Rx_CSum_Offload_Errors", &adapter->hw_csum_err); + + LIST_ADD_BLANK(); + + /* Cable diags */ + LIST_ADD_F("PHY_Media_Type", adapter, e1000_proc_media_type); + if(adapter->hw.media_type == e1000_media_type_copper) { + LIST_ADD_F("PHY_Cable_Length", + adapter, e1000_proc_cable_length); + LIST_ADD_F("PHY_Extended_10Base_T_Distance", + adapter, e1000_proc_extended); + LIST_ADD_F("PHY_Cable_Polarity", + adapter, e1000_proc_cable_polarity); + LIST_ADD_F("PHY_Disable_Polarity_Correction", + adapter, e1000_proc_polarity_correction); + LIST_ADD_U("PHY_Idle_Errors", + &adapter->phy_stats.idle_errors); + LIST_ADD_U("PHY_Receive_Errors", + &adapter->phy_stats.receive_errors); + LIST_ADD_F("PHY_MDI_X_Enabled", + adapter, e1000_proc_mdi_x_enabled); + LIST_ADD_F("PHY_Local_Receiver_Status", + &adapter->phy_info.local_rx, + 
e1000_proc_rx_status); + LIST_ADD_F("PHY_Remote_Receiver_Status", + &adapter->phy_info.remote_rx, + e1000_proc_rx_status); + } + +} + +/* + * e1000_proc_dev_setup - create proc fs nodes and link list + * @adapter: board private structure + */ + +void +e1000_proc_dev_setup(struct e1000_adapter *adapter) +{ + e1000_proc_list_setup(adapter); + + e1000_proc_dirs_create(adapter, + adapter->ifname, + &adapter->proc_list_head); +} + +/* + * e1000_proc_dev_free - free proc fs nodes and link list + * @adapter: board private structure + */ + +void +e1000_proc_dev_free(struct e1000_adapter *adapter) +{ + e1000_proc_dirs_free(adapter->ifname, &adapter->proc_list_head); + + e1000_proc_list_free(&adapter->proc_list_head); +} + +#else /* CONFIG_PROC_FS */ + +void e1000_proc_dev_setup(struct e1000_adapter *adapter) {} +void e1000_proc_dev_free(struct e1000_adapter *adapter) {} + +#endif /* CONFIG_PROC_FS */ + diff -urP linux-2.4.19.orig/drivers/net/sk98lin/skge.c linux-2.4.19/drivers/net/sk98lin/skge.c --- linux-2.4.19.orig/drivers/net/sk98lin/skge.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/drivers/net/sk98lin/skge.c Wed Feb 12 12:09:01 2003 @@ -282,14 +282,15 @@ #define USE_TX_COMPLETE /* use interrupt moderation (for tx complete only) */ -// #define USE_INT_MOD +#define USE_INT_MOD #define INTS_PER_SEC 1000 /* * threshold for copying small receive frames * set to 0 to avoid copying, set to 9001 to copy all frames */ -#define SK_COPY_THRESHOLD 200 +/*#define SK_COPY_THRESHOLD 200*/ +#define SK_COPY_THRESHOLD 0 /* number of adapters that can be configured via command line params */ #define SK_MAX_CARD_PARAM 16 @@ -1900,13 +1901,13 @@ * freed ( -> ring completely free now). 
*/ pTxPort->pTxdRingTail = pTxd; - netif_start_queue(pAC->dev[pTxPort->PortIndex]); + netif_wake_queue(pAC->dev[pTxPort->PortIndex]); return; } if (Control & TX_CTRL_OWN_BMU) { pTxPort->pTxdRingTail = pTxd; if (pTxPort->TxdRingFree > 0) { - netif_start_queue(pAC->dev[pTxPort->PortIndex]); + netif_wake_queue(pAC->dev[pTxPort->PortIndex]); } return; } diff -urP linux-2.4.19.orig/fs/proc/Makefile linux-2.4.19/fs/proc/Makefile --- linux-2.4.19.orig/fs/proc/Makefile Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/fs/proc/Makefile Wed Feb 12 12:09:01 2003 @@ -18,4 +18,8 @@ obj-y += proc_devtree.o endif +ifeq ($(CONFIG_WEB100_STATS),y) +obj-y += web100.o +endif + include $(TOPDIR)/Rules.make diff -urP linux-2.4.19.orig/fs/proc/root.c linux-2.4.19/fs/proc/root.c --- linux-2.4.19.orig/fs/proc/root.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/fs/proc/root.c Wed Feb 12 12:09:01 2003 @@ -68,6 +68,10 @@ proc_rtas_init(); #endif proc_bus = proc_mkdir("bus", 0); + +#ifdef CONFIG_WEB100_STATS + proc_web100_init(); +#endif } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry) diff -urP linux-2.4.19.orig/fs/proc/web100.c linux-2.4.19/fs/proc/web100.c --- linux-2.4.19.orig/fs/proc/web100.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/fs/proc/web100.c Wed Feb 12 12:09:01 2003 @@ -0,0 +1,1265 @@ +/* + * fs/proc/web100.c + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * + * The Web 100 project. See http://www.web100.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#define WEB100MIB_BLOCK_SIZE PAGE_SIZE - 1024 + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a,b) (((a) < (b)) ? 
(b) : (a)) +#endif + +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_wmem_max; + +struct proc_dir_entry *proc_web100_dir; +static struct proc_dir_entry *proc_web100_header; + + +/* + * Web100 variable reading/writing + */ + +enum web100_connection_inos { + PROC_CONN_SPEC_ASCII = 1, + PROC_CONN_SPEC, + PROC_CONN_READ, + PROC_CONN_TEST, + PROC_CONN_TUNE, + PROC_CONN_HIGH_INO /* Keep at the end */ +}; + +enum { + WEB100_TYPE_INTEGER = 0, + WEB100_TYPE_INTEGER32, + WEB100_TYPE_INET_ADDRESS_IPV4, + WEB100_TYPE_IP_ADDRESS = WEB100_TYPE_INET_ADDRESS_IPV4, /* Depricated */ + WEB100_TYPE_COUNTER32, + WEB100_TYPE_GAUGE32, + WEB100_TYPE_UNSIGNED32, + WEB100_TYPE_TIME_TICKS, + WEB100_TYPE_COUNTER64, + WEB100_TYPE_INET_PORT_NUMBER, + WEB100_TYPE_UNSIGNED16 = WEB100_TYPE_INET_PORT_NUMBER, /* Depricated */ + WEB100_TYPE_INET_ADDRESS, + WEB100_TYPE_INET_ADDRESS_IPV6, +}; + +struct web100_var; +typedef int (*web100_rwfunc_t)(void *buf, struct web100stats *stats, + struct web100_var *vp); + +/* The printed variable description should look something like this (in ASCII): + * varname offset type + * where offset is the offset into the file. 
+ */
+struct web100_var {
+	char *name;
+	__u32 type;			/* one of the WEB100_TYPE_* values */
+	int len;			/* size of the variable in bytes */
+
+	web100_rwfunc_t read;
+	unsigned long read_data;	/* read handler-specific data */
+
+	web100_rwfunc_t write;
+	unsigned long write_data;	/* write handler-specific data */
+
+	struct web100_var *next;
+};
+
+/* One entry per file that appears inside a connection directory. */
+struct web100_file {
+	int len;			/* strlen(name), see F() below */
+	char *name;
+	int low_ino;			/* PROC_CONN_* value, low byte of the inode number */
+	mode_t mode;
+
+	struct web100_var *first_var;	/* head of this file's variable list */
+};
+
+#define F(name,ino,perm) { sizeof (name) - 1, (name), (ino), (perm), NULL }
+static struct web100_file web100_file_arr[] = {
+	F("spec-ascii", PROC_CONN_SPEC_ASCII, S_IFREG | S_IRUGO),
+	F("spec", PROC_CONN_SPEC, S_IFREG | S_IRUGO),
+	F("read", PROC_CONN_READ, S_IFREG | S_IRUGO),
+	F("test", PROC_CONN_TEST, S_IFREG | S_IRUGO),
+	F("tune", PROC_CONN_TUNE, S_IFREG | S_IRUGO | S_IWUGO) };
+#undef F
+#define WEB100_FILE_ARR_SIZE (sizeof (web100_file_arr) / sizeof (struct web100_file))
+
+/*
+ * Map a PROC_CONN_* inode low part to its table entry.
+ * This works only if the array is built in the correct order
+ * (entry i describes low_ino i + 1).
+ * Returns NULL when ino does not name a table entry, so callers that
+ * test the result for NULL actually get a meaningful answer.
+ */
+static inline struct web100_file *web100_file_lookup(int ino) {
+	if (ino < 1 || ino > (int)WEB100_FILE_ARR_SIZE)
+		return NULL;
+	return &web100_file_arr[ino - 1];
+}
+
+static void add_var(struct web100_file *file, char *name, int type,
+	web100_rwfunc_t read, unsigned long read_data,
+	web100_rwfunc_t write, unsigned long write_data)
+{
+	struct web100_var *var;
+
+	/* Again, assuming add_var is only called at init. 
*/ + if ((var = kmalloc(sizeof (struct web100_var), GFP_KERNEL)) == NULL) + panic("No memory available for Web100 var.\n"); + + var->name = name; + var->type = type; + switch (type) { + case WEB100_TYPE_INET_PORT_NUMBER: + var->len = 2; + break; + case WEB100_TYPE_INTEGER: + case WEB100_TYPE_INTEGER32: + case WEB100_TYPE_COUNTER32: + case WEB100_TYPE_GAUGE32: + case WEB100_TYPE_UNSIGNED32: + case WEB100_TYPE_TIME_TICKS: + var->len = 4; + break; + case WEB100_TYPE_COUNTER64: + var->len = 8; + break; + case WEB100_TYPE_INET_ADDRESS: + var->len = 17; + break; + default: + printk("Web100: Warning: Adding variable of unknown type.\n"); + var->len = 0; + } + + var->read = read; + var->read_data = read_data; + + var->write = write; + var->write_data = write_data; + + var->next = file->first_var; + file->first_var = var; +} + + +/* + * proc filesystem routines + */ + +static struct inode *proc_web100_make_inode(struct super_block *sb, int ino) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + goto out; + + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = ino; + + inode->i_uid = 0; + inode->i_gid = 0; + +out: + return inode; +} + +static inline ino_t ino_from_cid(int cid) +{ + return (cid << 8) | 0x80000000; +} + +static inline ino_t ino_from_parts(ino_t dir_ino, __u16 low_ino) +{ + return (dir_ino & ~0xff) | low_ino; +} + +static inline int cid_from_ino(ino_t ino) +{ + return (ino & 0x7fffff00) >> 8; +} + +static inline int low_from_ino(ino_t ino) +{ + return ino & 0xff; +} + +static int connection_file_open(struct inode *inode, struct file *file) +{ + int cid = cid_from_ino(inode->i_ino); + struct web100stats *stats; + + read_lock_bh(&web100_linkage_lock); + stats = web100stats_lookup(cid); + if (stats == NULL || stats->wc_dead) { + read_unlock_bh(&web100_linkage_lock); + return -ENOENT; + } + web100_stats_use(stats); + read_unlock_bh(&web100_linkage_lock); + + return 0; +} + +static int connection_file_release(struct 
inode *inode, struct file *file)
+{
+	int cid = cid_from_ino(inode->i_ino);
+	struct web100stats *stats;
+
+	write_lock_bh(&web100_linkage_lock);
+	stats = web100stats_lookup(cid);
+	if (stats == NULL) {
+		/* The lock was taken for writing; release the write side. */
+		write_unlock_bh(&web100_linkage_lock);
+		return -ENOENT;
+	}
+	web100_stats_unuse(stats);
+	write_unlock_bh(&web100_linkage_lock);
+
+	return 0;
+}
+
+/** /proc/web100// **/
+/*
+ * Shared implementation of read and write on a connection file.
+ * @read is non-zero for a read, zero for a write.
+ *
+ * The file is the concatenation of its variables, in list order.
+ * *ppos must fall on a variable boundary (-ESPIPE otherwise) and only
+ * whole variables are transferred; the walk stops at the first variable
+ * that no longer fits in the remaining nbytes.  A variable with no
+ * handler for the requested direction yields -EACCES.
+ *
+ * Returns the number of bytes transferred or a negative errno.  The
+ * bounce page is released on every exit path.
+ */
+static ssize_t connection_file_rw(int read, struct file *file,
+	char *buf, size_t nbytes, loff_t *ppos)
+{
+	int low_ino = low_from_ino(file->f_dentry->d_inode->i_ino);
+	int cid = cid_from_ino(file->f_dentry->d_inode->i_ino);
+	struct web100stats *stats;
+	struct web100_file *fp;
+	struct web100_var *vp;
+	int pos;
+	int n;
+	int err;
+	web100_rwfunc_t rwfunc;
+	char *page;
+
+	/* We're only going to let them read one page at a time.
+	 * We shouldn't ever read more than a page, anyway, though.
+	 */
+	if (nbytes > PAGE_SIZE)
+		nbytes = PAGE_SIZE;
+
+	if ((err = verify_area(read ? VERIFY_WRITE : VERIFY_READ, buf, nbytes)) < 0)
+		return err;
+
+	if ((page = (char *)get_free_page(GFP_KERNEL)) == NULL)
+		return -ENOMEM;
+
+	if (!read) {
+		if (copy_from_user(page, buf, nbytes)) {
+			err = -EFAULT;
+			goto out_free;
+		}
+	}
+
+	fp = web100_file_lookup(low_ino);
+	if (fp == NULL) {
+		printk("Unregistered Web100 file.\n");
+		err = 0;
+		goto out_free;
+	}
+
+	read_lock_bh(&web100_linkage_lock);
+	stats = web100stats_lookup(cid);
+	read_unlock_bh(&web100_linkage_lock);
+	if (stats == NULL) {
+		err = -ENOENT;
+		goto out_free;
+	}
+
+	/* NOTE(review): assumes stats->wc_sk is non-NULL here; read_sk()
+	 * in this file treats a NULL wc_sk as possible -- confirm. */
+	lock_sock(stats->wc_sk);
+
+	/* TODO: seek in constant time, not linear. -JWH */
+	pos = 0;
+	n = 0;
+	vp = fp->first_var;
+	while (vp && nbytes > n) {
+		if (pos > *ppos) {
+			err = -ESPIPE;
+			goto err_out;
+		}
+		if (pos == *ppos) {
+			if (vp->len > nbytes - n)
+				break;
+
+			if (read)
+				rwfunc = vp->read;
+			else
+				rwfunc = vp->write;
+			if (rwfunc == NULL) {
+				err = -EACCES;
+				goto err_out;
+			}
+
+			err = rwfunc(page + n, stats, vp);
+
+			if (err < 0)
+				goto err_out;
+			n += vp->len;
+			*ppos += vp->len;
+		}
+		pos += vp->len;
+		vp = vp->next;
+	}
+
+	release_sock(stats->wc_sk);
+
+	if (read && copy_to_user(buf, page, n))
+		err = -EFAULT;
+	else
+		err = n;
+	goto out_free;
+
+err_out:
+	release_sock(stats->wc_sk);
+out_free:
+	free_page((unsigned long)page);
+
+	return err;
+}
+
+static ssize_t connection_file_read(struct file *file,
+	char *buf, size_t nbytes, loff_t *ppos)
+{
+	return connection_file_rw(1, file, buf, nbytes, ppos);
+}
+
+static ssize_t connection_file_write(struct file *file,
+	const char *buf, size_t nbytes, loff_t *ppos)
+{
+	return connection_file_rw(0, file, (char *)buf, nbytes, ppos);
+}
+
+static struct file_operations connection_file_fops = {
+	open:		connection_file_open,
+	release:	connection_file_release,
+	read:		connection_file_read,
+	write:		connection_file_write
+};
+
+
+/*
+ * Format the eight 16-bit groups at @addr (network byte order) as IPv6
+ * text into @dest, replacing the longest run of two or more zero groups
+ * with "::".  Returns the number of characters written (excluding the
+ * terminating NUL written by sprintf).
+ */
+static size_t v6addr_str(char *dest, short *addr)
+{
+	int start = -1, end = -1;
+	int i, j;
+	int pos;
+
+	/* Find longest subsequence of 0's in addr */
+	for (i = 0; i < 8; i++) {
+		if (addr[i] == 0) {
+			/* Check the bound first so addr[8] is never read. */
+			for (j = i + 1; j < 8 && addr[j] == 0; j++);
+			if (j - i > end - start) {
+				end = j;
+				start = i;
+			}
+			i = j;
+		}
+	}
+	if (end - start == 1)
+		start = -1;
+
+	pos = 0;
+	for (i = 0; i < 8; i++) {
+		if (i > 0)
+			pos += sprintf(dest + pos, ":");
+		if (i == start) {
+			/*
+			 * The separators printed before this run and before
+			 * the group after it already form "::" when the run
+			 * is in the middle.  A colon is only missing on a
+			 * side where the run touches the edge of the address.
+			 */
+			if (start == 0)
+				pos += sprintf(dest + pos, ":");
+			if (end == 8)
+				pos += sprintf(dest + pos, ":");
+			i += end - start - 1;
+		} else {
+			pos += sprintf(dest + pos, "%hx", ntohs(addr[i]));
+		}
+	}
+
+	return pos;
+}
+
+/** /proc/web100//spec_ascii **/
+static ssize_t connection_spec_ascii_read(struct file * file, char * buf,
+	size_t nbytes, loff_t *ppos)
+{
+	__u32 
local_addr, remote_addr; + __u16 local_port, remote_port; + int cid; + struct web100stats *stats; + struct web100directs *vars; + char tmpbuf[100]; + int len = 0; + + if (*ppos != 0) + return 0; + + cid = cid_from_ino(file->f_dentry->d_parent->d_inode->i_ino); + + read_lock_bh(&web100_linkage_lock); + stats = web100stats_lookup(cid); + read_unlock_bh(&web100_linkage_lock); + if (stats == NULL) + return -ENOENT; + vars = &stats->wc_vars; + + if (vars->LocalAddressType == WC_ADDRTYPE_IPV4) { + /* These values should not change while stats are linked. + * We don't need to lock the sock. */ + local_addr = vars->LocalAddress.v4addr; + remote_addr = vars->RemAddress.v4addr; + local_port = vars->LocalPort; + remote_port = vars->RemPort; + + len = sprintf(tmpbuf, "%d.%d.%d.%d:%d %d.%d.%d.%d:%d\n", + local_addr & 0xff, + (local_addr >> 8) & 0xff, + (local_addr >> 16) & 0xff, + (local_addr >> 24) & 0xff, + local_port, + remote_addr & 0xff, + (remote_addr >> 8) & 0xff, + (remote_addr >> 16) & 0xff, + (remote_addr >> 24) & 0xff, + remote_port); + } else if (vars->LocalAddressType == WC_ADDRTYPE_IPV6) { + local_port = vars->LocalPort; + remote_port = vars->RemPort; + + len += v6addr_str(tmpbuf + len, (short *)&vars->LocalAddress.v6addr.addr); + len += sprintf(tmpbuf + len, ".%d ", local_port); + len += v6addr_str(tmpbuf + len, (short *)&vars->RemAddress.v6addr.addr); + len += sprintf(tmpbuf + len, ".%d\n", remote_port); + } else { + printk(KERN_ERR "connection_spec_ascii_read: LocalAddressType invalid\n"); + return 0; + } + + len = len > nbytes ? 
nbytes : len;
+	if (copy_to_user(buf, tmpbuf, len))
+		return -EFAULT;
+	*ppos += len;
+	return len;
+}
+
+static struct file_operations connection_spec_ascii_fops = {
+	open:		connection_file_open,
+	release:	connection_file_release,
+	read:		connection_spec_ascii_read
+};
+
+
+/** /proc/web100// **/
+/*
+ * List a connection directory: ".", "..", then one entry per element
+ * of web100_file_arr.  f_pos 0 and 1 are the dot entries; f_pos - 2
+ * indexes the table.  Returns 0 when filldir refuses an entry and 1
+ * otherwise.
+ */
+static int connection_dir_readdir(struct file *filp,
+	void *dirent, filldir_t filldir)
+{
+	int i;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct web100_file *p;
+
+	i = filp->f_pos;
+	switch (i) {
+	case 0:
+		if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
+			return 0;
+		i++;
+		filp->f_pos++;
+		/* fall through */
+	case 1:
+		if (filldir(dirent, "..", 2, i, proc_web100_dir->low_ino, DT_DIR) < 0)
+			return 0;
+		i++;
+		filp->f_pos++;
+		/* fall through */
+	default:
+		/*
+		 * web100_file_arr has no NULL-name terminator, so the walk
+		 * is bounded by the table size rather than by p->name.
+		 */
+		for (i -= 2; (size_t)i < WEB100_FILE_ARR_SIZE; i++) {
+			p = &web100_file_arr[i];
+			if (filldir(dirent, p->name, p->len, filp->f_pos,
+				    ino_from_parts(inode->i_ino, p->low_ino),
+				    p->mode >> 12) < 0)
+				return 0;
+			filp->f_pos++;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Resolve a name inside a connection directory against web100_file_arr
+ * and instantiate an inode for it.  Unknown names yield -ENOENT.
+ */
+static struct dentry *connection_dir_lookup(struct inode *dir,
+	struct dentry *dentry)
+{
+	struct inode *inode;
+	struct web100_file *p = NULL;
+	unsigned int i;
+
+	inode = NULL;
+	/* Bounded by the table size: the table has no NULL-name terminator. */
+	for (i = 0; i < WEB100_FILE_ARR_SIZE; i++) {
+		p = &web100_file_arr[i];
+		if (p->len != dentry->d_name.len)
+			continue;
+		if (!memcmp(dentry->d_name.name, p->name, p->len))
+			break;
+	}
+	if (i >= WEB100_FILE_ARR_SIZE)
+		return ERR_PTR(-ENOENT);
+
+	inode = proc_web100_make_inode(dir->i_sb, ino_from_parts(dir->i_ino, p->low_ino));
+	if (!inode)
+		return ERR_PTR(-EINVAL); /* ? 
*/ + inode->i_mode = p->mode; + + switch (p->low_ino) { + case PROC_CONN_SPEC_ASCII: + inode->i_fop = &connection_spec_ascii_fops; + break; + case PROC_CONN_SPEC: + case PROC_CONN_READ: + case PROC_CONN_TEST: + case PROC_CONN_TUNE: + inode->i_fop = &connection_file_fops; + break; + default: + printk("Web100: impossible type (%d)\n", p->low_ino); + iput(inode); + return ERR_PTR(-EINVAL); + } + + d_add(dentry, inode); + return NULL; +} + +static struct inode_operations connection_dir_iops = { + lookup: connection_dir_lookup +}; + +static struct file_operations connection_dir_fops = { + readdir: connection_dir_readdir +}; + + +/** /proc/web100/header **/ +static ssize_t header_read(struct file * file, char * buf, + size_t nbytes, loff_t *ppos) +{ + int len = 0; + int offset; + char *tmpbuf; + struct web100_file *fp; + struct web100_var *vp; + int n, tmp; + int i; + int ret = 0; + + /* We will assume the variable description list will not change + * after init. (True at least right now.) Otherwise, we would have + * to have a lock on it. 
+	 */
+
+	if ((tmpbuf = (char *)get_free_page(GFP_KERNEL)) == NULL)
+		return -ENOMEM;
+
+	offset = sprintf(tmpbuf, "%s\n", web100_version_string);
+
+	for (i = 0; i < WEB100_FILE_ARR_SIZE; i++) {
+		int file_offset = 0;
+
+		fp = &web100_file_arr[i];
+
+		/* Files without registered variables are not described. */
+		if (fp->first_var == NULL)
+			continue;
+
+		offset += sprintf(tmpbuf + offset, "\n/%s\n", fp->name);
+
+		vp = fp->first_var;
+		while (vp) {
+			/* Flush the scratch page once it is nearly full. */
+			if (offset > WEB100MIB_BLOCK_SIZE) {
+				len += offset;
+				if (*ppos < len) {
+					n = MIN(offset, MIN(nbytes, len - *ppos));
+					if (copy_to_user(buf, tmpbuf + MAX((*ppos - (len - offset)), 0), n)) {
+						/* Leave through "out" so tmpbuf is freed. */
+						ret = -EFAULT;
+						goto out;
+					}
+					buf += n;
+					if (nbytes == n) {
+						*ppos += n;
+						ret = n;
+						goto out;
+					}
+				}
+				offset = 0;
+			}
+
+			offset += sprintf(tmpbuf + offset, "%s %d %d %d\n",
+					  vp->name, file_offset, vp->type, vp->len);
+			file_offset += vp->len;
+
+			vp = vp->next;
+		}
+	}
+	len += offset;
+	if (*ppos < len) {
+		n = MIN(offset, MIN(nbytes, len - *ppos));
+		if (copy_to_user(buf, tmpbuf + MAX((*ppos - (len - offset)), 0), n)) {
+			/* Leave through "out" so tmpbuf is freed. */
+			ret = -EFAULT;
+			goto out;
+		}
+		if (nbytes <= len - *ppos) {
+			*ppos += nbytes;
+			ret = nbytes;
+			goto out;
+		} else {
+			tmp = len - *ppos;
+			*ppos = len;
+			ret = tmp;
+			goto out;
+		}
+	}
+
+out:
+	free_page((unsigned long)tmpbuf);
+	return ret;
+}
+
+static struct file_operations header_file_operations = {
+	read:		header_read
+};
+
+
+/** /proc/web100/ **/
+#define FIRST_CONNECTION_ENTRY	256
+#define NUMBUF_LEN		10
+
+/*
+ * Collect the cids of live (not wc_dead) connections into @cids,
+ * skipping the first (pos - FIRST_CONNECTION_ENTRY) of them.
+ * Returns the number of cids stored.  At most WEB100_MAX_CONNS entries
+ * are written, which is the size the caller in this file allocates.
+ */
+static int get_connection_list(int pos, int *cids)
+{
+	struct web100stats *stats;
+	int n;
+
+	pos -= FIRST_CONNECTION_ENTRY;
+	n = 0;
+
+	read_lock_bh(&web100_linkage_lock);
+
+	stats = web100stats_first;
+	while (stats && n < WEB100_MAX_CONNS) {
+		if (!stats->wc_dead) {
+			if (pos <= 0)
+				cids[n++] = stats->wc_cid;
+			else
+				pos--;
+		}
+
+		stats = stats->wc_next;
+	}
+
+	read_unlock_bh(&web100_linkage_lock);
+
+	return n;
+}
+
+static int web100_dir_readdir(struct file *filp,
+	void *dirent, filldir_t filldir)
+{
+	int err;
+	unsigned n, i;
+	int *cids;
+
+	if (filp->f_pos 
< FIRST_CONNECTION_ENTRY) { + if ((err = proc_readdir(filp, dirent, filldir)) < 0) + return err; + filp->f_pos = FIRST_CONNECTION_ENTRY; + } + + if ((cids = kmalloc(WEB100_MAX_CONNS * sizeof (int), GFP_KERNEL)) == NULL) + return -ENOMEM; + + n = get_connection_list(filp->f_pos, cids); + + for (i = 0; i < n; i++) { + int cid = cids[i]; + unsigned long j = NUMBUF_LEN; + ino_t ino = ino_from_cid(cid); + char buf[NUMBUF_LEN]; + + do { + j--; + buf[j] = '0' + (cid % 10); + cid /= 10; + } while (cid); + + if (filldir(dirent, buf + j, NUMBUF_LEN - j, filp->f_pos, + ino, DT_DIR) < 0) { + break; + } + filp->f_pos++; + } + + kfree(cids); + + return 0; +} + +static struct dentry *web100_dir_lookup(struct inode *dir, + struct dentry *dentry) +{ + char *name; + int len; + int cid; + unsigned c; + struct inode *inode; + unsigned long ino; + struct web100stats *stats; + + if (proc_lookup(dir, dentry) == NULL) + return NULL; + + cid = 0; + name = (char *)(dentry->d_name.name); + len = dentry->d_name.len; + if (len <= 0) /* I don't think this can happen */ + return ERR_PTR(-EINVAL); + while (len-- > 0) { + c = *name - '0'; + name++; + cid *= 10; + cid += c; + if (c > 9 || c < 0 || (cid == 0 && len != 0) || cid >= WEB100_MAX_CONNS) { + cid = -1; + break; + } + } + if (cid < 0) + return ERR_PTR(-ENOENT); + + read_lock_bh(&web100_linkage_lock); + stats = web100stats_lookup(cid); + if (stats == NULL || stats->wc_dead) { + read_unlock_bh(&web100_linkage_lock); + return ERR_PTR(-ENOENT); + } + read_unlock_bh(&web100_linkage_lock); + + ino = ino_from_cid(cid); + inode = proc_web100_make_inode(dir->i_sb, ino); + if (inode == NULL) { + return ERR_PTR(-EINVAL); + } + inode->i_nlink = 2; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_flags |= S_IMMUTABLE; /* ? 
*/
+	inode->i_op = &connection_dir_iops;
+	inode->i_fop = &connection_dir_fops;
+
+	d_add(dentry, inode);
+	return NULL;
+}
+
+static struct file_operations web100_dir_fops = {
+	readdir:	web100_dir_readdir
+};
+
+static struct inode_operations web100_dir_iops = {
+	lookup:		web100_dir_lookup
+};
+
+
+/*
+ * Read/write handlers
+ */
+
+/* A read handler for reading directly from the stats */
+/* read_data is the byte offset into struct web100stats */
+static int read_stats(void *buf, struct web100stats *stats,
+	struct web100_var *vp)
+{
+	memcpy(buf, (char *)stats + vp->read_data, vp->len);
+
+	return 0;
+}
+
+/* A write handler for writing directly to the stats */
+/* write_data is a byte offset into struct web100stats */
+static int write_stats(void *buf, struct web100stats *stats,
+	struct web100_var *vp)
+{
+	/*
+	 * Use the write-side offset: a write-only variable is registered
+	 * with read_data == 0, which would otherwise make this overwrite
+	 * the start of the stats structure.
+	 */
+	memcpy((char *)stats + vp->write_data, buf, vp->len);
+
+	return 0;
+}
+
+/* Report the congestion window clamp in bytes (clamp * cached MSS). */
+int read_LimCwnd(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	struct tcp_opt *tp = &stats->wc_sk->tp_pinfo.af_tcp;
+
+	*(__u32 *)buf = (__u32)(tp->snd_cwnd_clamp * tp->mss_cache);
+
+	return 0;
+}
+
+/*
+ * Set the congestion window clamp from a byte count, capped at 65535
+ * segments.
+ * NOTE(review): divides by tp->mss_cache; assumes it is non-zero -- confirm.
+ */
+int write_LimCwnd(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	struct tcp_opt *tp = &stats->wc_sk->tp_pinfo.af_tcp;
+
+	tp->snd_cwnd_clamp = min(*(__u32 *)buf / tp->mss_cache, 65535U);
+
+	return 0;
+}
+
+/*
+ * Set the receive window clamp, capped at the largest window the
+ * current receive scale can express, and mirror it into LimRwin.
+ */
+int write_LimRwin(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	__u32 val = *(__u32 *)buf;
+	struct tcp_opt *tp = &stats->wc_sk->tp_pinfo.af_tcp;
+
+	stats->wc_vars.LimRwin = tp->window_clamp =
+		min(val, 65535U << tp->rcv_wscale);
+
+	return 0;
+}
+
+extern __u32 sysctl_wmem_default;
+extern __u32 sysctl_rmem_default;
+
+int write_SBufMode(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	__u32 val = *(__u32 *)buf;
+	struct sock *sk = stats->wc_sk;
+
+	switch (val) {
+	case WC_BUFMODE_OS:
+		sk->userlocks &= ~SOCK_SNDBUF_LOCK;
+		break;
+	case WC_BUFMODE_WEB100:
+		sk->userlocks |= SOCK_SNDBUF_LOCK;
+		sk->sndbuf 
= sysctl_wmem_default; + sk->write_space(sk); + break; + default: + return 1; + } + stats->wc_vars.X_SBufMode = val; + + return 0; +} + +int write_RBufMode(void *buf, struct web100stats *stats, struct web100_var *vp) +{ + __u32 val = *(__u32 *)buf; + struct sock *sk = stats->wc_sk; + + switch (val) { + case WC_BUFMODE_OS: + sk->userlocks &= ~SOCK_RCVBUF_LOCK; + stats->wc_vars.LimRwin = sk->tp_pinfo.af_tcp.window_clamp; + break; + case WC_BUFMODE_WEB100: + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = sysctl_rmem_default; + stats->wc_vars.LimRwin = 0xffffffff; + break; + default: + return 1; + } + stats->wc_vars.X_RBufMode = val; + + return 0; +} + +/* A read handler for reading directly from the sk */ +/* read_data is a byte offset into the sk */ +static int read_sk(void *buf, struct web100stats *stats, + struct web100_var *vp) +{ + /* Fill data with 0's if the connection is gone. */ + if (stats->wc_sk == NULL) + memset(buf, 0, vp->len); + else + memcpy(buf, (char *)(stats->wc_sk) + vp->read_data, vp->len); + + return 0; +} + +static int write_sk(void *buf, struct web100stats *stats, struct web100_var *vp) +{ + if (stats->wc_sk == NULL) + return 1; + else + memcpy((char *)(stats->wc_sk) + vp->write_data, buf, vp->len); + + return 0; +} + +#ifdef HAVE_MONO_TIME +extern void get_mono_time(__u64 *time); +#endif + +/* clean the clock */ +__u64 web100_mono_time() +{ +#ifdef HAVE_MONO_TIME + __u64 time; + get_mono_time(&time); + return time; +#else + struct timeval now; + static struct timeval before; + + do_gettimeofday(&now); + + /* assure monotonic, no matter what */ + if ((now.tv_sec > before.tv_sec) || + ((now.tv_sec == before.tv_sec) && (now.tv_usec > before.tv_usec))) { + before = now; + } else { + before.tv_usec++; + if (before.tv_usec >= 1000000) { + before.tv_usec -= 1000000; + before.tv_sec++; + } + } + + return (1000000ULL * (__u64)before.tv_sec + before.tv_usec); +#endif +} + +/* A read handler to get the low part of the current time in usec */ +static 
int read_now(void *buf, struct web100stats *stats,
+	struct web100_var *vp)
+{
+	__u64 val;
+
+	/* Microseconds elapsed since the connection's recorded start time. */
+	val = web100_mono_time();
+	val -= stats->wc_start_monotime;
+	memcpy(buf, (char *)&val, vp->len);
+
+	return 0;
+}
+
+#ifdef CONFIG_WEB100_NET100
+/*
+ * Lower the cached MSS.  Values of 0 or above the current mss_cache are
+ * rejected with a return value of 1, as is a connection whose socket is
+ * gone.
+ */
+static int write_mss(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	struct sock *sk = stats->wc_sk;
+	struct tcp_opt *tp;
+	__u32 val = *(__u32 *)buf;
+
+	if (sk == NULL)
+		return 1;
+	tp = &sk->tp_pinfo.af_tcp;
+
+	if (val > tp->mss_cache)
+		return 1;
+	if (val < 1)
+		return 1;
+
+	tp->mss_cache = val;
+	web100_update_mss(tp);
+
+	return 0;
+}
+#endif
+
+/* Set the socket send buffer size, then apply the same value as LimCwnd. */
+static int write_sndbuf(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	/* Plain assignment: a cast expression is not an lvalue in C. */
+	stats->wc_sk->sndbuf = *(__u32 *)buf;
+
+	return write_LimCwnd(buf, stats, vp);
+}
+
+/* Set the socket receive buffer size, then apply the same value as LimRwin. */
+static int write_rcvbuf(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	/* Plain assignment: a cast expression is not an lvalue in C. */
+	stats->wc_sk->rcvbuf = *(__u32 *)buf;
+
+	return write_LimRwin(buf, stats, vp);
+}
+
+/* Handler that transfers nothing and always succeeds. */
+static int rw_noop(void *buf, struct web100stats *stats, struct web100_var *vp)
+{
+	return 0;
+}
+
+/*
+ * init
+ */
+
+void __init proc_web100_init(void)
+{
+	/* Set up the proc files. */
+	proc_web100_dir = proc_mkdir("web100", NULL);
+	proc_web100_dir->proc_iops = &web100_dir_iops;
+	proc_web100_dir->proc_fops = &web100_dir_fops;
+
+	proc_web100_header = create_proc_entry("header", S_IFREG | S_IRUGO,
+					       proc_web100_dir);
+	proc_web100_header->proc_fops = &header_file_operations;
+
+	/* Set up the contents of the proc files. 
*/ +#define OFFSET_IN(type,var) ((unsigned long)(&(((type *)NULL)->var))) +#define OFFSET_ST(field) ((unsigned long)(&(((struct web100stats *)NULL)->wc_vars.field))) +#define OFFSET_SK(field) ((unsigned long)(&(((struct sock *)NULL)->field))) + +#define ADD_RO_STATSVAR(ino,name,type) \ +add_var(web100_file_lookup(ino), #name, type, \ + read_stats, OFFSET_ST(name), NULL, 0) + +#define ADD_RO_STATSRENAME(ino,name,type,var) \ +add_var(web100_file_lookup(ino), name, type, \ + read_stats, OFFSET_ST(var), NULL, 0) + +#define ADD_RO_STATSVAR_DEP(ino,name,type) \ +add_var(web100_file_lookup(ino), "_" #name, type, \ + read_stats, OFFSET_ST(name), NULL, 0) + +#define ADD_WO_STATSVAR(ino,name,type) \ +add_var(web100_file_lookup(ino), #name, type, NULL, 0, \ + write_stats, OFFSET_ST(name)) + +#define ADD_WO_STATSVAR_DEP(ino,name,type) \ +add_var(web100_file_lookup(ino), "_" #name, type, NULL, 0, \ + write_stats, OFFSET_ST(name)) + +#define ADD_RW_STATSVAR(ino,name,type) \ +add_var(web100_file_lookup(ino), #name, type, \ + read_stats, OFFSET_ST(name), \ + write_stats, OFFSET_ST(name)) + +#define ADD_RW_STATSVAR_DEP(ino,name,type) \ +add_var(web100_file_lookup(ino), "_" #name, type, \ + read_stats, OFFSET_ST(name), \ + write_stats, OFFSET_ST(name)) + +#define ADD_RO_SKVAR(ino,name,type,var) \ +add_var(web100_file_lookup(ino), #name, type, \ + read_sk, OFFSET_SK(var), NULL, 0) + +#define ADD_RW_SKVAR(ino,name,type,var) \ +add_var(web100_file_lookup(ino), #name, type, \ + read_sk, OFFSET_SK(var), write_sk, OFFSET_SK(var)) + +#define ADD_NOOP(ino,name,type) \ +add_var(web100_file_lookup(ino), #name, type, \ + rw_noop, 0, rw_noop, 0) + + /* spec */ + ADD_RO_STATSVAR(PROC_CONN_SPEC, LocalAddressType, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_SPEC, LocalAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSVAR(PROC_CONN_SPEC, LocalPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSVAR(PROC_CONN_SPEC, RemAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSVAR(PROC_CONN_SPEC, RemPort, 
WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSRENAME(PROC_CONN_SPEC, "_RemoteAddress", WEB100_TYPE_INET_ADDRESS, RemAddress); + ADD_RO_STATSRENAME(PROC_CONN_SPEC, "_RemotePort", WEB100_TYPE_INET_PORT_NUMBER, RemPort); + + /* read */ + /* STATE */ + ADD_RO_STATSVAR(PROC_CONN_READ, State, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, SACKEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, TimestampsEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, NagleEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, ECNEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, SndWinScale, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, RcvWinScale, WEB100_TYPE_INTEGER); + + /* SYN OPTIONS */ + ADD_RO_STATSVAR(PROC_CONN_READ, ActiveOpen, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, MSSRcvd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, WinScaleRcvd, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, WinScaleSent, WEB100_TYPE_INTEGER); + + /* DATA */ + ADD_RO_STATSVAR(PROC_CONN_READ, PktsOut, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataPktsOut, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, AckPktsOut, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataBytesOut, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, PktsIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataPktsIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, AckPktsIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataBytesIn, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, SndUna, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SndNxt, WEB100_TYPE_INTEGER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SndMax, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_snd_una", WEB100_TYPE_COUNTER32, SndUna); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_snd_nxt", WEB100_TYPE_INTEGER32, SndNxt); + ADD_RO_STATSRENAME(PROC_CONN_READ, 
"_snd_max", WEB100_TYPE_COUNTER32, SndMax); + ADD_RO_STATSVAR(PROC_CONN_READ, ThruBytesAcked, WEB100_TYPE_COUNTER64); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_ThruBytesSent", WEB100_TYPE_COUNTER64, ThruBytesAcked); + ADD_RO_STATSVAR(PROC_CONN_READ, SndISS, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, SendWraps, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, RcvNxt, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_rcv_nxt", WEB100_TYPE_COUNTER32, RcvNxt); + ADD_RO_STATSVAR(PROC_CONN_READ, ThruBytesReceived, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, RecvISS, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, RecvWraps, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, StartTime, WEB100_TYPE_INTEGER32); + ADD_RO_STATSVAR(PROC_CONN_READ, StartTimeSec, WEB100_TYPE_INTEGER32); + ADD_RO_STATSVAR(PROC_CONN_READ, StartTimeUsec, WEB100_TYPE_INTEGER32); + add_var(web100_file_lookup(PROC_CONN_READ), "Duration", WEB100_TYPE_COUNTER64, read_now, 0, NULL, 0); + add_var(web100_file_lookup(PROC_CONN_READ), "_CurrTime", WEB100_TYPE_COUNTER64, read_now, 0, NULL, 0); + + /* SENDER CONGESTION */ + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTransSender", WEB100_TYPE_COUNTER32, SndLimTrans[WC_SNDLIM_SENDER]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimBytesSender", WEB100_TYPE_COUNTER64, SndLimBytes[WC_SNDLIM_SENDER]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTimeSender", WEB100_TYPE_COUNTER32, SndLimTime[WC_SNDLIM_SENDER]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTransCwnd", WEB100_TYPE_COUNTER32, SndLimTrans[WC_SNDLIM_CWND]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimBytesCwnd", WEB100_TYPE_COUNTER64, SndLimBytes[WC_SNDLIM_CWND]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTimeCwnd", WEB100_TYPE_COUNTER32, SndLimTime[WC_SNDLIM_CWND]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTransRwin", WEB100_TYPE_COUNTER32, SndLimTrans[WC_SNDLIM_RWIN]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimBytesRwin", 
WEB100_TYPE_COUNTER64, SndLimBytes[WC_SNDLIM_RWIN]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTimeRwin", WEB100_TYPE_COUNTER32, SndLimTime[WC_SNDLIM_RWIN]); + ADD_RO_STATSVAR(PROC_CONN_READ, SlowStart, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CongAvoid, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CongestionSignals, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, OtherReductions, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CongestionOverCount, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_Recoveries", WEB100_TYPE_COUNTER32, CongestionSignals); + ADD_RO_STATSVAR(PROC_CONN_READ, CurCwnd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentCwnd", WEB100_TYPE_GAUGE32, CurCwnd); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxCwnd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurSsthresh, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentSsthresh", WEB100_TYPE_GAUGE32, CurSsthresh); + add_var(web100_file_lookup(PROC_CONN_READ), "LimCwnd", WEB100_TYPE_GAUGE32, read_LimCwnd, 0, NULL, 0); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxSsthresh, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinSsthresh, WEB100_TYPE_GAUGE32); + + /* SENDER PATH MODEL */ + ADD_RO_STATSVAR(PROC_CONN_READ, FastRetran, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, Timeouts, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SubsequentTimeouts, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurTimeoutCount, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrTimeoutCount", WEB100_TYPE_GAUGE32, CurTimeoutCount); + ADD_RO_STATSVAR(PROC_CONN_READ, AbruptTimeouts, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PktsRetrans, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, BytesRetrans, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DupAcksIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SACKsRcvd, 
WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SACKBlocksRcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PreCongSumCwnd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_SumCwndAtCong", WEB100_TYPE_COUNTER32, PreCongSumCwnd); + ADD_RO_STATSVAR(PROC_CONN_READ, PreCongSumRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, PreCongCountRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PostCongSumRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PostCongCountRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, ECERcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SendStall, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, QuenchRcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, RetranThresh, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, NonRecovDA, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, AckAfterFR, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DSACKDups, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SampleRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_SampledRTT", WEB100_TYPE_GAUGE32, SampleRTT); + ADD_RO_STATSVAR(PROC_CONN_READ, SmoothedRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, RTTVar, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, SumRTT, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, CountRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurRTO, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentRTO", WEB100_TYPE_GAUGE32, CurRTO); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRTO, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRTO, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurMSS, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentMSS", 
WEB100_TYPE_GAUGE32, CurMSS); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxMSS, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinMSS, WEB100_TYPE_GAUGE32); + + /* SENDER BUFFER */ +#define PROC_CONN_XTEST PROC_CONN_READ /* lazy */ + ADD_RO_SKVAR(PROC_CONN_READ, _Sndbuf, WEB100_TYPE_GAUGE32, sndbuf); + ADD_RO_STATSVAR(PROC_CONN_READ, CurRetxQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurRetranQueue", WEB100_TYPE_GAUGE32, CurRetxQueue); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRetxQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_MaxRetranQueue", WEB100_TYPE_GAUGE32, MaxRetxQueue); + ADD_RO_STATSVAR(PROC_CONN_READ, CurAppWQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxAppWQueue, WEB100_TYPE_GAUGE32); + + /* SENDER BUFFER TUNING - See below */ + + /* LOCAL RECEIVER */ + ADD_RO_STATSVAR(PROC_CONN_READ, CurRwinSent, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentRwinSent", WEB100_TYPE_GAUGE32, CurRwinSent); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRwinSent, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRwinSent, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, LimRwin, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, DupAcksOut, WEB100_TYPE_COUNTER32); + ADD_RO_SKVAR(PROC_CONN_READ, _Rcvbuf, WEB100_TYPE_COUNTER32, rcvbuf); + ADD_RO_STATSVAR(PROC_CONN_READ, CurReasmQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxReasmQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurAppRQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxAppRQueue, WEB100_TYPE_GAUGE32); + ADD_RO_SKVAR(PROC_CONN_XTEST, X_rcv_ssthresh, WEB100_TYPE_GAUGE32, tp_pinfo.af_tcp.rcv_ssthresh); + ADD_RO_SKVAR(PROC_CONN_XTEST, X_wnd_clamp, WEB100_TYPE_GAUGE32, tp_pinfo.af_tcp.window_clamp); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg1, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg2, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg3, 
WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg4, WEB100_TYPE_GAUGE32); + + /* OBSERVED RECEIVER */ + ADD_RO_STATSVAR(PROC_CONN_READ, CurRwinRcvd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentRwinRcvd", WEB100_TYPE_GAUGE32, CurRwinRcvd); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRwinRcvd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRwinRcvd, WEB100_TYPE_GAUGE32); + + /* CONNECTION ID */ + ADD_RO_STATSVAR(PROC_CONN_READ, LocalAddressType, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, LocalAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSVAR(PROC_CONN_READ, LocalPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSVAR(PROC_CONN_READ, RemAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_RemoteAddress", WEB100_TYPE_INET_ADDRESS, RemAddress); + ADD_RO_STATSVAR(PROC_CONN_READ, RemPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_RemotePort", WEB100_TYPE_INET_PORT_NUMBER, RemPort); + + ADD_RO_STATSVAR(PROC_CONN_READ, X_RcvRTT, WEB100_TYPE_GAUGE32); + + /* tune */ + add_var(web100_file_lookup(PROC_CONN_TUNE), "LimCwnd", + WEB100_TYPE_GAUGE32, read_LimCwnd, 0, + write_LimCwnd, 0); + add_var(web100_file_lookup(PROC_CONN_TUNE), "LimRwin", + WEB100_TYPE_GAUGE32, read_stats, OFFSET_ST(LimRwin), + write_LimRwin, 0); +#ifdef CONFIG_WEB100_NET100 + add_var(web100_file_lookup(PROC_CONN_TUNE), "CurMSS", + WEB100_TYPE_GAUGE32, read_stats, OFFSET_ST(CurMSS), + write_mss, 0); +#endif + add_var(web100_file_lookup(PROC_CONN_TUNE), "X_SBufMode", + WEB100_TYPE_INTEGER, read_stats, OFFSET_ST(X_SBufMode), + write_SBufMode, 0); + add_var(web100_file_lookup(PROC_CONN_TUNE), "X_RBufMode", + WEB100_TYPE_INTEGER, read_stats, OFFSET_ST(X_RBufMode), + write_RBufMode, 0); + + ADD_RW_SKVAR(PROC_CONN_TUNE, X_Sndbuf, WEB100_TYPE_GAUGE32, sndbuf); + ADD_RW_SKVAR(PROC_CONN_TUNE, X_Rcvbuf, WEB100_TYPE_GAUGE32, rcvbuf); + + ADD_NOOP(PROC_CONN_TUNE, _STuneMode, WEB100_TYPE_INTEGER); + 
ADD_RO_SKVAR(PROC_CONN_TUNE, _SndbufGet, WEB100_TYPE_GAUGE32, sndbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_SndbufSet", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(sndbuf), + write_sndbuf, 0); + ADD_RW_SKVAR(PROC_CONN_TUNE, _SAppBuf, WEB100_TYPE_GAUGE32, sndbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_SMaxWinBuf", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(sndbuf), + write_sndbuf, 0); + ADD_NOOP(PROC_CONN_TUNE, _SXtra, WEB100_TYPE_GAUGE32); + ADD_NOOP(PROC_CONN_TUNE, _STuneErr, WEB100_TYPE_INTEGER); + + ADD_NOOP(PROC_CONN_TUNE, _RTuneMode, WEB100_TYPE_INTEGER); + ADD_RO_SKVAR(PROC_CONN_TUNE, _RcvbufGet, WEB100_TYPE_GAUGE32, rcvbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_RcvbufSet", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(rcvbuf), + write_rcvbuf, 0); + ADD_RW_SKVAR(PROC_CONN_TUNE, _RAppBuf, WEB100_TYPE_GAUGE32, rcvbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_RMaxWinBuf", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(rcvbuf), + write_rcvbuf, 0); + ADD_NOOP(PROC_CONN_TUNE, _RXtra, WEB100_TYPE_GAUGE32); + ADD_NOOP(PROC_CONN_TUNE, _RTuneErr, WEB100_TYPE_INTEGER); + +#ifdef CONFIG_WEB100_NET100 + ADD_RW_STATSVAR(PROC_CONN_TUNE, WAD_IFQ, WEB100_TYPE_GAUGE32); + ADD_RW_STATSVAR(PROC_CONN_TUNE, WAD_MaxBurst, WEB100_TYPE_GAUGE32); +#endif +} diff -urP linux-2.4.19.orig/include/asm-alpha/timex.h linux-2.4.19/include/asm-alpha/timex.h --- linux-2.4.19.orig/include/asm-alpha/timex.h Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/include/asm-alpha/timex.h Wed Feb 12 12:09:01 2003 @@ -17,6 +17,10 @@ * which isn't an evil thing. 
*/ +#ifdef CONFIG_WEB100_STATS +#define HAVE_MONO_TIME 1 +#endif + typedef unsigned int cycles_t; extern cycles_t cacheflush_time; diff -urP linux-2.4.19.orig/include/asm-i386/timex.h linux-2.4.19/include/asm-i386/timex.h --- linux-2.4.19.orig/include/asm-i386/timex.h Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/include/asm-i386/timex.h Wed Feb 12 12:09:01 2003 @@ -20,6 +20,10 @@ (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ << (SHIFT_SCALE-SHIFT_HZ)) / HZ) +#ifdef CONFIG_WEB100_STATS +#define HAVE_MONO_TIME 1 +#endif + /* * Standard way to access the cycle counter on i586+ CPUs. * Currently only used on SMP. diff -urP linux-2.4.19.orig/include/linux/netlink.h linux-2.4.19/include/linux/netlink.h --- linux-2.4.19.orig/include/linux/netlink.h Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/include/linux/netlink.h Wed Feb 12 12:09:01 2003 @@ -8,6 +8,9 @@ #define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_ARPD 8 +#ifdef CONFIG_WEB100_STATS +#define NETLINK_WEB100 10 +#endif #define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ diff -urP linux-2.4.19.orig/include/linux/proc_fs.h linux-2.4.19/include/linux/proc_fs.h --- linux-2.4.19.orig/include/linux/proc_fs.h Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/include/linux/proc_fs.h Wed Feb 12 12:09:01 2003 @@ -86,6 +86,10 @@ extern void proc_root_init(void); extern void proc_misc_init(void); +#ifdef CONFIG_WEB100_STATS +extern void proc_web100_init(void); +#endif + struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry); void proc_pid_delete_inode(struct inode *inode); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); diff -urP linux-2.4.19.orig/include/linux/sysctl.h linux-2.4.19/include/linux/sysctl.h --- linux-2.4.19.orig/include/linux/sysctl.h Wed Feb 12 12:05:12 2003 +++
linux-2.4.19/include/linux/sysctl.h Wed Feb 12 12:09:01 2003 @@ -291,7 +291,21 @@ NET_IPV4_NONLOCAL_BIND=88, NET_IPV4_ICMP_RATELIMIT=89, NET_IPV4_ICMP_RATEMASK=90, - NET_TCP_TW_REUSE=91 + NET_TCP_TW_REUSE=91, +#ifdef CONFIG_WEB100 + NET_IPV4_WEB100_DEFAULT_WSCALE, +#endif +#ifdef CONFIG_WEB100_NET100 + NET_IPV4_WEB100_NO_METRICS_SAVE, + NET_IPV4_WAD_IFQ, + NET_IPV4_WAD_MAX_BURST, +#endif +#ifdef CONFIG_WEB100_STATS + NET_IPV4_WEB100_SBUFMODE, + NET_IPV4_WEB100_RBUFMODE, +#endif + NET_TCP_MODERATE_ON_TXQ=98, + NET_TCP_ALTAIMD=99 }; enum { diff -urP linux-2.4.19.orig/include/net/sock.h linux-2.4.19/include/net/sock.h --- linux-2.4.19.orig/include/net/sock.h Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/include/net/sock.h Wed Feb 12 12:09:01 2003 @@ -103,6 +103,8 @@ #include #endif +#include + #include #include @@ -316,7 +318,6 @@ __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ - /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ @@ -418,6 +419,20 @@ int linger2; unsigned long last_synq_overflow; + + /* CONFIG_WEB100_STATS */ + /* For storing Web100 protocol-specific instrument data */ + struct web100stats *tcp_stats; + + __u32 rcv_rtt; + __u32 rcv_rtt_seq; + unsigned long rcv_rtt_time; + __u32 rcv_winest_seq; + unsigned long rcv_winest_time; + + int rcv_space; + int rcv_alloc; + int ofo_hi_seq; }; @@ -586,7 +601,7 @@ #endif /* CONFIG_SPX */ } tp_pinfo; - + int err, err_soft; /* Soft holds errors that don't cause failure but are the cause of a persistent failure not just @@ -658,7 +673,6 @@ #endif } protinfo; - /* This part is used for the timeout functions. */ struct timer_list timer; /* This is the sock cleanup timer. */ struct timeval stamp; @@ -678,6 +692,8 @@ int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); void (*destruct)(struct sock *sk); + + int retx_alloc; }; /* The per-socket spinlock must be held here. 
*/ @@ -802,6 +818,7 @@ /* BH context may only use the following locking interface. */ #define bh_lock_sock(__sk) spin_lock(&((__sk)->lock.slock)) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->lock.slock)) + extern struct sock * sk_alloc(int family, int priority, int zero_it); extern void sk_free(struct sock *sk); diff -urP linux-2.4.19.orig/include/net/tcp.h linux-2.4.19/include/net/tcp.h --- linux-2.4.19.orig/include/net/tcp.h Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/include/net/tcp.h Wed Feb 12 12:09:01 2003 @@ -30,6 +30,10 @@ #include #include +#ifdef CONFIG_WEB100_STATS +#include +#endif + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -147,6 +151,15 @@ return (lport & (tcp_bhash_size - 1)); } +#ifdef CONFIG_ALTAIMD +extern int sysctl_tcp_altAIMD; +struct hstcp_entry { + __u32 cwnd; + __u8 a_val; + __u8 b_val; +}; +#endif + /* This is a TIME_WAIT bucket. It works around the memory consumption * problems of sockets in such a state on heavily loaded servers, but * without violating the protocol specification. @@ -461,6 +474,19 @@ extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; +extern int sysctl_tcp_moderate_on_txq; +#ifdef CONFIG_WEB100 +extern int sysctl_web100_default_wscale; +#endif +#ifdef CONFIG_WEB100_NET100 +extern int sysctl_web100_no_metrics_save; +extern int sysctl_WAD_IFQ; +extern int sysctl_WAD_MaxBurst; +#endif +#ifdef CONFIG_WEB100_STATS +extern int sysctl_web100_sbufmode; +extern int sysctl_web100_rbufmode; +#endif extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; @@ -1025,6 +1051,8 @@ __u16 urg_ptr; /* Valid w/URG flags is set. 
*/ __u32 ack_seq; /* Sequence number ACK'd */ + + __u8 transmitted; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) @@ -1072,6 +1100,30 @@ return tp->packets_out - tp->left_out + tp->retrans_out; } +/* Should we use the standard TCP AIMD behaviour? If so, the standard + * code path is used; this only changes things when non-standard AIMD + * (such as for Sally Floyd's HSTCP) is included in the kernel. + */ +#ifndef CONFIG_ALTAIMD +#define tcp_standard_aimd(tp,val1,val2) (val1) +#else +static inline int __tcp_standard_aimd(struct tcp_opt *tp) +{ + extern struct hstcp_entry hstcp_table[]; + return ((!sysctl_tcp_altAIMD) || + (sysctl_tcp_altAIMD && (tp->snd_cwnd < hstcp_table[0].cwnd))); +} +#define tcp_standard_aimd(tp,val1,val2) \ + ((__tcp_standard_aimd(tp)) ? (val1) : (val2)) + +extern struct hstcp_entry get_hstcp_val(struct tcp_opt *tp); + +static inline __u32 tcp_var_reduce(struct tcp_opt *tp) +{ + return (tp->snd_cwnd * (256 - get_hstcp_val(tp).b_val)) >> 8; +} +#endif + /* Recalculate snd_ssthresh, we want to set it to: * * one half the current congestion window, but no @@ -1079,7 +1131,8 @@ */ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) { - return max(tp->snd_cwnd >> 1U, 2U); + return tcp_standard_aimd(tp, max(tp->snd_cwnd >> 1U, 2U), + max(tcp_var_reduce(tp), 2U)); } /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. @@ -1134,6 +1187,7 @@ tp->high_seq = tp->snd_nxt; tp->snd_cwnd_stamp = tcp_time_stamp; TCP_ECN_queue_cwr(tp); + WEB100_UPDATE_FUNC(tp, web100_update_congestion(tp, 0)); } static inline void tcp_enter_cwr(struct tcp_opt *tp) @@ -1152,9 +1206,32 @@ */ static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp) { +#ifdef CONFIG_WEB100_NET100 + return (NET100_WAD(tp, WAD_MaxBurst, sysctl_WAD_MaxBurst)); +#endif return 3; } +/* CWND moderation, preventing bursts due to too big ACKs + * in dubious situations. 
+ */ +static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp) +{ +#ifdef CONFIG_WEB100_STATS + { + u32 t = tcp_packets_in_flight(tp) + tcp_max_burst(tp); + if (t < tp->snd_cwnd) { + tp->snd_cwnd = t; + WEB100_VAR_INC(tp, OtherReductions); + } + }; +#else + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp)+tcp_max_burst(tp)); +#endif + tp->snd_cwnd_stamp = tcp_time_stamp; +} + static __inline__ int tcp_minshall_check(struct tcp_opt *tp) { return after(tp->snd_sml,tp->snd_una) && @@ -1189,7 +1266,11 @@ /* This checks if the data bearing packet SKB (usually tp->send_head) * should be put on the wire right now. */ -static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, +/* Web100: + * Modified to return WC_SNDLIM_NONE when ok, reason if not ok. + * The name is changed because we have changed the return value. + */ +static __inline__ int tcp_snd_wait(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { /* RFC 1122 - section 4.2.3.4 @@ -1212,15 +1293,25 @@ * to get new data) and if room at tail of skb is * not enough to save something seriously (<32 for now). */ - + if ((tcp_packets_in_flight(tp) >= tp->snd_cwnd) && + !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) + return WC_SNDLIM_CWND; + if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)) + return WC_SNDLIM_RWIN; + if (!(nonagle == 1 || tp->urg_mode || + !tcp_nagle_check(tp, skb, cur_mss, nonagle))) + return WC_SNDLIM_SENDER; + return WC_SNDLIM_NONE; +#if 0 /* Don't be strict about the congestion window for the * final FIN frame. 
-DaveM */ return ((nonagle==1 || tp->urg_mode - || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && - ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && - !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); + || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && + ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || + (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && + !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); +#endif } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) @@ -1248,7 +1339,7 @@ if (skb) { if (!tcp_skb_is_last(sk, skb)) nonagle = 1; - if (!tcp_snd_test(tp, skb, cur_mss, nonagle) || + if (tcp_snd_wait(tp, skb, cur_mss, nonagle) != WC_SNDLIM_NONE || tcp_write_xmit(sk, nonagle)) tcp_check_probe_timer(sk, tp); } @@ -1266,8 +1357,9 @@ struct sk_buff *skb = tp->send_head; return (skb && - tcp_snd_test(tp, skb, tcp_current_mss(sk), - tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle)); + tcp_snd_wait(tp, skb, tcp_current_mss(sk), + tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle) + == WC_SNDLIM_NONE); } static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq) @@ -1365,6 +1457,8 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->state; + + WEB100_VAR_SET(&sk->tp_pinfo.af_tcp, State, web100_state(state)); switch (state) { case TCP_ESTABLISHED: @@ -1521,8 +1615,12 @@ if (*rcv_wscale && sysctl_tcp_app_win && space>=mss && space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2) (*rcv_wscale)--; +#ifdef CONFIG_WEB100 + (*rcv_wscale) = max((__u8)sysctl_web100_default_wscale, + (*rcv_wscale)); +#endif } - + /* Set initial window to value enough for senders, * following RFC1414. Senders, not following this RFC, * will be satisfied with 2. 
@@ -1669,18 +1767,62 @@ #define TCP_MEM_QUANTUM ((int)PAGE_SIZE) -static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb) +static inline void tcp_uncharge_skb(struct sock *sk, struct sk_buff *skb) { sk->tp_pinfo.af_tcp.queue_shrunk = 1; sk->wmem_queued -= skb->truesize; sk->forward_alloc += skb->truesize; - __kfree_skb(skb); + web100_update_writeq(sk); } static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb) { sk->wmem_queued += skb->truesize; sk->forward_alloc -= skb->truesize; +} + +#ifdef CONFIG_WEB100_STATS +extern atomic_t tcp_retx_mem; + +static inline void tcp_retx_uncharge_skb(struct sock *sk, struct sk_buff *skb) +{ + unsigned len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + tp->tcp_stats->wc_vars.CurRetxQueue -= len; + + atomic_sub(skb->truesize, &tcp_retx_mem); + sk->retx_alloc -= skb->truesize; + sock_put(sk); +} + +static inline void tcp_retx_charge_skb(struct sock *sk, struct sk_buff *skb) +{ + unsigned len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100directs *vars = &tp->tcp_stats->wc_vars; + + vars->CurRetxQueue += len; + if (vars->MaxRetxQueue < vars->CurRetxQueue) + vars->MaxRetxQueue = vars->CurRetxQueue; + + TCP_SKB_CB(skb)->transmitted = 1; + sock_hold(sk); + tcp_uncharge_skb(sk, skb); + atomic_add(skb->truesize, &tcp_retx_mem); + sk->retx_alloc += skb->truesize; +} +#endif + +static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb) +{ +#ifdef CONFIG_WEB100_STATS + if (TCP_SKB_CB(skb)->transmitted) + tcp_retx_uncharge_skb(sk, skb); + else +#endif + tcp_uncharge_skb(sk, skb); + __kfree_skb(skb); } extern void __tcp_mem_reclaim(struct sock *sk); diff -urP linux-2.4.19.orig/include/net/web100.h linux-2.4.19/include/net/web100.h --- linux-2.4.19.orig/include/net/web100.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/include/net/web100.h Wed Feb 12 12:09:01 2003 @@ -0,0 +1,113 @@ +/* + * 
include/net/web100.h + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * + * The Web 100 project. See http://www.web100.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _WEB100_H +#define _WEB100_H + +#include + +#define WEB100_MAX_CONNS (1<<15) + +#define WEB100_DELAY_MAX HZ + +/* Netlink */ +#define WC_NL_TYPE_CONNECT 0 +#define WC_NL_TYPE_DISCONNECT 1 + +struct web100_netlink_msg { + int type; + int cid; +}; + +/* The syntax of this version string is subject to future changes */ +extern char *web100_version_string; + +/* Stats structures */ +extern struct web100stats *web100stats_arr[]; +extern struct web100stats *web100stats_first; + +/* For locking the creation and destruction of stats structures. */ +extern rwlock_t web100_linkage_lock; + +/* For /proc/web100 */ +extern struct web100stats *web100stats_lookup(int cid); + +/* For the TCP code */ +extern int web100_stats_create(struct sock *sk); +extern void web100_stats_destroy(struct web100stats *stats); +extern void web100_stats_free(struct web100stats *stats); +extern void web100_stats_establish(struct sock *sk); + +extern void web100_tune_sndbuf_ack(struct sock *sk); +extern void web100_tune_sndbuf_snd(struct sock *sk); +extern void web100_tune_rcvbuf(struct sock *sk); + +extern void web100_update_snd_nxt(struct tcp_opt *tp); +extern void web100_update_rtt(struct tcp_opt *tp, unsigned long rtt_sample); +extern void web100_update_timeout(struct tcp_opt *tp); +extern void web100_update_mss(struct tcp_opt *tp); +extern void web100_update_cwnd(struct tcp_opt *tp); +extern void web100_update_rwin_rcvd(struct tcp_opt *tp); +extern void web100_update_sndlim(struct tcp_opt *tp, int why); +extern void web100_update_rcv_nxt(struct tcp_opt *tp); +extern void 
web100_update_rwin_sent(struct tcp_opt *tp); +extern void web100_update_congestion(struct tcp_opt *tp, int why); +extern void web100_update_segsend(struct tcp_opt *tp, struct sk_buff *skb); +extern void web100_update_segrecv(struct tcp_opt *tp, struct sk_buff *skb); +extern void web100_update_rcvbuf(struct sock *sk, int rcvbuf); +extern void web100_update_writeq(struct sock *sk); + +extern void web100_stats_init(void); + +/* For the IP code */ +extern int web100_delay_output(struct sk_buff *skb, int (*output)(struct sk_buff *)); + +extern __u64 web100_mono_time(void); + +static inline void web100_stats_use(struct web100stats *stats) +{ + sock_hold(stats->wc_sk); + atomic_inc(&stats->wc_users); +} + +static inline void web100_stats_unuse(struct web100stats *stats) +{ + if (atomic_dec_and_test(&stats->wc_users)) { + sock_put(stats->wc_sk); + web100_stats_free(stats); + } +} + +/* A mapping between Linux and Web100 states. This could easily just + * be an array. */ +static inline int web100_state(int state) +{ + switch (state) { + case TCP_ESTABLISHED: return WC_STATE_ESTABLISHED; + case TCP_SYN_SENT: return WC_STATE_SYNSENT; + case TCP_SYN_RECV: return WC_STATE_SYNRECEIVED; + case TCP_FIN_WAIT1: return WC_STATE_FINWAIT1; + case TCP_FIN_WAIT2: return WC_STATE_FINWAIT2; + case TCP_TIME_WAIT: return WC_STATE_TIMEWAIT; + case TCP_CLOSE: return WC_STATE_CLOSED; + case TCP_CLOSE_WAIT: return WC_STATE_CLOSEWAIT; + case TCP_LAST_ACK: return WC_STATE_LASTACK; + case TCP_LISTEN: return WC_STATE_LISTEN; + case TCP_CLOSING: return WC_STATE_CLOSING; + default: return 0; + } +} + +#endif /* _WEB100_H */ diff -urP linux-2.4.19.orig/include/net/web100_stats.h linux-2.4.19/include/net/web100_stats.h --- linux-2.4.19.orig/include/net/web100_stats.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/include/net/web100_stats.h Wed Feb 12 12:09:01 2003 @@ -0,0 +1,343 @@ +/* + * include/net/web100_stats.h + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * Copyright (C) 
2000 Jeff Semke + * + * The Web 100 project. See http://www.web100.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* TODO: make sure that the time duration states below include: + Congestion Avoidance, Slow Start, Timeouts, Idle Application, and + Window Limited cases */ +/* TODO: Consider adding sysctl variable to enable/disable WC stats updates. + Probably should still create stats structures if compiled with WC support, + even if sysctl(wc) is turned off. That would allow the stats to be updated + if the sysctl(wc) is turned back on. */ +/* TODO: Add all variables needed to do user-level auto-tuning, including + writeable parameters */ + + +#ifndef _WEB100_STATS_H +#define _WEB100_STATS_H + +enum wc_sndlim_states { + WC_SNDLIM_NONE = -1, + WC_SNDLIM_SENDER, + WC_SNDLIM_CWND, + WC_SNDLIM_RWIN, + WC_SNDLIM_STARTUP, + WC_SNDLIM_NSTATES /* Keep at end */ +}; + +#ifndef CONFIG_WEB100_STATS + +#define WEB100_VAR_INC(tp,var) do {} while (0) +#define WEB100_VAR_DEC(tp,var) do {} while (0) +#define WEB100_VAR_SET(tp,var,val) do {} while (0) +#define WEB100_VAR_ADD(tp,var,val) do {} while (0) +#define WEB100_UPDATE_FUNC(tp,func) do {} while (0) +#define NET100_WAD(tp, var, def) (def) + +#else /* CONFIG_WEB100_STATS */ /* { */ + +#include + +#define WEB100_CHECK(tp,expr) \ + do { if ((tp)->tcp_stats) (expr); } while (0) +#define WEB100_VAR_INC(tp,var) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var)++) +#define WEB100_VAR_DEC(tp,var) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var)--) +#define WEB100_VAR_ADD(tp,var,val) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var) += (val)) +#define WEB100_VAR_SET(tp,var,val) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var) = (val)) +#define WEB100_UPDATE_FUNC(tp,func) \ + WEB100_CHECK(tp, func) 
+#ifdef CONFIG_WEB100_NET100 +#define NET100_WAD(tp, var, def) \ + (((tp)->tcp_stats && (tp)->tcp_stats->wc_vars.var) ? (tp)->tcp_stats->wc_vars.var : (def)) +#else +#define NET100_WAD(tp, var, def) (def) +#endif + +/* SMIv2 types - RFC 1902 */ +typedef __s32 INTEGER; +typedef INTEGER Integer32; +typedef __u32 IpAddress; +typedef __u32 Counter32; +typedef __u32 Unsigned32; +typedef Unsigned32 Gauge32; +typedef __u32 TimeTicks; +typedef __u64 Counter64; +typedef __u16 Unsigned16; /* 16 bits wide, as the name states */ + +/* New inet address types specified in INET-ADDRESS-MIB */ +typedef Unsigned16 InetPortNumber; +typedef enum { + WC_ADDRTYPE_UNKNOWN = 0, + WC_ADDRTYPE_IPV4, + WC_ADDRTYPE_IPV6, + WC_ADDRTYPE_DNS = 16 +} InetAddressType; +typedef IpAddress InetAddresIPv4; +typedef struct { + __u8 addr[16]; + __u8 type; +} InetAddresIPv6; +typedef union { + InetAddresIPv4 v4addr; + InetAddresIPv6 v6addr; +} InetAddress; + +typedef enum { + truthValueTrue = 1, + truthValueFalse = 2 +} TruthValue; + +enum wc_states { + WC_STATE_CLOSED = 1, + WC_STATE_LISTEN, + WC_STATE_SYNSENT, + WC_STATE_SYNRECEIVED, + WC_STATE_ESTABLISHED, + WC_STATE_FINWAIT1, + WC_STATE_FINWAIT2, + WC_STATE_CLOSEWAIT, + WC_STATE_LASTACK, + WC_STATE_CLOSING, + WC_STATE_TIMEWAIT, + WC_STATE_DELETECB +}; + +enum wc_stunemodes { + WC_STUNEMODE_DEFAULT = 0, /* OS native */ + WC_STUNEMODE_SETSOCKOPT, /* OS native setsockopt() */ + WC_STUNEMODE_FIXED, /* Manual via the web100 API */ + WC_STUNEMODE_AUTO, + WC_STUNEMODE_EXP1, + WC_STUNEMODE_EXP2 +}; + +enum wc_rtunemodes { + WC_RTUNEMODE_DEFAULT = 0, + WC_RTUNEMODE_SETSOCKOPT, + WC_RTUNEMODE_FIXED, + WC_RTUNEMODE_AUTO, + WC_RTUNEMODE_EXP1, + WC_RTUNEMODE_EXP2 +}; + +enum wc_bufmodes { + WC_BUFMODE_OS = 0, + WC_BUFMODE_WEB100, +}; + +enum { + WC_SE_BELOW_DATA_WINDOW = 1, + WC_SE_ABOVE_DATA_WINDOW, + WC_SE_BELOW_ACK_WINDOW, + WC_SE_ABOVE_ACK_WINDOW, + WC_SE_BELOW_TSW_WINDOW, + WC_SE_ABOVE_TSW_WINDOW, + WC_SE_DATA_CHECKSUM +}; + + +/* + * Variables that can be read and written directly.
+ * + * Should contain most variables from TCP-KIS 0.1. Commented feilds are + * either not implemented or have handlers and do not need struct storage. + */ +struct web100directs { + /* STATE */ + INTEGER State; + TruthValue SACKEnabled; + TruthValue TimestampsEnabled; + TruthValue NagleEnabled; + TruthValue ECNEnabled; + Integer32 SndWinScale; + Integer32 RcvWinScale; + + /* SYN OPTIONS */ + INTEGER ActiveOpen; + /* Gauge32 MSSSent; */ + Gauge32 MSSRcvd; + Integer32 WinScaleRcvd; + Integer32 WinScaleSent; + /* INTEGER SACKokSent; */ + /* INTEGER SACKokRcvd; */ + /* INTEGER TimestampSent; */ + /* INTEGER TimestampRcvd; */ + + /* DATA */ + Counter32 PktsOut; + Counter32 DataPktsOut; + Counter32 AckPktsOut; /* DEPRICATED */ + Counter64 DataBytesOut; + Counter32 PktsIn; + Counter32 DataPktsIn; + Counter32 AckPktsIn; /* DEPRICATED */ + Counter64 DataBytesIn; + /* Counter32 SoftErrors; */ + /* INTEGER SoftErrorReason; */ + Counter32 SndUna; + Counter32 SndNxt; + Integer32 SndMax; + Counter64 ThruBytesAcked; + Counter32 SndISS; /* SndInitial */ + Counter32 SendWraps; /* DEPRICATED */ + Counter32 RcvNxt; + Counter64 ThruBytesReceived; + Counter32 RecvISS; /* RecInitial */ + Counter32 RecvWraps; /* DEPRICATED */ + /* Counter64 Duration; */ + Integer32 StartTime; /* DEPRICATED */ + Integer32 StartTimeSec; + Integer32 StartTimeUsec; + + /* SENDER CONGESTION */ + Counter32 SndLimTrans[WC_SNDLIM_NSTATES]; + Counter32 SndLimTime[WC_SNDLIM_NSTATES]; + Counter64 SndLimBytes[WC_SNDLIM_NSTATES]; + Counter32 SlowStart; + Counter32 CongAvoid; + Counter32 CongestionSignals; + Counter32 OtherReductions; + Counter32 CongestionOverCount; + Gauge32 CurCwnd; + Gauge32 MaxCwnd; + /* Gauge32 LimCwnd; */ + Gauge32 CurSsthresh; + Gauge32 MaxSsthresh; + Gauge32 MinSsthresh; + + /* SENDER PATH MODEL */ + Counter32 FastRetran; + Counter32 Timeouts; + Counter32 SubsequentTimeouts; + Gauge32 CurTimeoutCount; + Counter32 AbruptTimeouts; + Counter32 PktsRetrans; + Counter32 BytesRetrans; + Counter32 
DupAcksIn; + Counter32 SACKsRcvd; + Counter32 SACKBlocksRcvd; + Counter32 PreCongSumCwnd; + Counter32 PreCongSumRTT; + Counter32 PreCongCountRTT; /* DEPRECATED */ + Counter32 PostCongSumRTT; + Counter32 PostCongCountRTT; + /* Counter32 ECNsignals; */ + Counter32 ECERcvd; + Counter32 SendStall; + Counter32 QuenchRcvd; + Gauge32 RetranThresh; + /* Counter32 SndDupAckEpisodes; */ + /* Counter64 SumBytesReordered; */ + Counter32 NonRecovDA; + Counter32 AckAfterFR; + Counter32 DSACKDups; + Gauge32 SampleRTT; + Gauge32 SmoothedRTT; + Gauge32 RTTVar; + Gauge32 MaxRTT; + Gauge32 MinRTT; + Counter64 SumRTT; + Counter32 CountRTT; + Gauge32 CurRTO; + Gauge32 MaxRTO; + Gauge32 MinRTO; + Gauge32 CurMSS; + Gauge32 MaxMSS; + Gauge32 MinMSS; + + /* LOCAL SENDER BUFFER */ + Gauge32 CurRetxQueue; + Gauge32 MaxRetxQueue; + Gauge32 CurAppWQueue; + Gauge32 MaxAppWQueue; + + /* LOCAL RECEIVER */ + Gauge32 CurRwinSent; + Gauge32 MaxRwinSent; + Gauge32 MinRwinSent; + Integer32 LimRwin; + /* Counter32 DupAckEpisodes; */ + Counter32 DupAcksOut; + /* Counter32 CERcvd; */ + /* Counter32 ECNSent; */ + /* Counter32 ECNNonceRcvd; */ + Gauge32 CurReasmQueue; + Gauge32 MaxReasmQueue; + Gauge32 CurAppRQueue; + Gauge32 MaxAppRQueue; + Gauge32 X_rcv_ssthresh; + Gauge32 X_wnd_clamp; + Gauge32 X_dbg1; + Gauge32 X_dbg2; + Gauge32 X_dbg3; + Gauge32 X_dbg4; + + /* OBSERVED RECEIVER */ + Gauge32 CurRwinRcvd; + Gauge32 MaxRwinRcvd; + Gauge32 MinRwinRcvd; + + /* CONNECTION ID */ + InetAddressType LocalAddressType; + InetAddress LocalAddress; + InetPortNumber LocalPort; + /* InetAddressType RemAddressType; */ + InetAddress RemAddress; + InetPortNumber RemPort; + /* Integer32 IdId; */ + + Gauge32 X_RcvRTT; + + INTEGER X_SBufMode; + INTEGER X_RBufMode; + +#ifdef CONFIG_WEB100_NET100 + Gauge32 WAD_IFQ; + Gauge32 WAD_MaxBurst; +#endif +}; + +struct web100stats { + int wc_cid; + + struct sock *wc_sk; + + atomic_t wc_users; + __u8 wc_dead; + + struct web100stats *wc_next; + struct web100stats *wc_prev; + + struct 
web100stats *wc_hash_next; + struct web100stats *wc_hash_prev; + + struct web100stats *wc_death_next; + + int wc_limstate; + __u64 wc_limstate_bytes; + struct timeval wc_limstate_time; + + __u64 wc_start_monotime; + + struct web100directs wc_vars; +}; + +#endif /* CONFIG_WEB100_STATS */ /* } */ + +#endif /*_WEB100_STATS_H */ diff -urP linux-2.4.19.orig/net/ipv4/Config.in linux-2.4.19/net/ipv4/Config.in --- linux-2.4.19.orig/net/ipv4/Config.in Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/Config.in Wed Feb 12 12:09:01 2003 @@ -40,7 +40,26 @@ bool ' IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD fi bool ' IP: TCP Explicit Congestion Notification support' CONFIG_INET_ECN +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' IP: High-Speed TCP [Sally Floyd] available (EXPERIMENTAL)' CONFIG_ALTAIMD + if [ "$CONFIG_ALTAIMD" = "y" ]; then + bool ' IP: High-Speed TCP on by default' CONFIG_ALTAIMD_ON + fi +fi bool ' IP: TCP syncookie support (disabled per default)' CONFIG_SYN_COOKIES if [ "$CONFIG_NETFILTER" != "n" ]; then source net/ipv4/netfilter/Config.in +fi + +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' Web100 networking enhancements' CONFIG_WEB100 + if [ "$CONFIG_WEB100" = "y" ]; then + bool ' Web100: TCP statistics' CONFIG_WEB100_STATS + if [ "$CONFIG_WEB100_STATS" = "y" ]; then + bool ' Web100: Net100 extensions' CONFIG_WEB100_NET100 + # Netlink is always enabled now. 
+ bool ' Web100: netlink event notification service' CONFIG_WEB100_NETLINK + fi + int ' Web100: default winscale initial value' CONFIG_WEB100_WINSCALE_VAL 7 + fi fi diff -urP linux-2.4.19.orig/net/ipv4/Makefile linux-2.4.19/net/ipv4/Makefile --- linux-2.4.19.orig/net/ipv4/Makefile Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/Makefile Wed Feb 12 12:09:01 2003 @@ -25,5 +25,6 @@ obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_IP_PNP) += ipconfig.o +obj-$(CONFIG_WEB100_STATS) += web100_stats.o include $(TOPDIR)/Rules.make diff -urP linux-2.4.19.orig/net/ipv4/route.c linux-2.4.19/net/ipv4/route.c --- linux-2.4.19.orig/net/ipv4/route.c Tue Feb 11 14:53:35 2003 +++ linux-2.4.19/net/ipv4/route.c Wed Feb 12 12:17:04 2003 @@ -2155,7 +2155,7 @@ struct net_device *dev = __dev_get_by_index(iif); err = -ENODEV; if (!dev) - goto out; + goto out_free; skb->protocol = __constant_htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); @@ -2170,10 +2170,8 @@ memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif); } - if (err) { - kfree_skb(skb); - goto out; - } + if (err) + goto out_free; skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) @@ -2184,16 +2182,20 @@ err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0); if (!err) - goto out; + goto out_free; if (err < 0) { err = -EMSGSIZE; - goto out; + goto out_free; } err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; out: return err; + +out_free: + kfree_skb(skb); + goto out; } int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) diff -urP linux-2.4.19.orig/net/ipv4/sysctl_net_ipv4.c linux-2.4.19/net/ipv4/sysctl_net_ipv4.c --- linux-2.4.19.orig/net/ipv4/sysctl_net_ipv4.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/sysctl_net_ipv4.c Wed Feb 12 12:09:01 2003 @@ -221,6 +221,30 @@ &sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec}, 
{NET_TCP_TW_REUSE, "tcp_tw_reuse", &sysctl_tcp_tw_reuse, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_MODERATE_ON_TXQ, "tcp_moderate_on_txq", + &sysctl_tcp_moderate_on_txq, sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_WEB100 + {NET_IPV4_WEB100_DEFAULT_WSCALE, "web100_default_wscale", + &sysctl_web100_default_wscale, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif +#ifdef CONFIG_WEB100_NET100 + {NET_IPV4_WEB100_NO_METRICS_SAVE, "web100_no_metrics_save", + &sysctl_web100_no_metrics_save, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_WAD_IFQ, "WAD_IFQ", + &sysctl_WAD_IFQ, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_WAD_MAX_BURST, "WAD_MaxBurst", + &sysctl_WAD_MaxBurst, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif +#ifdef CONFIG_WEB100_STATS + {NET_IPV4_WEB100_SBUFMODE, "web100_sbufmode", + &sysctl_web100_sbufmode, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_WEB100_RBUFMODE, "web100_rbufmode", + &sysctl_web100_rbufmode, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif +#ifdef CONFIG_ALTAIMD + {NET_TCP_ALTAIMD, "tcp_altAIMD", + &sysctl_tcp_altAIMD, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif {0} }; diff -urP linux-2.4.19.orig/net/ipv4/tcp.c linux-2.4.19/net/ipv4/tcp.c --- linux-2.4.19.orig/net/ipv4/tcp.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/tcp.c Wed Feb 12 12:09:01 2003 @@ -276,6 +276,21 @@ atomic_t tcp_memory_allocated; /* Current allocated memory. */ atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ +atomic_t tcp_retx_mem; + +#ifdef CONFIG_WEB100 +int sysctl_web100_default_wscale = CONFIG_WEB100_WINSCALE_VAL; +#endif +#ifdef CONFIG_WEB100_NET100 +int sysctl_web100_no_metrics_save = 0; +int sysctl_WAD_IFQ = 0; +int sysctl_WAD_MaxBurst = 3; +#endif +#ifdef CONFIG_WEB100_STATS +int sysctl_web100_sbufmode = 1; +int sysctl_web100_rbufmode = 1; +#endif + /* Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. 
* All the tcp_mem_schedule() is of this nature: accounting @@ -919,8 +934,10 @@ } out: - if (copied) + if (copied) { tcp_push(sk, tp, flags, mss_now, tp->nonagle); + web100_update_writeq(sk); + } return copied; do_error: @@ -1169,6 +1186,12 @@ __tcp_push_pending_frames(sk, tp, mss_now, 1); } else if (skb == tp->send_head) tcp_push_one(sk, mss_now); + +#if 0 + /* why is this here? WEB100_XXX */ + WEB100_UPDATE_FUNC(tp, web100_update_cwnd(tp)); +#endif + continue; wait_for_sndbuf: @@ -1185,8 +1208,10 @@ } out: - if (copied) + if (copied) { tcp_push(sk, tp, flags, mss_now, tp->nonagle); + web100_update_writeq(sk); + } TCP_CHECK_TIMER(sk); release_sock(sk); return copied; @@ -2280,6 +2305,7 @@ err = -EINVAL; break; } + WEB100_VAR_SET(tp, NagleEnabled, !val); tp->nonagle = (val == 0) ? 0 : 1; if (val) tcp_push_pending_frames(sk, tp); @@ -2301,6 +2327,7 @@ err = -EINVAL; break; } + WEB100_VAR_SET(tp, NagleEnabled, !val); if (val != 0) { tp->nonagle = 2; } else { @@ -2582,7 +2609,7 @@ tcp_ehash = (struct tcp_ehash_bucket *) __get_free_pages(GFP_ATOMIC, order); } while (tcp_ehash == NULL && --order > 0); - + if (!tcp_ehash) panic("Failed to allocate TCP established hash table\n"); for (i = 0; i < (tcp_ehash_size<<1); i++) { @@ -2642,4 +2669,8 @@ tcp_ehash_size<<1, tcp_bhash_size); tcpdiag_init(); + +#ifdef CONFIG_WEB100_STATS + web100_stats_init(); +#endif } diff -urP linux-2.4.19.orig/net/ipv4/tcp_input.c linux-2.4.19/net/ipv4/tcp_input.c --- linux-2.4.19.orig/net/ipv4/tcp_input.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/tcp_input.c Wed Feb 12 12:09:01 2003 @@ -87,6 +87,107 @@ int sysctl_tcp_rfc1337 = 0; int sysctl_tcp_max_orphans = NR_FILE; +#ifdef CONFIG_ALTAIMD +#ifdef CONFIG_ALTAIMD_ON +int sysctl_tcp_altAIMD = 1; +#else +int sysctl_tcp_altAIMD = 0; +#endif + +struct hstcp_entry hstcp_table[] = { +{38, 1, 128}, +{118, 2, 112}, +{221, 3, 104}, +{347, 4, 98}, +{495, 5, 93}, +{663, 6, 89}, +{851, 7, 86}, +{1058, 8, 83}, +{1284, 9, 81}, +{1529, 10, 78}, +{1793, 
11, 76}, +{2076, 12, 74}, +{2378, 13, 72}, +{2699, 14, 71}, +{3039, 15, 69}, +{3399, 16, 68}, +{3778, 17, 66}, +{4177, 18, 65}, +{4596, 19, 64}, +{5036, 20, 62}, +{5497, 21, 61}, +{5979, 22, 60}, +{6483, 23, 59}, +{7009, 24, 58}, +{7558, 25, 57}, +{8130, 26, 56}, +{8726, 27, 55}, +{9346, 28, 54}, +{9991, 29, 53}, +{10661, 30, 52}, +{11358, 31, 52}, +{12082, 32, 51}, +{12834, 33, 50}, +{13614, 34, 49}, +{14424, 35, 48}, +{15265, 36, 48}, +{16137, 37, 47}, +{17042, 38, 46}, +{17981, 39, 45}, +{18955, 40, 45}, +{19965, 41, 44}, +{21013, 42, 43}, +{22101, 43, 43}, +{23230, 44, 42}, +{24402, 45, 41}, +{25618, 46, 41}, +{26881, 47, 40}, +{28193, 48, 39}, +{29557, 49, 39}, +{30975, 50, 38}, +{32450, 51, 38}, +{33986, 52, 37}, +{35586, 53, 36}, +{37253, 54, 36}, +{38992, 55, 35}, +{40808, 56, 35}, +{42707, 57, 34}, +{44694, 58, 33}, +{46776, 59, 33}, +{48961, 60, 32}, +{51258, 61, 32}, +{53677, 62, 31}, +{56230, 63, 30}, +{58932, 64, 30}, +{61799, 65, 29}, +{64851, 66, 28}, +{68113, 67, 28}, +{71617, 68, 27}, +{75401, 69, 26}, +{79517, 70, 26}, +{84035, 71, 25}, +{89053, 72, 24}, +{94717, 73, 23}, +}; +static short hstcp_max_entry=72; + +struct hstcp_entry get_hstcp_val(struct tcp_opt *tp) { + short left,right,mid; + __u32 this_cwnd = tp->snd_cwnd; + left = 0; + right = hstcp_max_entry; + while (right-left) { + mid = (left + right)>>1; + if (hstcp_table[mid].cwnd < this_cwnd) { + left = mid + 1; + } else { + right = mid; + } + } + return hstcp_table[right]; +} +#endif + #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ @@ -96,6 +197,9 @@ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. 
*/ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ +#ifdef CONFIG_WEB100_STATS +#define FLAG_BIG_DATA_ACKED 0x200 /* ACK of at least 1MSS new data */ +#endif #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -291,6 +395,9 @@ if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) tcp_fixup_sndbuf(sk); +#ifdef CONFIG_WEB100_STATS + tp->rcv_space = tp->rcv_wnd; +#endif maxwin = tcp_full_space(sk); if (tp->window_clamp >= maxwin) { @@ -347,6 +454,126 @@ } } +#ifdef CONFIG_WEB100_STATS +/* Receiver "autotuning" code. + * + * Note that some of these algorithms are based on or similar to + * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL. + * + * + * Details on this code can be found at + * + */ + +static inline void tcp_rcv_rtt_update(struct tcp_opt *tp, __u32 rcv_rtt, int win_dep) +{ + rcv_rtt++; /* Truncated, round up. */ + if (tp->rcv_rtt == 0) + tp->rcv_rtt = rcv_rtt; /* First measurement */ + else if (rcv_rtt < tp->rcv_rtt || win_dep) + tp->rcv_rtt = min(tp->rcv_rtt, rcv_rtt); + else + tp->rcv_rtt = (7 * tp->rcv_rtt + rcv_rtt) / 8; + + WEB100_VAR_SET(tp, X_RcvRTT, tp->rcv_rtt * 1000000 / HZ); +} + +static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp) +{ + if (tp->rcv_rtt_time == 0) + goto new_measure; + + if (before(tp->rcv_nxt, tp->rcv_rtt_seq)) + return; + + tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_time, 1); + +new_measure: + tp->rcv_rtt_seq = tp->rcv_nxt + tp->rcv_wnd; + tp->rcv_rtt_time = jiffies; +} + +static void tcp_ofo_free(struct sk_buff *skb) +{ + struct tcp_opt *tp = &skb->sk->tp_pinfo.af_tcp; + struct web100directs *vars = &tp->tcp_stats->wc_vars; + + if (tp->ofo_hi_seq == TCP_SKB_CB(skb)->end_seq) { + tp->ofo_hi_seq = 0; + vars->CurReasmQueue = 0; + } else { + vars->CurReasmQueue = tp->ofo_hi_seq - tp->rcv_nxt; + } +} + +static inline void tcp_ofo_own(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct 
web100directs *vars = &tp->tcp_stats->wc_vars; + + if (tp->ofo_hi_seq == 0 || after(TCP_SKB_CB(skb)->end_seq, tp->ofo_hi_seq)) + tp->ofo_hi_seq = TCP_SKB_CB(skb)->end_seq; + vars->CurReasmQueue = tp->ofo_hi_seq - tp->rcv_nxt; + if (vars->MaxReasmQueue < vars->CurReasmQueue) + vars->MaxReasmQueue = vars->CurReasmQueue; + + skb->sk = sk; + skb->destructor = tcp_ofo_free; +} + +static void tcp_rcv_free(struct sk_buff *skb) +{ + unsigned len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_opt *tp = &skb->sk->tp_pinfo.af_tcp; + + tp->rcv_alloc -= len; + tp->tcp_stats->wc_vars.CurAppRQueue = tp->rcv_alloc; +} + +static inline void tcp_rcv_own(struct sock *sk, struct sk_buff *skb) +{ + unsigned len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100directs *vars = &tp->tcp_stats->wc_vars; + + tp->rcv_alloc += len; + vars->CurAppRQueue = tp->rcv_alloc; + if (vars->MaxAppRQueue < vars->CurAppRQueue) + vars->MaxAppRQueue = vars->CurAppRQueue; + + skb->sk = sk; + skb->destructor = tcp_rcv_free; +} + +static inline void tcp_rcv_space_adjust(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int time; + __u32 space; + + /* Don't measure during a recovery period */ + if (tp->ofo_hi_seq) { + tp->rcv_winest_time = 0; + return; + } + + if (tp->rcv_winest_time == 0) + goto new_measure; + + time = jiffies - tp->rcv_winest_time; + if (time < tp->rcv_rtt || tp->rcv_rtt == 0) + return; + + space = 2 * (tp->rcv_nxt - tp->rcv_winest_seq) + 4 * tp->advmss; + space = max(space, (__u32)tp->rcv_space / 2); + tp->rcv_space = (int)space; + +new_measure: + tp->rcv_winest_seq = tp->rcv_nxt; + tp->rcv_winest_time = jiffies; +} +#endif /* CONFIG_WEB100_STATS */ + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. 
The @@ -397,6 +624,12 @@ if (skb->len >= 128) tcp_grow_window(sk, tp, skb); + WEB100_UPDATE_FUNC(tp, web100_update_rcv_nxt(tp)); + +#ifdef CONFIG_WEB100_STATS + tcp_rcv_rtt_measure(tp); + tcp_rcv_space_adjust(sk); +#endif } /* Called to compute a smoothed rtt estimate. The data fed to this @@ -511,6 +744,11 @@ { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct dst_entry *dst = __sk_dst_get(sk); + +#ifdef CONFIG_WEB100_NET100 + if (sysctl_web100_no_metrics_save) + return; +#endif dst_confirm(dst); @@ -635,6 +873,7 @@ if (dst->reordering && tp->reordering != dst->reordering) { tp->sack_ok &= ~2; tp->reordering = dst->reordering; + WEB100_VAR_SET(tp, RetranThresh, tp->reordering); } if (dst->rtt == 0) @@ -687,6 +926,7 @@ { if (metric > tp->reordering) { tp->reordering = min(TCP_MAX_REORDERING, metric); + WEB100_VAR_SET(tp, RetranThresh, tp->reordering); /* This exciting event is worth to be remembered. 8) */ if (ts) @@ -768,6 +1008,9 @@ u32 lost_retrans = 0; int flag = 0; int i; + + WEB100_VAR_INC(tp, SACKsRcvd); + WEB100_VAR_ADD(tp, SACKBlocksRcvd, num_sacks); if (!tp->sacked_out) tp->fackets_out = 0; @@ -795,6 +1038,9 @@ tp->sack_ok |= 4; NET_INC_STATS_BH(TCPDSACKOfoRecv); } + + if (dup_sack) + WEB100_VAR_INC(tp, DSACKDups); /* D-SACK for already forgotten data... * Do dumb counting. */ @@ -977,6 +1223,8 @@ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff *skb; int cnt = 0; + + WEB100_UPDATE_FUNC(tp, web100_update_congestion(tp, 0)); /* Reduce ssthresh if it has not yet been made inside this window. */ if (tp->ca_state <= TCP_CA_Disorder || @@ -1013,6 +1261,7 @@ tcp_sync_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); + WEB100_VAR_SET(tp, RetranThresh, tp->reordering); tp->ca_state = TCP_CA_Loss; tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); @@ -1280,16 +1529,6 @@ } } -/* CWND moderation, preventing bursts due to too big ACKs - * in dubious situations. 
- */ -static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)+tcp_max_burst(tp)); - tp->snd_cwnd_stamp = tcp_time_stamp; -} - /* Decrease cwnd each second ack. */ static void tcp_cwnd_down(struct tcp_opt *tp) @@ -1340,6 +1579,7 @@ tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } + WEB100_VAR_INC(tp, CongestionOverCount); } else { tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } @@ -1626,6 +1866,8 @@ tp->snd_cwnd_cnt = 0; tp->ca_state = TCP_CA_Recovery; + WEB100_UPDATE_FUNC(tp, web100_update_congestion(tp, 0)); + WEB100_VAR_INC(tp, FastRetran); /* WEB100_XXX */ } if (is_dupack || tcp_head_timedout(sk, tp)) @@ -1637,8 +1879,9 @@ /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Superceeds RFC1323) */ -static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag) +static void tcp_ack_saw_tstamp(struct sock *sk, int flag) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; __u32 seq_rtt; /* RTTM Rule: A TSecr value received in a segment is used to @@ -1657,14 +1900,23 @@ * in window is lost... Voila. --ANK (010210) */ seq_rtt = tcp_time_stamp - tp->rcv_tsecr; + +#ifdef CONFIG_WEB100_STATS + if (flag & FLAG_BIG_DATA_ACKED) + tcp_rcv_rtt_update(tp, seq_rtt, 0); +#endif + tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); + WEB100_UPDATE_FUNC(tp, web100_update_rtt(tp, seq_rtt)); tp->backoff = 0; tcp_bound_rto(tp); } -static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag) +static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine * rtt estimates. 
Also, we must not reset the @@ -1677,20 +1929,34 @@ if (flag & FLAG_RETRANS_DATA_ACKED) return; +#ifdef CONFIG_WEB100_STATS + if (flag & FLAG_BIG_DATA_ACKED) + tcp_rcv_rtt_update(tp, seq_rtt, 0); +#endif + tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); + WEB100_UPDATE_FUNC(tp, web100_update_rtt(tp, seq_rtt)); tp->backoff = 0; tcp_bound_rto(tp); } static __inline__ void -tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) +tcp_ack_update_rtt(struct sock *sk, int flag, s32 seq_rtt) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ if (tp->saw_tstamp && tp->rcv_tsecr) - tcp_ack_saw_tstamp(tp, flag); + tcp_ack_saw_tstamp(sk, flag); else if (seq_rtt >= 0) - tcp_ack_no_tstamp(tp, seq_rtt, flag); + tcp_ack_no_tstamp(sk, seq_rtt, flag); +} + +static inline int tcp_should_inc_cwnd(struct tcp_opt *tp) +{ + return tcp_standard_aimd(tp, (tp->snd_cwnd_cnt >= tp->snd_cwnd), + ((tp->snd_cwnd_cnt * get_hstcp_val(tp).a_val) + >= tp->snd_cwnd)); } /* This is Jacobson's slow start and congestion avoidance. @@ -1698,21 +1964,31 @@ */ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) { - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* In "safe" area, increase. */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; +#ifdef CONFIG_WEB100_STATS + if (tp->snd_cwnd > tp->snd_cwnd_clamp) { + tp->snd_cwnd--; + return; + } +#endif + + if (tp->snd_cwnd <= tp->snd_ssthresh) { + /* In "safe" area, increase. */ + tp->snd_cwnd++; + + WEB100_VAR_INC(tp, SlowStart); } else { - /* In dangerous area, increase slowly. + /* In dangerous area, increase slowly. 
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tcp_should_inc_cwnd(tp)) { if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; tp->snd_cwnd_cnt=0; } else tp->snd_cwnd_cnt++; - } + WEB100_VAR_INC(tp, CongAvoid); + } + tp->snd_cwnd = min(tp->snd_cwnd, (__u32)tp->snd_cwnd_clamp); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -1786,11 +2062,15 @@ tp->fackets_out--; tp->packets_out--; __skb_unlink(skb, skb->list); +#ifdef CONFIG_WEB100_STATS + if (scb->end_seq - scb->seq >= tp->mss_cache) + acked |= FLAG_BIG_DATA_ACKED; +#endif tcp_free_skb(sk, skb); } if (acked&FLAG_ACKED) { - tcp_ack_update_rtt(tp, acked, seq_rtt); + tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk, tp); } @@ -1884,10 +2164,12 @@ tp->max_window = nwin; tcp_sync_mss(sk, tp->pmtu_cookie); } + WEB100_UPDATE_FUNC(tp, web100_update_rwin_rcvd(tp)); } } tp->snd_una = ack; + WEB100_VAR_SET(tp, SndUna, ack); return flag; } @@ -1918,6 +2200,7 @@ */ tcp_update_wl(tp, ack, ack_seq); tp->snd_una = ack; + WEB100_VAR_SET(tp, SndUna, ack); flag |= FLAG_WIN_UPDATE; NET_INC_STATS_BH(TCPHPAcks); @@ -1932,8 +2215,10 @@ if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) + if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) { flag |= FLAG_ECE; + WEB100_VAR_INC(tp, ECERcvd); + } } /* We passed data and got it acked, remove any soft error @@ -1977,10 +2262,12 @@ return 1; old_ack: + /* WEB100_XXX */ if (TCP_SKB_CB(skb)->sacked) tcp_sacktag_write_queue(sk, skb, prior_snd_una); uninteresting_ack: + /* WEB100_XXX */ SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); return 0; } @@ -2333,6 +2620,8 @@ { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + WEB100_VAR_INC(tp, DupAcksOut); + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(DelayedACKLost); @@ -2573,7 +2862,12 @@ if (tcp_prune_queue(sk) < 0 || 
!tcp_rmem_schedule(sk, skb)) goto drop; } - tcp_set_owner_r(skb, sk); +#ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) + tcp_rcv_own(sk, skb); + else +#endif + tcp_set_owner_r(skb, sk); __skb_queue_tail(&sk->receive_queue, skb); } tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -2601,6 +2895,7 @@ __kfree_skb(skb); } else if (!sk->dead) sk->data_ready(sk, 0); + return; } @@ -2654,7 +2949,12 @@ SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - tcp_set_owner_r(skb, sk); +#ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) + tcp_ofo_own(sk, skb); + else +#endif + tcp_set_owner_r(skb, sk); if (skb_peek(&tp->out_of_order_queue) == NULL) { /* Initial out of order segment, build 1 SACK. */ @@ -2790,7 +3090,12 @@ memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; __skb_insert(nskb, skb->prev, skb, skb->list); - tcp_set_owner_r(nskb, sk); +#ifdef CONFIG_WEB100_STATS + if (sk->tp_pinfo.af_tcp.tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) + ; /* owner is set after the copy, from the source skb's destructor */ + else +#endif + tcp_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ while (copy > 0) { @@ -2806,6 +3111,13 @@ copy -= size; start += size; } +#ifdef CONFIG_WEB100_STATS + /* A bit of a hack... 
*/ + if (skb->destructor == tcp_rcv_free) + tcp_rcv_own(sk, nskb); + else if (skb->destructor == tcp_ofo_free) + tcp_ofo_own(sk, nskb); +#endif if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; __skb_unlink(skb, skb->list); @@ -2937,6 +3249,7 @@ if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(tp); tp->snd_cwnd = (tp->snd_cwnd+win_used)>>1; + WEB100_VAR_INC(tp, OtherReductions); } tp->snd_cwnd_used = 0; } @@ -3272,6 +3585,11 @@ */ if (tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); +#ifdef CONFIG_WEB100_STATS + if (tp->rcv_tsecr && + TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= tp->mss_cache) + tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0); +#endif } if (len <= tcp_header_len) { @@ -3290,7 +3608,7 @@ } } else { int eaten = 0; - + if (tp->ucopy.task == current && tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len && @@ -3316,7 +3634,12 @@ /* Bulk data transfer: receiver */ __skb_pull(skb,tcp_header_len); __skb_queue_tail(&sk->receive_queue, skb); - tcp_set_owner_r(skb, sk); +#ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) + tcp_rcv_own(sk, skb); + else +#endif + tcp_set_owner_r(skb, sk); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; } @@ -3404,6 +3727,12 @@ if(th->ack) tcp_ack(sk, skb, FLAG_SLOWPATH); +#ifdef CONFIG_WEB100_STATS + if (tp->saw_tstamp && tp->rcv_tsecr && + TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= tp->mss_cache) + tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0); +#endif + /* Process urgent data. 
*/ tcp_urg(sk, skb, th); @@ -3535,6 +3864,10 @@ tp->copied_seq = tp->rcv_nxt; mb(); tcp_set_state(sk, TCP_ESTABLISHED); + +#ifdef CONFIG_WEB100_STATS + web100_stats_establish(sk); +#endif if(!sk->dead) { sk->state_change(sk); @@ -3751,6 +4084,9 @@ mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->state_change(sk); +#ifdef CONFIG_WEB100_STATS + web100_stats_establish(sk); +#endif /* Note, that this wakeup is only for marginal * crossed SYN case. Passively open sockets @@ -3762,7 +4098,12 @@ } tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << tp->snd_wscale; + WEB100_VAR_SET(tp, SndUna, tp->snd_una); + /* RFC1323: The window in SYN & SYN/ACK segments is + * never scaled (PSC/CMU patch {rreddy,mathis}@psc.edu). + */ + tp->snd_wnd = ntohs(th->window); + WEB100_UPDATE_FUNC(tp, web100_update_rwin_rcvd(tp)); tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); /* tcp_ack considers this ACK as duplicate @@ -3770,7 +4111,7 @@ * Fix it at least with timestamps. */ if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(tp, 0); + tcp_ack_saw_tstamp(sk, 0); if (tp->tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff -urP linux-2.4.19.orig/net/ipv4/tcp_ipv4.c linux-2.4.19/net/ipv4/tcp_ipv4.c --- linux-2.4.19.orig/net/ipv4/tcp_ipv4.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/tcp_ipv4.c Wed Feb 12 12:09:01 2003 @@ -835,6 +835,10 @@ if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, sk->sport, usin->sin_port); + WEB100_VAR_SET(tp, SndISS, tp->write_seq); + WEB100_VAR_SET(tp, SndMax, tp->write_seq); + WEB100_VAR_SET(tp, SndNxt, tp->write_seq); + WEB100_VAR_SET(tp, SndUna, tp->write_seq); sk->protinfo.af_inet.id = tp->write_seq^jiffies; @@ -1017,8 +1021,14 @@ /* This is deprecated, but if someone generated it, * we have no reasons to ignore it. 
*/ - if (sk->lock.users == 0) + if (sk->lock.users == 0) { tcp_enter_cwr(tp); + WEB100_VAR_INC(tp, QuenchRcvd); +#if 0 + /* WEB100_XXX */ + WEB100_UPDATE_FUNC(tp, web100_update_cwnd(tp)); +#endif + } goto out; case ICMP_PARAMETERPROB: err = EPROTO; @@ -1546,6 +1556,13 @@ newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) goto exit; +#ifdef CONFIG_WEB100_STATS + if (web100_stats_create(newsk)) { + sk_free(newsk); + goto exit; + } + newsk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV4; +#endif newsk->dst_cache = dst; newsk->route_caps = dst->dev->features; @@ -1764,12 +1781,14 @@ skb->dev = NULL; bh_lock_sock(sk); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_segrecv(&sk->tp_pinfo.af_tcp, skb)); ret = 0; if (!sk->lock.users) { if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } else sk_add_backlog(sk, skb); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_cwnd(&sk->tp_pinfo.af_tcp)); bh_unlock_sock(sk); sock_put(sk); @@ -2035,6 +2054,16 @@ sk->sndbuf = sysctl_tcp_wmem[1]; sk->rcvbuf = sysctl_tcp_rmem[1]; + +#ifdef CONFIG_WEB100_STATS + { + int err; + if ((err = web100_stats_create(sk))) { + return err; + } + sk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV4; + } +#endif atomic_inc(&tcp_sockets_allocated); @@ -2059,6 +2088,10 @@ /* Clean up a referenced TCP bind bucket. */ if(sk->prev != NULL) tcp_put_port(sk); + +#ifdef CONFIG_WEB100_STATS + web100_stats_destroy(sk->tp_pinfo.af_tcp.tcp_stats); +#endif /* If sendmsg cached page exists, toss it. 
*/ if (tp->sndmsg_page != NULL) @@ -2279,6 +2312,7 @@ len = 0; return len; } + struct proto tcp_prot = { name: "TCP", diff -urP linux-2.4.19.orig/net/ipv4/tcp_minisocks.c linux-2.4.19/net/ipv4/tcp_minisocks.c --- linux-2.4.19.orig/net/ipv4/tcp_minisocks.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/tcp_minisocks.c Wed Feb 12 12:09:01 2003 @@ -390,6 +390,9 @@ sizeof(struct in6_addr)); } #endif + + WEB100_VAR_SET(tp, State, WC_STATE_TIMEWAIT); + /* Linkage updates. */ __tcp_tw_hashdance(sk, tw); diff -urP linux-2.4.19.orig/net/ipv4/tcp_output.c linux-2.4.19/net/ipv4/tcp_output.c --- linux-2.4.19.orig/net/ipv4/tcp_output.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/tcp_output.c Wed Feb 12 12:09:01 2003 @@ -44,6 +44,9 @@ /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse = 1; +/* moderate the cwnd rather than half it if the transmission queue is full */ +int sysctl_tcp_moderate_on_txq = 0; + static __inline__ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) { @@ -51,6 +54,7 @@ if (tp->send_head == (struct sk_buff *) &sk->write_queue) tp->send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } @@ -163,6 +167,7 @@ } tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; + WEB100_UPDATE_FUNC(tp, web100_update_rwin_sent(tp)); /* RFC1323 scaling applied */ new_win >>= tp->rcv_wscale; @@ -200,6 +205,8 @@ #define SYSCTL_FLAG_WSCALE 0x2 #define SYSCTL_FLAG_SACK 0x4 + WEB100_UPDATE_FUNC(tp, web100_update_segsend(tp, skb)); + sysctl_flags = 0; if (tcb->flags & TCPCB_FLAG_SYN) { tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; @@ -279,7 +286,16 @@ if (err <= 0) return err; - tcp_enter_cwr(tp); +#ifdef CONFIG_WEB100_NET100 + if (!NET100_WAD(tp, WAD_IFQ, sysctl_WAD_IFQ)) +#endif + if( !sysctl_tcp_moderate_on_txq ){ + tcp_enter_cwr(tp); + } else { + 
tcp_moderate_cwnd(tp); + } + + WEB100_VAR_INC(tp, SendStall); /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device @@ -305,23 +321,34 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int why = WC_SNDLIM_NONE; /* Advance write_seq and place onto the write_queue. */ tp->write_seq = TCP_SKB_CB(skb)->end_seq; __skb_queue_tail(&sk->write_queue, skb); tcp_charge_skb(sk, skb); - if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) { + if (!force_queue && tp->send_head == NULL && + (why = tcp_snd_wait(tp, skb, cur_mss, tp->nonagle)) == WC_SNDLIM_NONE) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { +#ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_SBufMode == WC_BUFMODE_WEB100) + tcp_retx_charge_skb(sk, skb); +#endif tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); tcp_minshall_update(tp, cur_mss, skb); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); return; + } else { + why = WC_SNDLIM_SENDER; } } + if (why != WC_SNDLIM_NONE) + WEB100_UPDATE_FUNC(tp, web100_update_sndlim(tp, why)); /* Queue it, remembering where we must start sending. */ if (tp->send_head == NULL) tp->send_head = skb; @@ -334,18 +361,28 @@ { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb = tp->send_head; + int why; - if (tcp_snd_test(tp, skb, cur_mss, 1)) { + if ((why = tcp_snd_wait(tp, skb, cur_mss, 1)) == WC_SNDLIM_NONE) { /* Send it out now. 
*/ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { +#ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_SBufMode == WC_BUFMODE_WEB100) + tcp_retx_charge_skb(sk, skb); +#endif tp->send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); return; + } else { + why = WC_SNDLIM_SENDER; } } + if (why != WC_SNDLIM_NONE) + WEB100_UPDATE_FUNC(tp, web100_update_sndlim(tp, why)); } /* Split fragmented skb to two parts at length len. */ @@ -532,6 +569,7 @@ /* And store cached results */ tp->pmtu_cookie = pmtu; tp->mss_cache = mss_now; + WEB100_UPDATE_FUNC(tp, web100_update_mss(tp)); return mss_now; } @@ -547,6 +585,7 @@ { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); unsigned int mss_now; + int why = WC_SNDLIM_NONE; /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and all @@ -564,20 +603,31 @@ mss_now = tcp_current_mss(sk); while((skb = tp->send_head) && - tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) { + (why = tcp_snd_wait(tp, skb, mss_now, + tcp_skb_is_last(sk, skb) ? tp->nonagle : 1)) + == WC_SNDLIM_NONE) { if (skb->len > mss_now) { if (tcp_fragment(sk, skb, mss_now)) break; } TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) + if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) { + why = WC_SNDLIM_SENDER; break; + } +#ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_SBufMode == WC_BUFMODE_WEB100) + tcp_retx_charge_skb(sk, skb); +#endif /* Advance the send_head. This one is sent out. 
*/ update_send_head(sk, tp, skb); tcp_minshall_update(tp, mss_now, skb); sent_pkts = 1; } + if (why == WC_SNDLIM_NONE) + why = WC_SNDLIM_SENDER; + WEB100_UPDATE_FUNC(tp, web100_update_sndlim(tp, why)); if (sent_pkts) { tcp_cwnd_validate(sk, tp); @@ -654,7 +704,21 @@ int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; - + +#ifdef CONFIG_WEB100_STATS + WEB100_VAR_SET(tp, X_dbg4, full_space); + + if (tp->tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) { + window = tp->rcv_space - max(tp->rcv_alloc - sk->rcvbuf, 0) + + min_t(int, tp->ofo_hi_seq ? + (tp->ofo_hi_seq - tp->rcv_nxt) : 0, + tp->rcv_space); + + window = max(window, 0); + window = min_t(__u32, window, tp->tcp_stats->wc_vars.LimRwin); + return (u32)window; + } +#endif if (mss > full_space) mss = full_space; @@ -683,6 +747,9 @@ if (window <= free_space - mss || window > free_space) window = (free_space/mss)*mss; + WEB100_VAR_SET(tp, X_dbg3, free_space); + WEB100_VAR_SET(tp, X_dbg2, mss); + WEB100_VAR_SET(tp, X_dbg1, window); return window; } @@ -1197,6 +1264,7 @@ tp->snd_wnd = 0; tcp_init_wl(tp, tp->write_seq, 0); tp->snd_una = tp->write_seq; + WEB100_VAR_SET(tp, SndUna, tp->snd_una); tp->snd_sml = tp->write_seq; tp->rcv_nxt = 0; tp->rcv_wup = 0; @@ -1231,6 +1299,7 @@ TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; tp->snd_nxt = tp->write_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); tp->pushed_seq = tp->write_seq; /* Send it off. */ diff -urP linux-2.4.19.orig/net/ipv4/tcp_timer.c linux-2.4.19/net/ipv4/tcp_timer.c --- linux-2.4.19.orig/net/ipv4/tcp_timer.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv4/tcp_timer.c Wed Feb 12 12:09:01 2003 @@ -371,6 +371,7 @@ NET_INC_STATS_BH(TCPTimeouts); } } + WEB100_UPDATE_FUNC(tp, web100_update_timeout(tp)); tcp_enter_loss(sk, 0); @@ -401,6 +402,7 @@ * the 120 second clamps though! 
*/ tp->backoff++; + WEB100_VAR_SET(tp, CurTimeoutCount, tp->backoff); tp->retransmits++; out_reset_timer: diff -urP linux-2.4.19.orig/net/ipv4/web100_stats.c linux-2.4.19/net/ipv4/web100_stats.c --- linux-2.4.19.orig/net/ipv4/web100_stats.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19/net/ipv4/web100_stats.c Wed Feb 12 12:09:02 2003 @@ -0,0 +1,681 @@ +/* + * net/ipv4/web100_stats.c + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * Copyright (C) 2000 Jeffrey Semke + * + * The Web 100 project. See http://www.web100.org + * + * Functions for creating, destroying, and updating the Web100 + * statistics structure. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define WC_INF32 0xffffffff + +#define WC_DEATH_SLOTS 8 +#define WC_PERSIST_TIME 60 + +/* BEWARE: The release process updates the version string */ +char *web100_version_string = "2.1-pre2 alpha 200210291117" +#ifdef CONFIG_WEB100_NET100 + " net100" +#endif + ; + +static void death_cleanup(unsigned long dummy); + +/* Global stats reader-writer lock */ +rwlock_t web100_linkage_lock = RW_LOCK_UNLOCKED; + +/* Data structures for tying together stats */ +static int web100stats_next_cid; +static int web100stats_conn_num; +static int web100stats_htsize; +struct web100stats **web100stats_ht; +struct web100stats *web100stats_first = NULL; + +static struct web100stats *death_slots[WC_DEATH_SLOTS]; +static int cur_death_slot; +static spinlock_t death_lock = SPIN_LOCK_UNLOCKED; +static struct timer_list stats_persist_timer = { function: death_cleanup }; +static int ndeaths; + +#ifdef CONFIG_WEB100_NETLINK +static struct sock *web100_nlsock; +#endif + +extern struct proc_dir_entry *proc_web100_dir; 
+ + +/* + * Structural maintenance + */ + +static inline int web100stats_hash(int cid) +{ + return cid % web100stats_htsize; /* bucket index into web100stats_ht */ +} + +struct web100stats *web100stats_lookup(int cid) +{ + struct web100stats *stats; + + /* Let's ensure safety here. It's not too expensive and may change. */ + if (cid < 0 || cid >= WEB100_MAX_CONNS) + return NULL; + + stats = web100stats_ht[web100stats_hash(cid)]; + while (stats && stats->wc_cid != cid) /* walk the bucket's chain until the cid matches or the chain ends */ + stats = stats->wc_hash_next; + return stats; +} + +/* This will get really slow as the cid space fills. This can be done + * better, but it's just not worth it right now. + * The caller must hold the lock. + */ +static int get_next_cid(void) +{ + int i; + + if (web100stats_conn_num >= WEB100_MAX_CONNS) + return -1; /* every cid is already in use */ + + i = web100stats_next_cid; + do { + if (web100stats_lookup(i) == NULL) + break; + i = (i + 1) % WEB100_MAX_CONNS; + } while (i != web100stats_next_cid); + web100stats_next_cid = (i + 1) % WEB100_MAX_CONNS; /* resume the next search just past the cid handed out */ + + return i; +} + +static void stats_link(struct web100stats *stats) +{ + int hash; + + if ((stats->wc_cid = get_next_cid()) < 0) + return; /* no cid available: stats stays unlinked with a negative wc_cid */ + + hash = web100stats_hash(stats->wc_cid); + stats->wc_hash_next = web100stats_ht[hash]; + stats->wc_hash_prev = NULL; + if (web100stats_ht[hash]) + web100stats_ht[hash]->wc_hash_prev = stats; + web100stats_ht[hash] = stats; /* new entry becomes the head of its hash chain */ + + stats->wc_next = web100stats_first; + stats->wc_prev = NULL; + if (web100stats_first) + web100stats_first->wc_prev = stats; + web100stats_first = stats; /* ...and the head of the global list */ + + web100stats_conn_num++; + proc_web100_dir->nlink = web100stats_conn_num + 2; +} + +static void stats_unlink(struct web100stats *stats) +{ + int hash; + + hash = web100stats_hash(stats->wc_cid); + if (stats->wc_hash_next) + stats->wc_hash_next->wc_hash_prev = stats->wc_hash_prev; + if (stats->wc_hash_prev) + stats->wc_hash_prev->wc_hash_next = stats->wc_hash_next; + if (stats == web100stats_ht[hash]) + web100stats_ht[hash] = stats->wc_hash_next ? 
+ stats->wc_hash_next : + stats->wc_hash_prev; + + if (stats->wc_next) + stats->wc_next->wc_prev = stats->wc_prev; + if (stats->wc_prev) + stats->wc_prev->wc_next = stats->wc_next; + if (stats == web100stats_first) + web100stats_first = stats->wc_next ? stats->wc_next : + stats->wc_prev; + + web100stats_conn_num--; + proc_web100_dir->nlink = web100stats_conn_num + 2; +} + +static void stats_persist(struct web100stats *stats) +{ + spin_lock_bh(&death_lock); + + stats->wc_death_next = death_slots[cur_death_slot]; + death_slots[cur_death_slot] = stats; + if (ndeaths <= 0) { + stats_persist_timer.expires = jiffies + WC_PERSIST_TIME * HZ / WC_DEATH_SLOTS; + add_timer(&stats_persist_timer); + } + ndeaths++; + + spin_unlock_bh(&death_lock); +} + +static void death_cleanup(unsigned long dummy) +{ + struct web100stats *stats, *next; + + spin_lock_bh(&death_lock); + + cur_death_slot = (cur_death_slot + 1) % WC_DEATH_SLOTS; + stats = death_slots[cur_death_slot]; + while (stats) { + stats->wc_dead = 1; + ndeaths--; + next = stats->wc_death_next; + write_lock_bh(&web100_linkage_lock); + web100_stats_unuse(stats); + write_unlock_bh(&web100_linkage_lock); + stats = next; + } + death_slots[cur_death_slot] = NULL; + + if (ndeaths > 0) { + stats_persist_timer.expires = jiffies + WC_PERSIST_TIME * HZ / WC_DEATH_SLOTS; + add_timer(&stats_persist_timer); + } + + spin_unlock_bh(&death_lock); +} + + +/* Tom Dunigan's (slightly modified) netlink code. Notifies listening apps + * of Web100 events. 
+ * + * NOTE: we are currently squatting on netlink family 10 (NETLINK_WEB100) in + * include/linux/netlink.h + */ + +#ifdef CONFIG_WEB100_NETLINK +void web100_netlink_event(int type, int cid) +{ + struct web100_netlink_msg *msg; + struct sk_buff *tmpskb; + + if (web100_nlsock == NULL) + return; + + if ((tmpskb = alloc_skb((sizeof (struct web100_netlink_msg)), GFP_ATOMIC)) == NULL) { + printk(KERN_INFO "web100_netlink_event: alloc_skb failure\n"); + return; + } + + skb_put(tmpskb, sizeof (struct web100_netlink_msg)); + msg = (struct web100_netlink_msg *)tmpskb->data; + msg->type = type; + msg->cid = cid; + netlink_broadcast(web100_nlsock, tmpskb, 0, ~0, GFP_ATOMIC); +} +#endif /* CONFIG_WEB100_NETLINK */ + +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; + +/* Called whenever a TCP/IPv4 sock is created. + * net/ipv4/tcp_ipv4.c: tcp_v4_syn_recv_sock, + * tcp_v4_init_sock + * Allocates a stats structure and initializes values. + */ +int web100_stats_create(struct sock *sk) +{ + struct web100stats *stats; + struct web100directs *vars; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct timeval tv; + + if ((stats = kmalloc(sizeof (struct web100stats), gfp_any())) == NULL) + return -ENOMEM; + sk->tp_pinfo.af_tcp.tcp_stats = stats; + vars = &stats->wc_vars; + + memset(stats, 0, sizeof (struct web100stats)); + + stats->wc_cid = -1; + stats->wc_sk = sk; + atomic_set(&stats->wc_users, 0); + + stats->wc_limstate = WC_SNDLIM_STARTUP; + do_gettimeofday(&stats->wc_limstate_time); + + vars->NagleEnabled = !(sk->tp_pinfo.af_tcp.nonagle); + vars->ActiveOpen = !in_interrupt(); + + vars->SndUna = tp->snd_una; + vars->SndNxt = tp->snd_nxt; + vars->SndMax = tp->snd_nxt; + vars->SndISS = tp->snd_nxt; + + do_gettimeofday(&tv); + vars->StartTime = tv.tv_sec * 10 + tv.tv_usec / 100000; + vars->StartTimeSec = tv.tv_sec; + vars->StartTimeUsec = tv.tv_usec; + stats->wc_start_monotime = web100_mono_time(); + + vars->MinRTT = vars->MinRTO = vars->MinMSS = 
vars->MinRwinRcvd = + vars->MinRwinSent = vars->MinSsthresh = WC_INF32; + + if (sysctl_web100_sbufmode == WC_BUFMODE_OS) { + vars->X_SBufMode = WC_BUFMODE_OS; + } else { + vars->X_SBufMode = WC_BUFMODE_WEB100; + if (!(sk->userlocks & SOCK_SNDBUF_LOCK)) { + sk->userlocks |= SOCK_SNDBUF_LOCK; + sk->sndbuf = sysctl_wmem_default; + } + } + if (sysctl_web100_rbufmode == WC_BUFMODE_OS) { + vars->X_RBufMode = WC_BUFMODE_OS; + vars->LimRwin = tp->window_clamp; + } else { + vars->X_RBufMode = WC_BUFMODE_WEB100; + if (!(sk->userlocks & SOCK_RCVBUF_LOCK)) { + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = sysctl_rmem_default; + } + vars->LimRwin = WC_INF32; + } + + web100_stats_use(stats); + + return 0; +} + +void web100_stats_destroy(struct web100stats *stats) +{ + /* Attribute final sndlim time. */ + web100_update_sndlim(&stats->wc_sk->tp_pinfo.af_tcp, stats->wc_limstate); + + if (stats->wc_cid >= 0) { +#ifdef CONFIG_WEB100_NETLINK + web100_netlink_event(WC_NL_TYPE_DISCONNECT, stats->wc_cid); +#endif + stats_persist(stats); + } else { + write_lock_bh(&web100_linkage_lock); + web100_stats_unuse(stats); + write_unlock_bh(&web100_linkage_lock); + } +} + +/* Do not call directly. Called from web100_stats_unuse(). */ +void web100_stats_free(struct web100stats *stats) +{ + if (stats->wc_cid >= 0) { + stats_unlink(stats); + } + kfree(stats); +} + +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; + +/* Called when a connection enters the ESTABLISHED state, and has all its + * state initialized. + * net/ipv4/tcp_input.c: tcp_rcv_state_process, + * tcp_rcv_synsent_state_process + * Here we link the statistics structure in so it is visible in the /proc + * fs, and do some final init. 
+ */ +void web100_stats_establish(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100stats *stats = tp->tcp_stats; + struct web100directs *vars; + + if (stats == NULL) + return; + vars = &stats->wc_vars; /* derived from stats only once it is known to be non-NULL */ + /* Let's set these here, since they can't change once the + * connection is established. + */ + vars->LocalPort = sk->num; + vars->RemPort = ntohs(sk->dport); + + if (vars->LocalAddressType == WC_ADDRTYPE_IPV4) { + vars->LocalAddress.v4addr = sk->rcv_saddr; + vars->RemAddress.v4addr = sk->daddr; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (vars->LocalAddressType == WC_ADDRTYPE_IPV6) { + memcpy(&vars->LocalAddress.v6addr.addr, &sk->net_pinfo.af_inet6.saddr, 16); + memcpy(&vars->RemAddress.v6addr.addr, &sk->net_pinfo.af_inet6.daddr, 16); + } +#endif + else { + printk(KERN_ERR "Web100: LocalAddressType not valid.\n"); + } + vars->LocalAddress.v6addr.type = vars->RemAddress.v6addr.type = vars->LocalAddressType; + + vars->SACKEnabled = tp->sack_ok; + vars->TimestampsEnabled = tp->tstamp_ok; +#ifdef CONFIG_INET_ECN + vars->ECNEnabled = tp->ecn_flags & TCP_ECN_OK; +#endif + + if (tp->wscale_ok) { + vars->WinScaleRcvd = tp->snd_wscale; + vars->WinScaleSent = tp->rcv_wscale; + } else { + vars->WinScaleRcvd = -1; /* -1 marks "window scaling not negotiated" */ + vars->WinScaleSent = -1; + } + vars->SndWinScale = vars->WinScaleRcvd; + vars->RcvWinScale = vars->WinScaleSent; + + vars->CurCwnd = tp->snd_cwnd * tp->mss_cache; + vars->CurSsthresh = tp->snd_ssthresh * tp->mss_cache; + + vars->RecvISS = vars->RcvNxt = tp->rcv_nxt; + + vars->RetranThresh = tp->reordering; + + vars->LimRwin = min_t(__u32, vars->LimRwin, 65535U << tp->rcv_wscale); /* clamp to the largest window a 16-bit field can advertise at this scale */ + + write_lock_bh(&web100_linkage_lock); + stats_link(stats); /* assigns wc_cid and links stats into the hash table and global list */ + write_unlock_bh(&web100_linkage_lock); + + web100_update_sndlim(tp, WC_SNDLIM_SENDER); + +#ifdef CONFIG_WEB100_NETLINK + web100_netlink_event(WC_NL_TYPE_CONNECT, stats->wc_cid); +#endif +} + +/* + * Statistics update functions + */ + +void web100_update_snd_nxt(struct 
tcp_opt *tp) +{ + struct web100stats *stats = tp->tcp_stats; + + if (after(tp->snd_nxt, stats->wc_vars.SndMax)) { + if (before(stats->wc_vars.SndMax, stats->wc_vars.SndISS) && + after(tp->snd_nxt, stats->wc_vars.SndISS)) + stats->wc_vars.SendWraps++; + stats->wc_vars.ThruBytesAcked += (__u32) (tp->snd_nxt - stats->wc_vars.SndMax); /* XXX */ + stats->wc_vars.SndMax = tp->snd_nxt; + } + stats->wc_vars.SndNxt = tp->snd_nxt; +} + +void web100_update_rtt(struct tcp_opt *tp, unsigned long rtt_sample) +{ + struct web100stats *stats = tp->tcp_stats; + unsigned long rtt_sample_msec = rtt_sample * 1000 / HZ; + __u32 rto; + + stats->wc_vars.SampleRTT = rtt_sample_msec; + + if (rtt_sample_msec > stats->wc_vars.MaxRTT) + stats->wc_vars.MaxRTT = rtt_sample_msec; + if (rtt_sample_msec < stats->wc_vars.MinRTT) + stats->wc_vars.MinRTT = rtt_sample_msec; + + stats->wc_vars.CountRTT++; + stats->wc_vars.SumRTT += rtt_sample_msec; + + if (stats->wc_vars.PreCongCountRTT != stats->wc_vars.PostCongCountRTT) { + stats->wc_vars.PostCongCountRTT++; + stats->wc_vars.PostCongSumRTT += rtt_sample_msec; + } + + /* srtt is stored as 8 * the smoothed estimate */ + stats->wc_vars.SmoothedRTT = + (tp->srtt >> 3) * 1000 / HZ; + + rto = tp->rto * 1000 / HZ; + if (rto > stats->wc_vars.MaxRTO) + stats->wc_vars.MaxRTO = rto; + if (rto < stats->wc_vars.MinRTO) + stats->wc_vars.MinRTO = rto; + stats->wc_vars.CurRTO = rto; + + stats->wc_vars.CurTimeoutCount = 0; + + stats->wc_vars.RTTVar = (tp->rttvar >> 2) * 1000 / HZ; +} + +void web100_update_timeout(struct tcp_opt *tp) { + struct web100stats *stats = tp->tcp_stats; + + stats->wc_vars.CurTimeoutCount++; + if (tp->backoff) + stats->wc_vars.SubsequentTimeouts++; + else + stats->wc_vars.Timeouts++; + if (tp->ca_state == TCP_CA_Open) + stats->wc_vars.AbruptTimeouts++; +} + +void web100_update_mss(struct tcp_opt *tp) +{ + struct web100stats *stats = tp->tcp_stats; + int mss = tp->mss_cache; + + stats->wc_vars.CurMSS = mss; + if (mss > stats->wc_vars.MaxMSS) + 
stats->wc_vars.MaxMSS = mss; + if (mss < stats->wc_vars.MinMSS) + stats->wc_vars.MinMSS = mss; +} + +void web100_update_cwnd(struct tcp_opt *tp) +{ + struct web100stats *stats = tp->tcp_stats; + __u16 mss = tp->mss_cache; + __u32 cwnd; + __u32 ssthresh; + + if (mss == 0) { /* guards the divisions by mss below */ + printk("Web100: web100_update_cwnd: mss == 0\n"); + return; + } + + cwnd = min(WC_INF32 / mss, tp->snd_cwnd) * mss; /* clamp before scaling so the product cannot exceed WC_INF32 */ + stats->wc_vars.CurCwnd = cwnd; + if (cwnd > stats->wc_vars.MaxCwnd) + stats->wc_vars.MaxCwnd = cwnd; + + ssthresh = min(WC_INF32 / mss, tp->snd_ssthresh) * mss; /* same saturating scale as for cwnd */ + stats->wc_vars.CurSsthresh = ssthresh; + + /* Discard initial ssthresh set at infinity, so it is kept out of the Max/Min statistics. NOTE(review): the constant below has seven f's (0x7ffffff); presumably 0x7fffffff was intended -- confirm. */ + if (tp->snd_ssthresh >= 0x7ffffff) { + return; + } + if (ssthresh > stats->wc_vars.MaxSsthresh) + stats->wc_vars.MaxSsthresh = ssthresh; + if (ssthresh < stats->wc_vars.MinSsthresh) + stats->wc_vars.MinSsthresh = ssthresh; +} + +void web100_update_rwin_rcvd(struct tcp_opt *tp) +{ + struct web100stats *stats = tp->tcp_stats; + __u32 win = tp->snd_wnd; /* recorded as the receiver window received (CurRwinRcvd) */ + + stats->wc_vars.CurRwinRcvd = win; + if (win > stats->wc_vars.MaxRwinRcvd) + stats->wc_vars.MaxRwinRcvd = win; + if (win < stats->wc_vars.MinRwinRcvd) + stats->wc_vars.MinRwinRcvd = win; +} + +void web100_update_rwin_sent(struct tcp_opt *tp) +{ + struct web100stats *stats = tp->tcp_stats; + __u32 win = tp->rcv_wnd; + + /* Update our advertised window. 
*/ + stats->wc_vars.CurRwinSent = win; + if (win > stats->wc_vars.MaxRwinSent) + stats->wc_vars.MaxRwinSent = win; + if (win < stats->wc_vars.MinRwinSent) + stats->wc_vars.MinRwinSent = win; +} + + +/* TODO: change this to a generic state machine instrument */ +static void web100_state_update(struct tcp_opt *tp, int why, __u64 bytes) +{ + struct web100stats *stats = tp->tcp_stats; + struct timeval now; + + do_gettimeofday(&now); + stats->wc_vars.SndLimTime[stats->wc_limstate] += + (1000000*(now.tv_sec - stats->wc_limstate_time.tv_sec)) + + ((signed)(now.tv_usec) - stats->wc_limstate_time.tv_usec); + memcpy(&stats->wc_limstate_time, &now, sizeof (struct timeval)); + + stats->wc_vars.SndLimBytes[why] += bytes - stats->wc_limstate_bytes; + stats->wc_limstate_bytes = bytes; + + if (stats->wc_limstate != why) { + stats->wc_limstate = why; + stats->wc_vars.SndLimTrans[why]++; + } +} + +void web100_update_sndlim(struct tcp_opt *tp, int why) +{ + struct web100stats *stats = tp->tcp_stats; + + if (why < 0) { + printk("web100_update_sndlim: BUG: why < 0\n"); + return; + } + + web100_state_update(tp, why, stats->wc_vars.DataBytesOut); + /* future instruments on other sender bottlenecks here... */ + /* if (!why) { why = ??? } */ + /* web100_state_update(tp, why, stats->wc_vars.DataBytesOut); */ +} + +void web100_update_congestion(struct tcp_opt *tp, int why_dummy) +{ + struct web100stats *stats = tp->tcp_stats; + + stats->wc_vars.CongestionSignals++; + stats->wc_vars.PreCongSumCwnd += stats->wc_vars.CurCwnd; + + /* This may require more control flags */ + stats->wc_vars.PreCongCountRTT++; + stats->wc_vars.PreCongSumRTT += stats->wc_vars.SampleRTT; +} + +/* Called from tcp_transmit_skb, whenever we push a segment onto the wire. + * This must be called before the header is pushed onto the skb. + */ +void web100_update_segsend(struct tcp_opt *tp, struct sk_buff *skb) +{ + struct web100stats *stats = tp->tcp_stats; + + /* We know we're sending a segment. 
*/ + stats->wc_vars.PktsOut++; + + /* We know the ack seq is rcv_nxt. web100_XXX bug compatible*/ + web100_update_rcv_nxt(tp); + + /* A pure ACK contains no data; everything else is data. */ + if (skb->len > 0) { + stats->wc_vars.DataPktsOut++; + stats->wc_vars.DataBytesOut += skb->len; + } else { + stats->wc_vars.AckPktsOut++; + } + + /* Check for retransmission. */ + if (before(TCP_SKB_CB(skb)->seq, stats->wc_vars.SndMax)) { + stats->wc_vars.PktsRetrans++; + stats->wc_vars.BytesRetrans += skb->len; + } +} + +void web100_update_segrecv(struct tcp_opt *tp, struct sk_buff *skb) +{ + struct web100directs *vars = &tp->tcp_stats->wc_vars; + struct tcphdr *th = skb->h.th; + + vars->PktsIn++; + if (skb->len == th->doff*4) { + vars->AckPktsIn++; + if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) + vars->DupAcksIn++; + } else { + vars->DataPktsIn++; + vars->DataBytesIn += skb->len - th->doff*4; + } +} + +void web100_update_rcv_nxt(struct tcp_opt *tp) +{ + struct web100stats *stats = tp->tcp_stats; + + if (before(stats->wc_vars.RcvNxt, stats->wc_vars.RecvISS) && + after(tp->rcv_nxt, stats->wc_vars.RecvISS)) + stats->wc_vars.RecvWraps++; + stats->wc_vars.ThruBytesReceived += (__u32) (tp->rcv_nxt - stats->wc_vars.RcvNxt); /* XXX */ + stats->wc_vars.RcvNxt = tp->rcv_nxt; +} + +void web100_update_writeq(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100directs *vars = &tp->tcp_stats->wc_vars; + int len = tp->write_seq - vars->SndMax; + + vars->CurAppWQueue = len; + if (len > vars->MaxAppWQueue) + vars->MaxAppWQueue = len; +} + + + +void __init web100_stats_init() +{ + int order; + + memset(death_slots, 0, sizeof (death_slots)); + + web100stats_htsize = tcp_ehash_size; + for (order = 0; (1UL << order) * PAGE_SIZE < web100stats_htsize * + sizeof (struct web100stats *); order++) + ; + printk("Web100: initiailizing hash table of size %d (order %d)\n", + web100stats_htsize, order); + if ((web100stats_ht = (struct web100stats **)__get_free_pages(GFP_ATOMIC, 
order)) == NULL) + panic("Failed to allocate Web100 stats hash table.\n"); + memset(web100stats_ht, 0, web100stats_htsize * sizeof (struct web100stats *)); + +#ifdef CONFIG_WEB100_NETLINK + if ((web100_nlsock = netlink_kernel_create(NETLINK_WEB100, NULL)) == NULL) + printk(KERN_ERR "web100_stats_init(): cannot initialize netlink socket\n"); +#endif + + printk("Web100 %s: Initialization successful\n", web100_version_string); +} diff -urP linux-2.4.19.orig/net/ipv6/tcp_ipv6.c linux-2.4.19/net/ipv6/tcp_ipv6.c --- linux-2.4.19.orig/net/ipv6/tcp_ipv6.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/ipv6/tcp_ipv6.c Wed Feb 12 12:09:02 2003 @@ -680,6 +680,11 @@ tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, sk->sport, sk->dport); + WEB100_VAR_SET(tp, SndISS, tp->write_seq); + WEB100_VAR_SET(tp, SndMax, tp->write_seq); + WEB100_VAR_SET(tp, SndNxt, tp->write_seq); + WEB100_VAR_SET(tp, SndUna, tp->write_seq); + err = tcp_connect(sk); if (err) goto late_failure; @@ -1302,6 +1307,13 @@ newsk = tcp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out; +#ifdef CONFIG_WEB100_STATS + if (web100_stats_create(newsk)) { + sk_free(newsk); + goto out; + } + newsk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV6; +#endif /* Charge newly allocated IPv6 socket */ #ifdef INET_REFCNT_DEBUG @@ -1589,12 +1601,14 @@ skb->dev = NULL; bh_lock_sock(sk); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_segrecv(&sk->tp_pinfo.af_tcp, skb)); ret = 0; if (!sk->lock.users) { if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } else sk_add_backlog(sk, skb); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_cwnd(&sk->tp_pinfo.af_tcp)); bh_unlock_sock(sk); sock_put(sk); @@ -1834,6 +1848,16 @@ sk->sndbuf = sysctl_tcp_wmem[1]; sk->rcvbuf = sysctl_tcp_rmem[1]; +#ifdef CONFIG_WEB100_STATS + { + int err; + if ((err = web100_stats_create(sk))) { + return err; + } + 
sk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV6; + } +#endif + atomic_inc(&tcp_sockets_allocated); return 0; @@ -1857,7 +1881,15 @@ /* Clean up a referenced TCP bind bucket. */ if(sk->prev != NULL) tcp_put_port(sk); - + +#ifdef CONFIG_WEB100_STATS +#if 0 + /* Do we have an ipv4 connection here? */ + if (sk->tp_pinfo.af_tcp.tcp_stats) +#endif + web100_stats_destroy(sk->tp_pinfo.af_tcp.tcp_stats); +#endif + /* If sendmsg cached page exists, toss it. */ if (tp->sndmsg_page != NULL) __free_page(tp->sndmsg_page); diff -urP linux-2.4.19.orig/net/netsyms.c linux-2.4.19/net/netsyms.c --- linux-2.4.19.orig/net/netsyms.c Wed Feb 12 12:05:12 2003 +++ linux-2.4.19/net/netsyms.c Wed Feb 12 12:09:02 2003 @@ -401,6 +401,18 @@ EXPORT_SYMBOL(sysctl_max_syn_backlog); #endif +#if defined(CONFIG_WEB100_STATS) && defined(CONFIG_IPV6_MODULE) +EXPORT_SYMBOL(web100_stats_create); +EXPORT_SYMBOL(web100_stats_destroy); +EXPORT_SYMBOL(web100_update_segrecv); +EXPORT_SYMBOL(web100_update_cwnd); +EXPORT_SYMBOL(web100_update_writeq); +#endif + +#ifdef CONFIG_WEB100_STATS +EXPORT_SYMBOL(tcp_retx_mem); +#endif + #if defined (CONFIG_IPV6_MODULE) EXPORT_SYMBOL(secure_tcpv6_sequence_number); EXPORT_SYMBOL(secure_ipv6_id); @@ -530,6 +542,9 @@ EXPORT_SYMBOL(sysctl_rmem_max); #ifdef CONFIG_INET EXPORT_SYMBOL(sysctl_ip_default_ttl); +#ifdef CONFIG_ALTAIMD +EXPORT_SYMBOL(sysctl_tcp_altAIMD); +#endif #endif #endif diff -urP linux-2.4.19.orig/net/socket.c linux-2.4.19/net/socket.c --- linux-2.4.19.orig/net/socket.c Tue Feb 11 14:51:52 2003 +++ linux-2.4.19/net/socket.c Wed Feb 12 12:15:15 2003 @@ -743,11 +743,13 @@ return -ENOMEM; } - sock = socki_lookup(filp->f_dentry->d_inode); - if ((sk=sock->sk) == NULL) + if ((sk=sock->sk) == NULL) { + if (fna) + kfree(fna); return -EINVAL; + } lock_sock(sk);