On Thu, 2010-05-06 at 13:21 -0700, Christoph Hellwig wrote: > On Wed, May 05, 2010 at 10:52:53AM -0700, Stephen Hemminger wrote: > > Let me put it bluntly. Any design that allows external code to run > > in the kernel is not going to be accepted. Out of tree kernel modules are enough > > of a pain already, why do you expect the developers to add another > > interface. > > Exactly. Until our friends at VMware get this basic fact it's useless > to continue arguing. > > Pankaj and Dmitry: you're fine to waste your time on this, but it's not > going to go anywhere until you address that fundamental problem. The > first thing you need to fix in your architecture is to integrate the VF > function code into the kernel tree, and we can work from there. > > Please post patches doing this if you want to resume the discussion. > > _______________________________________________ > Pv-drivers mailing list > Pv-drivers@xxxxxxxxxx > http://mailman2.vmware.com/mailman/listinfo/pv-drivers As discussed, the following is the patch to give you an idea about the implementation of NPA for the vmxnet3 driver. Although the patch is big, I have verified it with checkpatch.pl. It gave 0 errors / warnings. 
Signed-off-by: Matthieu Bucchaineri <matthieu@xxxxxxxxxx> Signed-off-by: Shreyas Bhatewara <sbhatewara@xxxxxxxxxx> --- drivers/net/vmxnet3/Makefile | 2 drivers/net/vmxnet3/npa_defs.h | 83 + drivers/net/vmxnet3/npa_plugin_api.h | 473 ++++++++ drivers/net/vmxnet3/npa_shell_api.h | 234 ++++ drivers/net/vmxnet3/vmxnet3_defs.h | 2 drivers/net/vmxnet3/vmxnet3_drv.c | 1845 +++++++++++++++++++-------------- drivers/net/vmxnet3/vmxnet3_ethtool.c | 66 + drivers/net/vmxnet3/vmxnet3_int.h | 221 ++-- drivers/net/vmxnet3/vmxnet3_plugin.c | 1221 ++++++++++++++++++++++ 9 files changed, 3221 insertions(+), 926 deletions(-) create mode 100644 drivers/net/vmxnet3/npa_defs.h create mode 100644 drivers/net/vmxnet3/npa_plugin_api.h create mode 100644 drivers/net/vmxnet3/npa_shell_api.h create mode 100644 drivers/net/vmxnet3/vmxnet3_plugin.c diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index 880f509..af501d8 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -32,4 +32,4 @@ obj-$(CONFIG_VMXNET3) += vmxnet3.o -vmxnet3-objs := vmxnet3_drv.o vmxnet3_ethtool.o +vmxnet3-objs := vmxnet3_drv.o vmxnet3_ethtool.o vmxnet3_plugin.o diff --git a/drivers/net/vmxnet3/npa_defs.h b/drivers/net/vmxnet3/npa_defs.h new file mode 100644 index 0000000..74d28b8 --- /dev/null +++ b/drivers/net/vmxnet3/npa_defs.h @@ -0,0 +1,83 @@ +/* + * Network Plugin Architecture definitions. + * + * Copyright (C) 2008-2010, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@xxxxxxxxxx> + * + */ + +#ifndef _NPA_DEFS_H +#define _NPA_DEFS_H + +#define NPA_PLUGIN_NUMPAGES 64 +#define NPA_MEMIO_NUMPAGES 32 +#define NPA_SHARED_NUMPAGES 6 +#define NPA_MAX_PLUGINS_PER_VM 12 +#define VMXNET3_NPA_CMD_SUCCESS 1 +#define VMXNET3_NPA_CMD_FAILURE 0 +#define VMXNET3_PLUGIN_INFO_LEN 32 + +/* these structures are versioned using the vmxnet3 version */ + +struct NPA_PluginPages { + u64 vaddr; + u32 numPages; + u64 pages[NPA_PLUGIN_NUMPAGES]; +}; + +struct NPA_MemioPages { + u64 startPPN; + u32 numPages; +}; + + +struct NPA_SharedPages { + u64 startPPN; + u32 numPages; +}; + +struct NPA_PluginConf { + struct NPA_PluginPages pluginPages; + struct NPA_MemioPages memioPages; + struct NPA_SharedPages sharedPages; + u64 entryVA; /* address of entry function in the plugin */ + u32 deviceInfo[VMXNET3_PLUGIN_INFO_LEN]; /* opaque data returned by + * PF driver */ +}; + + +/* vmkernel and device backend shared definitions */ + +#define VMXNET3_PLUGIN_NAME_LEN 256 +#define VMXNET3_PLUGIN_REPOSITORY "/usr/lib/vmware/npa_plugins" +#define NPA_MEMIO_REGIONS_u64X 6 + +typedef u32 VF_ID; + +struct Vmxnet3_VFInfo { + char pluginName[VMXNET3_PLUGIN_NAME_LEN]; + u32 deviceInfo[VMXNET3_PLUGIN_INFO_LEN]; /* opaque data returned + * by PF driver */ + u64 memioAddr; + u32 memioLen; +}; + +#endif /* _NPA_DEFS_H */ diff --git a/drivers/net/vmxnet3/npa_plugin_api.h b/drivers/net/vmxnet3/npa_plugin_api.h new file mode 100644 index 0000000..11255c2 --- /dev/null +++ b/drivers/net/vmxnet3/npa_plugin_api.h @@ -0,0 +1,473 @@ +/* + * Network Plugin Architecture - Plugin API. 
+ * + * Copyright (C) 2008-2010, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@xxxxxxxxxx> + * + */ + +#ifndef _PLUGIN_API_H +#define _PLUGIN_API_H + +#include "npa_defs.h" +#include "npa_shell_api.h" + +struct Plugin_RxQueueState { + struct Shell_RxQueueHandle *handle; + u8 *ringBaseVA; + u64 ringBasePA; + u32 ringLength; /* length in bytes */ + u32 ringSize; /* # of descriptors/pkts */ +}; + +struct Plugin_TxQueueState { + struct Shell_TxQueueHandle *handle; + u8 *ringBaseVA; + u64 ringBasePA; + u32 ringLength; /* length in bytes */ + u32 ringSize; /* # of descriptors/pkts */ +}; + +#define PLUGIN_MAX_RX_QUEUES 16 /* from vmxnet3_defs.h */ +#define PLUGIN_MAX_TX_QUEUES 8 +#define PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE 4 + +/* value 'ringOffset' range: [0, 4x the # descriptors) */ +#define PLUGIN_SHADOW_ALLOCATION_MULTIPLE 4 + +/* 512-byte alignment for each ring */ +#define PLUGIN_SHADED_AREA_TX_ALLOCATION_ALIGN 512 + +/* # of rings to allocate space for */ +#define PLUGIN_SHADED_AREA_TX_ALLOCATION_MULTIPLE 4 + +/* bytes allocated per desciptor */ +#define 
PLUGIN_SHADED_AREA_TX_MAX_DESC_SIZE_BYTES 16 + +/* add 4K extra bytes */ +#define PLUGIN_SHADED_AREA_TX_EXTRA_ALLOCATION 4096 + +/* 512-byte alignment for each ring */ +#define PLUGIN_SHADED_AREA_RX_ALLOCATION_ALIGN 512 + +/* # of rings to allocate space for */ +#define PLUGIN_SHADED_AREA_RX_ALLOCATION_MULTIPLE 4 + +/* bytes allocated per desciptor */ +#define PLUGIN_SHADED_AREA_RX_MAX_DESC_SIZE_BYTES 16 + +/* add 4K extra bytes */ +#define PLUGIN_SHADED_AREA_RX_EXTRA_ALLOCATION 4096 + +#define PLUGIN_FEATURES_LRO 0x00000001 + +struct Plugin_State { + u32 size; + u32 majorVersion; + u32 minorVersion; + u32 offsetToPrivateSpace; + u32 features; + u32 deviceInfo[VMXNET3_PLUGIN_INFO_LEN]; + void *memioAddr; + u32 memioAddrLen; + u32 mtu; + u32 numRxQueues; + u32 numTxQueues; + u8 updateRxProd; + struct Plugin_RxQueueState rxQueues[PLUGIN_MAX_RX_QUEUES]; + struct Plugin_TxQueueState txQueues[PLUGIN_MAX_TX_QUEUES]; + void *shared; + u32 sharedLen; + struct Shell_Api shellApi; + u64 privateSpace[512]; +}; + +#ifndef INLINE +#define INLINE inline +#endif + +static INLINE void* +PLUGIN_PRIVATE(struct Plugin_State *plugin) +{ + return (u8 *)plugin + plugin->offsetToPrivateSpace; +} + +struct Plugin_SendInfo { + u32 ipHeaderOffset; /* valid if 'ipv4' or 'ipv6' */ + u32 l4HeaderOffset; /* valid if 'ipv4' or 'ipv6' */ + u32 l4DataOffset; /* valid if ('ipv4' or 'ipv6') and + * ('tcp' or 'udp') */ + bool ipv4; + bool ipv6; + bool tcp; + bool udp; + + bool tso; + u32 tsoMss; /* valid if 'tso' is set */ + + bool xsumTcpOrUdp; /* valid if 'tcp' or 'udp' */ + + bool vlan; + u16 vlanTag; /* vlan id+priority bits; valid if 'vlan' is set */ +}; + +struct Plugin_SgElement { + u64 pa; + u32 length; +}; + +/* + * If IPv4 or IPv6 then headers are contiguous in + * first SG, up to 128-bytes. TSO frames, and only TSO frames, + * are contiguous beyond 128 bytes (on Linux model is TBD). 
+ */ + +struct Plugin_SgList { + u32 totalLength; + u32 numElements; + u8 *firstSgVA; + struct Plugin_SgElement *elements; +}; + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_SwInit -- + * + * Initialize the s/w state of the plugin. The h/w should not be initialized + * through this function. This function is called before any other plugin API + * is called by the shell (except for api exchange function). + * + * called during: device/plugin init. + * concurrent with: nothing + * caller provides: info about configuration and environment + * callee performs: verify data provided by shell + * init private state (e.g. head/tail pointers, location of rings) + * callee can call: nothing. callee should not touch hardware and accesses + * to shared memory should be avoided. + * Result: + * 0 for success; non-zero for failure + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_SwInit(struct Plugin_State *plugin); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_ReinitRxRing -- + * + * Initialize the rx ring data structures + * + * called during: device/plugin init. + * device halt + * during a reset (e.g., RSS change, or OS request) + * concurrent with: nothing. Function is called only while device is + * quiesced and the queue is known to be empty. + * caller provides: state and queue # + * callee performs: bzero rings and reinit head/tail pointers/registers + * should not return any buffers that are found, and assume have + * already been garbage collected. + * callee can call: nothing. callee can write to, but not read from, + * registers and/or memory. 
+ * + * Result: + * zero (essentially void) + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_ReinitRxRing(struct Plugin_State *plugin, u32 queue); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_ReinitTxRing -- + * + * Initialize the tx ring data structures + * + * called during: device/plugin init. + * device halt + * during a reset (e.g., RSS change, or OS request) + * concurrent with: nothing. Function is called only while device is + * quiesced and the queue is known to be empty. + * caller provides: state and queue # + * callee performs: bzero rings and reinit head/tail pointers/registers + * should not complete any sends, and assume have + * already been garbage collected. + * callee can call: nothing. callee can write to, but not read from, + * registers and/or memory. + * + * Result: + * zero (essentially void) + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_ReinitTxRing(struct Plugin_State *plugin, u32 queue); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_EnableInterrupt -- + * + * Enable the interrupt indicated by 'intrIdx' + * + * called during: device/plugin init. 
+ * ISR/DPC, to enable interrupts + * OS request (including PM) + * during a reset (e.g., RSS change, or OS request) + * concurrent with: Plugin_AddBuffersToRxRing() + * Plugin_CheckRxRing() + * Plugin_AddFrameToTxRing() + * Plugin_CheckTxRing() + * Plugin_DisableInterrupt() + * caller provides: state and vector # (note: this is not the queue #) + * callee performs: enable interrupt for vector + * callee can call: nothing + * + * Result: + * zero (essentially void) + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_EnableInterrupt(struct Plugin_State *plugin, u32 intrIdx); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_DisableInterrupt -- + * + * Disable the interrupt indicated by 'intrIdx' + * + * called during: ISR to disable interrupts + * OS request (including PM) + * during a reset (e.g., RSS change, or OS request) + * halt / shutdown + * concurrent with: Plugin_AddBuffersToRxRing() + * Plugin_CheckRxRing() + * Plugin_AddFrameToTxRing() + * Plugin_CheckTxRing() + * Plugin_EnableInterrupt() + * caller provides: state and vector # (note: this is not the queue #) + * callee performs: disable interrupt for vector + * callee can call: nothing + * + * Result: + * zero (essentially void) + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_DisableInterrupt(struct Plugin_State *plugin, u32 intrIdx); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_AddFrameToTxRing -- + * + * Add the frame made up of buffers in the sg list 'frame' to the hardware tx + * ring of the given queue. The offload information is passed in 'info'. + * 'lastPktHint' is used to indicate that no more tx packets would be passed + * down in this context and the plugin should use this as a hint to write to + * the h/w doorbell. 
+ * + * called during: ISR/DPC, after ring check + * OS transmit issued for a frame + * concurrent with: Plugin_CheckTxRing() + * Plugin_EnableInterrupt() + * Plugin_DisableInterrupt() + * caller provides: state and queue # + * information about frame (including frame type and header offsets) + * SG array of frame buffers, all eth/ip/tcp/udp headers in first SG + * callee performs: attempt to add frame to tx ring + * callee can call: nothing + * + * Result: + * 0 if successful, 1 to indicate no space in h/w tx ring + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_AddFrameToTxRing(struct Plugin_State *plugin, u32 queue, + const struct Plugin_SendInfo *info, + const struct Plugin_SgList *frame, + bool lastPktHint); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_CheckTxRing -- + * + * Check the tx ring for the given queue for any tx completions. + * This call is made by the shell either during the interrupt or DPC/napi + * context. + * + * called during: ISR/DPC + * concurrent with: Plugin_AddFrameToTxRing() + * Plugin_EnableInterrupt() + * Plugin_DisableInterrupt() + * caller provides: state and queue # + * callee performs: checks ring for any completed sends, and returns them + * callee can call: Shell_CompleteSend() + * + * Result: + * zero (essentially void) + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_CheckTxRing(struct Plugin_State *plugin, u32 queue); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_CheckRxRing -- + * + * Check the rx ring for any incoming packets on the given queue. + * 'maxPkts' indicate the maximum number of packets the plugin can indicate + * upto the shell in this context. 
The shell calls this function during the + * interrupt or DPC/napi context. + * + * called during: ISR/DPC + * concurrent with: Plugin_AddBuffersToRxRing() + * Plugin_EnableInterrupt() + * Plugin_DisableInterrupt() + * caller provides: state and queue # + * max # of frames to indicate in one call + * callee performs: checks ring for any receives, and indicates them up. + * Callee can/should indicate up frames with bad checksums, + * but should not indicate runts, truncated frames, bad CRCs + * or other types of bad frames. + * callee can call: Shell_IndicateRecv() + * Shell_FreeBuffer() + * + * Result: + * 1 to indicate need for buffers, 0 for no need for buffers. + * + * Side-effects: + * Packets are indicated up and delivered to the OS stack during this call. + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_CheckRxRing(struct Plugin_State *plugin, u32 queue, + u32 maxPkts); + + +/* + *---------------------------------------------------------------------------- + * + * Plugin_AddBuffersToRxRing -- + * + * The plugin can make calls to the shell to allocate more buffers. This call + * is made during the plugin initialization or after Plugin_CheckRxRing or + * when the OS stack returns buffers back to the shell. The plugin should try + * to allocate as many buffers as needed to fill the h/w rings. + * + * called during: device/plugin init. 
+ * ISR/DPC, after Plugin_CheckRxRing() + * OS returns buffers (if applicable for OS) + * concurrent with: Plugin_CheckRxRing() + * Plugin_EnableInterrupt() + * Plugin_DisableInterrupt() + * caller provides: state and queue # + * callee performs: add empty buffers to rx ring(s), as much as possible + * touch device registers, if applicable + * callee can call: Shell_AllocSmallBuffer() + * Shell_AllocLargeBuffer() + * Shell_FreeBuffer() + * + * Result: + * zero (essentially void) + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Plugin_AddBuffersToRxRing(struct Plugin_State *plugin, u32 queue); + +struct Plugin_Api { + Plugin_SwInit *swInit; + Plugin_ReinitRxRing *reinitRxRing; + Plugin_ReinitTxRing *reinitTxRing; + Plugin_EnableInterrupt *enableInterrupt; + Plugin_DisableInterrupt *disableInterrupt; + Plugin_AddFrameToTxRing *addFrameToTxRing; + Plugin_CheckTxRing *checkTxRing; + Plugin_CheckRxRing *checkRxRing; + Plugin_AddBuffersToRxRing *addBuffersToRxRing; +}; + +/* + *---------------------------------------------------------------------------- + * + * NPA_PluginMain -- + * + * This is the first function that the shell calls into the plugin and is + * used to obtain the plugin API function pointer for further communication. + * + * Result: + * Plugin_Api function table filled with the plugin api functions. + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u32 NPA_PluginMainFunc(struct Plugin_Api *pluginApi); +NPA_PluginMainFunc NPA_PluginMain; + +#endif /* _PLUGIN_API_H */ diff --git a/drivers/net/vmxnet3/npa_shell_api.h b/drivers/net/vmxnet3/npa_shell_api.h new file mode 100644 index 0000000..6f9e19c --- /dev/null +++ b/drivers/net/vmxnet3/npa_shell_api.h @@ -0,0 +1,234 @@ +/* + * Network Plugin Architecture - Shell API. + * + * Copyright (C) 2008-2010, VMware, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@xxxxxxxxxx> + * + */ + +#ifndef _SHELL_API_H +#define _SHELL_API_H + +#define SHELL_SMALL_RECV_BUFFER_SIZE 2048 +#define SHELL_LARGE_RECV_BUFFER_SIZE 4096 + +/* + * Plugin should never indicate more than 4 sg's in a rx packet. 
+ */ +#define SHELL_MAX_RECV_SG_LEN 4 + +/* + * Over allocate the sg array for future use + */ +#define SHELL_MAX_LRO_RECV_SG_LEN 18 + +#define SHELL_RECV_HASH_FUNCTION_NONE 0 +#define SHELL_RECV_HASH_FUNCTION_TOEPLITZ 1 + +#define SHELL_RECV_HASH_TYPE_NONE 0 +#define SHELL_RECV_HASH_TYPE_IPV4 1 +#define SHELL_RECV_HASH_TYPE_TCPIPV4 5 /* 1 | 4 */ +#define SHELL_RECV_HASH_TYPE_IPV6 2 +#define SHELL_RECV_HASH_TYPE_TCPIPV6 6 /* 2 | 4 */ + +#define SHELL_XSUM_UNKNOWN 0 +#define SHELL_XSUM_CORRECT 1 +#define SHELL_XSUM_INCORRECT 2 + +struct Shell_RxQueueHandle; +struct Shell_TxQueueHandle; + +struct Shell_RecvFrameSG { + u32 ringOffset; + u32 length; + u32 offset; +}; + +struct Shell_RecvFrame { + u32 sgLength; + u32 byteLength; + struct Shell_RecvFrameSG sg[SHELL_MAX_LRO_RECV_SG_LEN]; + bool perfectFiltered; /* indicate if packet exactly + * matches RX filters */ + bool vlan; + u16 vlanTag; /* valid if vlan == TRUE */ + u32 rssHashFunction; + u32 rssHashType; /* valid if rssHashFunction != 0 */ + u32 rssHashValue; /* valid if rssHashFunction and + * rssHashType != 0 */ + bool ipv4; + bool ipv6; + bool nonIp; + bool tcp; + bool udp; + u8 ipXsum; /* UNKNOWN , CORRECT , INCORRECT */ + u8 tcpXsum; /* UNKNOWN , CORRECT , INCORRECT */ + u8 udpXsum; /* UNKNOWN , CORRECT , INCORRECT */ +}; + + +/* + *---------------------------------------------------------------------------- + * + * Shell_AllocSmallBuffer -- + * + * Allocate a 'small' buffer from the shell identified by the ringOffset. + * ringOffset can range from [0..#descs-for-all-rings] and is used + * by the shell to identify the buffer in the shadow ring maintained by + * shell. 
+ * + * This call can only be made from Plugin_AddBuffersToRxRing + * + * Result: + * PA of the buffer + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u64 Shell_AllocSmallBuffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset); + +/* + *---------------------------------------------------------------------------- + * + * Shell_AllocLargeBuffer -- + * + * Allocate a 'large' buffer from the shell identified by the ringOffset. + * ringOffset can range from [0..#descs-for-all-rings] and is used + * by the shell to identify the buffer in the shadow ring maintained by + * shell. + * + * This call can only be made from Plugin_AddBuffersToRxRing + * + * Result: + * PA of the buffer + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef u64 Shell_AllocLargeBuffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset); + +/* + *---------------------------------------------------------------------------- + * + * Shell_FreeBuffer -- + * + * Free the buffer allocated from Shell_Alloc{Small|Large}Buffer identified + * by the cookie 'ringOffset' + * + * This call can be made from Plugin_CheckRxRing(Plugin_AddBuffersToRxRing?) + * + * Result: + * None. + * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef void Shell_FreeBuffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset); + + + +/* + *---------------------------------------------------------------------------- + * + * Shell_CompleteSend -- + * + * Indicate # of pre-tso tx completion to the shell. + * + * This call can only be made from Plugin_CheckTxRing + * + * Result: + * None. 
+ * + * Side-effects: + * None + * + *---------------------------------------------------------------------------- + */ + +typedef void Shell_CompleteSend(struct Shell_TxQueueHandle *handle, + u32 numPkts); + + +/* + *---------------------------------------------------------------------------- + * + * Shell_IndicateRecv -- + * + * Indicate a receive frame to the shell. The buffer ownership is transferred + * to the shell and the rest of the offload information is transferred along + * with it in the RecvFrame + * + * This call can only be made from Plugin_CheckRxRing + * + * Result: + * 0 for success, 1 for failure + * + * Side-effects: + * The buffers are passed up to the OS stack. + * + *---------------------------------------------------------------------------- + */ + +typedef u32 Shell_IndicateRecv(struct Shell_RxQueueHandle *handle, + struct Shell_RecvFrame *frame); + +/* + *---------------------------------------------------------------------------- + * + * Shell_Log -- + * + * Simple logging function. + * + * This call can be made from anyplace (except NPA_PluginMain) + * + * Result: + * None. + * + * Side-effects: + * None. 
+ * + *---------------------------------------------------------------------------- + */ + +typedef void Shell_Log(size_t nargs, const char *fmt, ...); + +struct Shell_Api { + Shell_AllocSmallBuffer *allocSmallBuffer; + Shell_AllocLargeBuffer *allocLargeBuffer; + Shell_FreeBuffer *freeBuffer; + Shell_CompleteSend *completeSend; + Shell_IndicateRecv *indicateRecv; + Shell_Log *log; +}; + +#endif /* _SHELL_API_H */ diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index b4889e6..53341f0 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -76,7 +76,9 @@ enum { VMXNET3_CMD_UPDATE_IML, VMXNET3_CMD_UPDATE_PMCFG, VMXNET3_CMD_UPDATE_FEATURE, + VMXNET3_CMD_STOP_EMULATION, VMXNET3_CMD_LOAD_PLUGIN, + VMXNET3_CMD_ACTIVATE_VF, VMXNET3_CMD_FIRST_GET = 0xF00D0000, VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 989b742..417581a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -44,6 +44,23 @@ MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table); static atomic_t devices_found; +#ifndef roundup +# define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#endif + +/* + * This is the text segment that'll be used to load HW plugins code. + */ +static u8 vmxnet3_plugin_code_mem[NPA_PLUGIN_NUMPAGES * PAGE_SIZE * + NPA_MAX_PLUGINS_PER_VM] + __attribute__((aligned(PAGE_SIZE), section(".npatext"))); +/* + * The following array (and corresponding spinlock) is used to + * allocated code regions. 
+ */ +static bool vmxnet3_plugin_code_used[NPA_MAX_PLUGINS_PER_VM]; +static spinlock_t vmxnet3_plugin_code_lock; + /* * Enable/Disable the given intr @@ -51,14 +68,26 @@ static atomic_t devices_found; static void vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx) { - VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0); + if (adapter->intr.event_intr_idx == intr_idx) { + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, + 0); + } else { + Plugin_EnableInterrupt(adapter, intr_idx); + } + } static void vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx) { - VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1); + if (adapter->intr.event_intr_idx == intr_idx) { + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, + 1); + } else { + Plugin_DisableInterrupt(adapter, intr_idx); + } + } @@ -183,6 +212,19 @@ vmxnet3_process_events(struct vmxnet3_adapter *adapter) schedule_work(&adapter->work); } + /* Check if passthru is requested */ + if (events & VMXNET3_ECR_DIC) { + /* XXX: PR 496886, use DID_LO to determine what transition */ + if (adapter->passthru) { + printk(KERN_ERR "%s: DIC: passthru -> emulation\n", + adapter->netdev->name); + schedule_work(&adapter->work); + } else { + printk(KERN_ERR "%s: DIC: emulation -> passthru\n", + adapter->netdev->name); + schedule_work(&adapter->passthru_work); + } + } } #ifdef __BIG_ENDIAN_BITFIELD @@ -302,34 +344,31 @@ vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi, tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */ } - static int -vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq, - struct pci_dev *pdev, struct vmxnet3_adapter *adapter) +vmxnet3_unmap_pkt(struct vmxnet3_tx_queue *tq, struct pci_dev *pdev, + struct vmxnet3_adapter *adapter) { + struct vmxnet3_tx_shadow_ring *ring = &tq->shadow_ring; struct sk_buff *skb; + u32 eop_idx; int entries = 0; - /* no out of order completion */ - 
BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp); - BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1); - - skb = tq->buf_info[eop_idx].skb; + eop_idx = ring->base[ring->next2comp].eop_idx; + dev_dbg(&adapter->pdev->dev, "tx complete [%u %u]\n", + ring->next2comp, eop_idx); + skb = ring->base[ring->next2comp].skb; BUG_ON(skb == NULL); - tq->buf_info[eop_idx].skb = NULL; - - VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size); + ring->base[ring->next2comp].skb = NULL; - while (tq->tx_ring.next2comp != eop_idx) { - vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp, - pdev); + while (ring->next2comp != eop_idx) { + vmxnet3_unmap_tx_buf(ring->base + ring->next2comp, pdev); /* update next2comp w/o tx_lock. Since we are marking more, * instead of less, tx ring entries avail, the worst case is * that the tx routine incorrectly re-queues a pkt due to * insufficient tx ring entries. */ - vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring); + vmxnet3_tx_shadow_ring_adv_next2comp(ring); entries++; } @@ -337,125 +376,84 @@ vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq, return entries; } - -static int -vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq, - struct vmxnet3_adapter *adapter) -{ - int completed = 0; - union Vmxnet3_GenericDesc *gdesc; - - gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; - while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) { - completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX( - &gdesc->tcd), tq, adapter->pdev, - adapter); - - vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring); - gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; - } - - if (completed) { - spin_lock(&tq->tx_lock); - if (unlikely(vmxnet3_tq_stopped(tq, adapter) && - vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) > - VMXNET3_WAKE_QUEUE_THRESHOLD(tq) && - netif_carrier_ok(adapter->netdev))) { - vmxnet3_tq_wake(tq, adapter); - } - spin_unlock(&tq->tx_lock); - } - return completed; -} - - static void vmxnet3_tq_cleanup(struct 
vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { int i; + struct vmxnet3_tx_shadow_ring *ring = &tq->shadow_ring; - while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) { + while (ring->next2comp != ring->next2fill) { struct vmxnet3_tx_buf_info *tbi; - union Vmxnet3_GenericDesc *gdesc; - - tbi = tq->buf_info + tq->tx_ring.next2comp; - gdesc = tq->tx_ring.base + tq->tx_ring.next2comp; + tbi = ring->base + ring->next2comp; vmxnet3_unmap_tx_buf(tbi, adapter->pdev); if (tbi->skb) { dev_kfree_skb_any(tbi->skb); tbi->skb = NULL; } - vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring); + vmxnet3_tx_shadow_ring_adv_next2comp(ring); } /* sanity check, verify all buffers are indeed unmapped and freed */ - for (i = 0; i < tq->tx_ring.size; i++) { - BUG_ON(tq->buf_info[i].skb != NULL || - tq->buf_info[i].map_type != VMXNET3_MAP_NONE); + for (i = 0; i < ring->size; i++) { + BUG_ON(ring->base[i].skb != NULL || + ring->base[i].map_type != VMXNET3_MAP_NONE); } - tq->tx_ring.gen = VMXNET3_INIT_GEN; - tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0; - - tq->comp_ring.gen = VMXNET3_INIT_GEN; - tq->comp_ring.next2proc = 0; + ring->next2fill = ring->next2comp = 0; } + + void vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { - if (tq->tx_ring.base) { - pci_free_consistent(adapter->pdev, tq->tx_ring.size * - sizeof(struct Vmxnet3_TxDesc), - tq->tx_ring.base, tq->tx_ring.basePA); - tq->tx_ring.base = NULL; + if (tq->plugin_tq->ringBaseVA) { + pci_free_consistent(adapter->pdev, tq->plugin_tq->ringLength, + tq->plugin_tq->ringBaseVA, + tq->plugin_tq->ringBasePA); + tq->plugin_tq->ringBaseVA = NULL; + tq->plugin_tq->ringBasePA = 0; } + if (tq->data_ring.base) { pci_free_consistent(adapter->pdev, tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc), tq->data_ring.base, tq->data_ring.basePA); tq->data_ring.base = NULL; } - if (tq->comp_ring.base) { - pci_free_consistent(adapter->pdev, tq->comp_ring.size * - sizeof(struct Vmxnet3_TxCompDesc), - 
tq->comp_ring.base, tq->comp_ring.basePA); - tq->comp_ring.base = NULL; + if (tq->shadow_ring.base) { + vfree(tq->shadow_ring.base); + tq->shadow_ring.base = NULL; } - kfree(tq->buf_info); - tq->buf_info = NULL; + kfree(tq->sg_list.elements); + tq->sg_list.elements = NULL; } - static void vmxnet3_tq_init(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { int i; - /* reset the tx ring contents to 0 and reset the tx ring states */ - memset(tq->tx_ring.base, 0, tq->tx_ring.size * - sizeof(struct Vmxnet3_TxDesc)); - tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0; - tq->tx_ring.gen = VMXNET3_INIT_GEN; - + /* reset the data ring contents to 0 and reset the data ring + * states + */ + tq->data_ring.next2fill = 0; + tq->data_ring.next2comp = 0; memset(tq->data_ring.base, 0, tq->data_ring.size * - sizeof(struct Vmxnet3_TxDataDesc)); - - /* reset the tx comp ring contents to 0 and reset comp ring states */ - memset(tq->comp_ring.base, 0, tq->comp_ring.size * - sizeof(struct Vmxnet3_TxCompDesc)); - tq->comp_ring.next2proc = 0; - tq->comp_ring.gen = VMXNET3_INIT_GEN; + sizeof(struct Vmxnet3_TxDataDesc)); /* reset the bookkeeping data */ - memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size); - for (i = 0; i < tq->tx_ring.size; i++) - tq->buf_info[i].map_type = VMXNET3_MAP_NONE; + tq->shadow_ring.next2fill = 0; + tq->shadow_ring.next2comp = 0; + memset(tq->shadow_ring.base, 0, tq->shadow_ring.size * + sizeof(struct vmxnet3_tx_shadow_ring)); + for (i = 0; i < tq->shadow_ring.size; i++) + tq->shadow_ring.base[i].map_type = VMXNET3_MAP_NONE; /* stats are not reset */ } @@ -465,18 +463,35 @@ static int vmxnet3_tq_create(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { - BUG_ON(tq->tx_ring.base || tq->data_ring.base || - tq->comp_ring.base || tq->buf_info); + u32 ring_length; + + BUG_ON(tq->plugin_tq->ringBaseVA || tq->data_ring.base || + tq->shadow_ring.base || tq->sg_list.elements); - tq->tx_ring.base = 
pci_alloc_consistent(adapter->pdev, tq->tx_ring.size - * sizeof(struct Vmxnet3_TxDesc), - &tq->tx_ring.basePA); - if (!tq->tx_ring.base) { + /* + * We don't know the underlying hardware's descriptor size, + * thus use the maximum allowed descriptor size. + */ + ring_length = tq->plugin_tq->ringSize * + PLUGIN_SHADED_AREA_TX_MAX_DESC_SIZE_BYTES; + /* Add room for potential alignment */ + ring_length += PLUGIN_SHADED_AREA_TX_ALLOCATION_ALIGN - 1; + /* + * Again, we don't know the underlying hardware's mode of + * operation, so let's give room for multiple rings. + */ + tq->plugin_tq->ringLength = PLUGIN_SHADED_AREA_TX_ALLOCATION_MULTIPLE * + ring_length + PLUGIN_SHADED_AREA_TX_EXTRA_ALLOCATION; + tq->plugin_tq->ringBaseVA = pci_alloc_consistent(adapter->pdev, + tq->plugin_tq->ringLength, + (dma_addr_t *)&tq->plugin_tq->ringBasePA); + if (!tq->plugin_tq->ringBaseVA) { printk(KERN_ERR "%s: failed to allocate tx ring\n", adapter->netdev->name); goto err; } + tq->data_ring.base = pci_alloc_consistent(adapter->pdev, tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc), @@ -487,20 +502,22 @@ vmxnet3_tq_create(struct vmxnet3_tx_queue *tq, goto err; } - tq->comp_ring.base = pci_alloc_consistent(adapter->pdev, - tq->comp_ring.size * - sizeof(struct Vmxnet3_TxCompDesc), - &tq->comp_ring.basePA); - if (!tq->comp_ring.base) { - printk(KERN_ERR "%s: failed to allocate tx comp ring\n", + tq->shadow_ring.size = + VMXNET3_TX_SHADOW_RING_SIZE(tq->plugin_tq->ringSize); + tq->shadow_ring.base = vmalloc(tq->shadow_ring.size * + sizeof(struct vmxnet3_tx_buf_info)); + if (!tq->shadow_ring.base) { + printk(KERN_ERR "%s: failed to allocate tx shadow ring\n", + adapter->netdev->name); goto err; } - tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]), - GFP_KERNEL); - if (!tq->buf_info) { - printk(KERN_ERR "%s: failed to allocate tx bufinfo\n", + tq->sg_list.elements = kcalloc(VMXNET3_SGLIST_MAX, + sizeof(struct Plugin_SgElement), + GFP_KERNEL); + if (!tq->sg_list.elements) { + 
printk(KERN_ERR "%s: failed to allocate tx sglist\n", adapter->netdev->name); goto err; } @@ -513,89 +530,8 @@ err: } -/* - * starting from ring->next2fill, allocate rx buffers for the given ring - * of the rx queue and update the rx desc. stop after @num_to_alloc buffers - * are allocated or allocation fails - */ - -static int -vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, - int num_to_alloc, struct vmxnet3_adapter *adapter) -{ - int num_allocated = 0; - struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx]; - struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx]; - u32 val; - - while (num_allocated < num_to_alloc) { - struct vmxnet3_rx_buf_info *rbi; - union Vmxnet3_GenericDesc *gd; - - rbi = rbi_base + ring->next2fill; - gd = ring->base + ring->next2fill; - - if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { - if (rbi->skb == NULL) { - rbi->skb = dev_alloc_skb(rbi->len + - NET_IP_ALIGN); - if (unlikely(rbi->skb == NULL)) { - rq->stats.rx_buf_alloc_failure++; - break; - } - rbi->skb->dev = adapter->netdev; - - skb_reserve(rbi->skb, NET_IP_ALIGN); - rbi->dma_addr = pci_map_single(adapter->pdev, - rbi->skb->data, rbi->len, - PCI_DMA_FROMDEVICE); - } else { - /* rx buffer skipped by the device */ - } - val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT; - } else { - BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE || - rbi->len != PAGE_SIZE); - - if (rbi->page == NULL) { - rbi->page = alloc_page(GFP_ATOMIC); - if (unlikely(rbi->page == NULL)) { - rq->stats.rx_buf_alloc_failure++; - break; - } - rbi->dma_addr = pci_map_page(adapter->pdev, - rbi->page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - } else { - /* rx buffers skipped by the device */ - } - val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT; - } - - BUG_ON(rbi->dma_addr == 0); - gd->rxd.addr = cpu_to_le64(rbi->dma_addr); - gd->dword[2] = cpu_to_le32((ring->gen << VMXNET3_RXD_GEN_SHIFT) - | val | rbi->len); - - num_allocated++; - vmxnet3_cmd_ring_adv_next2fill(ring); - } - 
rq->uncommitted[ring_idx] += num_allocated; - - dev_dbg(&adapter->netdev->dev, - "alloc_rx_buf: %d allocated, next2fill %u, next2comp " - "%u, uncommited %u\n", num_allocated, ring->next2fill, - ring->next2comp, rq->uncommitted[ring_idx]); - - /* so that the device can distinguish a full ring and an empty ring */ - BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp); - - return num_allocated; -} - - static void -vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, +vmxnet3_append_frag(struct sk_buff *skb, struct Shell_RecvFrameSG *sg, struct vmxnet3_rx_buf_info *rbi) { struct skb_frag_struct *frag = skb_shinfo(skb)->frags + @@ -604,120 +540,88 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); frag->page = rbi->page; - frag->page_offset = 0; - frag->size = rcd->len; + frag->page_offset = sg->offset; + if (sg->offset != 0) + printk(KERN_INFO "sg->offset:%d\n", sg->offset); + frag->size = sg->length; + skb->data_len += frag->size; skb_shinfo(skb)->nr_frags++; } - static void -vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, - struct vmxnet3_tx_queue *tq, struct pci_dev *pdev, - struct vmxnet3_adapter *adapter) +vmxnet3_map_pkt(struct sk_buff *skb, u32 copy_size, + struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { - u32 dw2, len; - unsigned long buf_offset; - int i; - union Vmxnet3_GenericDesc *gdesc; struct vmxnet3_tx_buf_info *tbi = NULL; + struct vmxnet3_tx_buf_info *sop_tbi = NULL; + struct Plugin_SgList *sg_list = &tq->sg_list; + u32 idx = 0; + int i; - BUG_ON(ctx->copy_size > skb_headlen(skb)); - - /* use the previous gen bit for the SOP desc */ - dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; - - ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; - gdesc = ctx->sop_txd; /* both loops below can be skipped */ + BUG_ON(copy_size > skb_headlen(skb)); + sop_tbi = tq->shadow_ring.base + tq->shadow_ring.next2fill; 
/* no need to map the buffer if headers are copied */ - if (ctx->copy_size) { - ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA + - tq->tx_ring.next2fill * - sizeof(struct Vmxnet3_TxDataDesc)); - ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size); - ctx->sop_txd->dword[3] = 0; - - tbi = tq->buf_info + tq->tx_ring.next2fill; + if (copy_size) { + tbi = tq->shadow_ring.base + tq->shadow_ring.next2fill; + tbi->skb = NULL; tbi->map_type = VMXNET3_MAP_NONE; - - dev_dbg(&adapter->netdev->dev, - "txd[%u]: 0x%Lx 0x%x 0x%x\n", - tq->tx_ring.next2fill, - le64_to_cpu(ctx->sop_txd->txd.addr), - ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]); - vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); - - /* use the right gen for non-SOP desc */ - dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT; + tbi->len = 0; + tbi->dma_addr = 0; + sg_list->elements[idx].pa = tq->data_ring.basePA + + tq->data_ring.next2fill * + sizeof(struct Vmxnet3_TxDataDesc); + sg_list->elements[idx].length = copy_size; + idx++; + vmxnet3_tx_shadow_ring_adv_next2fill(&tq->shadow_ring); } - /* linear part can use multiple tx desc if it's big */ - len = skb_headlen(skb) - ctx->copy_size; - buf_offset = ctx->copy_size; - while (len) { - u32 buf_size; - buf_size = len > VMXNET3_MAX_TX_BUF_SIZE ? 
- VMXNET3_MAX_TX_BUF_SIZE : len; - - tbi = tq->buf_info + tq->tx_ring.next2fill; + /* + * linear part can use multiple tx desc in the plugin if it's + * big, but only one in the shadow/data ring + */ + if (skb_headlen(skb) > copy_size) { + tbi = tq->shadow_ring.base + tq->shadow_ring.next2fill; + tbi->skb = NULL; tbi->map_type = VMXNET3_MAP_SINGLE; + tbi->len = skb_headlen(skb) - copy_size; tbi->dma_addr = pci_map_single(adapter->pdev, - skb->data + buf_offset, buf_size, + skb->data + copy_size, tbi->len, PCI_DMA_TODEVICE); - tbi->len = buf_size; /* this automatically convert 2^14 to 0 */ + sg_list->elements[idx].pa = tbi->dma_addr; + sg_list->elements[idx].length = tbi->len; + idx++; - gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; - BUG_ON(gdesc->txd.gen == tq->tx_ring.gen); - - gdesc->txd.addr = cpu_to_le64(tbi->dma_addr); - gdesc->dword[2] = cpu_to_le32(dw2 | buf_size); - gdesc->dword[3] = 0; - - dev_dbg(&adapter->netdev->dev, - "txd[%u]: 0x%Lx 0x%x 0x%x\n", - tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr), - le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]); - vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); - dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT; - - len -= buf_size; - buf_offset += buf_size; + vmxnet3_tx_shadow_ring_adv_next2fill(&tq->shadow_ring); } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; - tbi = tq->buf_info + tq->tx_ring.next2fill; + tbi = tq->shadow_ring.base + tq->shadow_ring.next2fill; + tbi->skb = NULL; tbi->map_type = VMXNET3_MAP_PAGE; + tbi->len = frag->size; tbi->dma_addr = pci_map_page(adapter->pdev, frag->page, frag->page_offset, frag->size, PCI_DMA_TODEVICE); - tbi->len = frag->size; - - gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; - BUG_ON(gdesc->txd.gen == tq->tx_ring.gen); + sg_list->elements[idx].pa = tbi->dma_addr; + sg_list->elements[idx].length = tbi->len; + idx++; - gdesc->txd.addr = cpu_to_le64(tbi->dma_addr); - gdesc->dword[2] = cpu_to_le32(dw2 | 
frag->size); - gdesc->dword[3] = 0; - - dev_dbg(&adapter->netdev->dev, - "txd[%u]: 0x%llu %u %u\n", - tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr), - le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]); - vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); - dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT; + vmxnet3_tx_shadow_ring_adv_next2fill(&tq->shadow_ring); } - ctx->eop_txd = gdesc; - /* set the last buf_info for the pkt */ - tbi->skb = skb; - tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base; + sop_tbi->skb = skb; + sop_tbi->eop_idx = tq->shadow_ring.next2fill; + BUG_ON(idx >= VMXNET3_SGLIST_MAX); + sg_list->numElements = idx; + sg_list->totalLength = skb->len; } @@ -730,95 +634,118 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, * Returns: * -1: error happens during parsing * 0: protocol headers parsed, but too big to be copied - * 1: protocol headers parsed and copied + * n: protocol headers parsed and copied; n is # of bytes copied * * Other effects: - * 1. related *ctx fields are updated. - * 2. ctx->copy_size is # of bytes copied - * 3. the portion copied is guaranteed to be in the linear part + * 1. related *info fields are updated. + * 2. 
the portion copied is guaranteed to be in the linear part * */ static int vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, - struct vmxnet3_tx_ctx *ctx, + struct Plugin_SendInfo *info, struct vmxnet3_adapter *adapter) { struct Vmxnet3_TxDataDesc *tdd; - - if (ctx->mss) { - ctx->eth_ip_hdr_size = skb_transport_offset(skb); - ctx->l4_hdr_size = ((struct tcphdr *) - skb_transport_header(skb))->doff * 4; - ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size; + unsigned int copy_size; + + if (info->tsoMss) { + info->tcp = true; + info->tso = true; + info->xsumTcpOrUdp = true; + info->ipHeaderOffset = skb_network_offset(skb); + info->l4HeaderOffset = skb_transport_offset(skb); + info->l4DataOffset = info->l4HeaderOffset + + ((struct tcphdr *)skb_transport_header(skb))->doff * 4; + + copy_size = info->l4DataOffset; } else { unsigned int pull_size; + info->tcp = false; + info->udp = false; + info->tso = false; + if (info->ipv4) { + struct iphdr *iph = (struct iphdr *) + skb_network_header(skb); + if (iph->protocol == IPPROTO_TCP) + info->tcp = true; + else if (iph->protocol == IPPROTO_UDP) + info->udp = true; + } else if (info->ipv6) { + /* XXX what about option headers */ + struct ipv6hdr *iph = (struct ipv6hdr *) + skb_network_header(skb); + if (iph->nexthdr == IPPROTO_TCP) + info->tcp = true; + else if (iph->nexthdr == IPPROTO_UDP) + info->udp = true; + } if (skb->ip_summed == CHECKSUM_PARTIAL) { - ctx->eth_ip_hdr_size = skb_transport_offset(skb); - - if (ctx->ipv4) { - struct iphdr *iph = (struct iphdr *) - skb_network_header(skb); - if (iph->protocol == IPPROTO_TCP) { - pull_size = ctx->eth_ip_hdr_size + + info->ipHeaderOffset = skb_network_offset(skb); + info->l4HeaderOffset = skb_transport_offset(skb); + if (info->ipv4 || info->ipv6) { + if (info->tcp) { + info->xsumTcpOrUdp = true; + pull_size = info->l4HeaderOffset + sizeof(struct tcphdr); if (unlikely(!pskb_may_pull(skb, pull_size))) { goto err; } - ctx->l4_hdr_size = ((struct 
tcphdr *) + info->l4DataOffset = + info->l4HeaderOffset + + ((struct tcphdr *) skb_transport_header(skb))->doff * 4; - } else if (iph->protocol == IPPROTO_UDP) { - ctx->l4_hdr_size = - sizeof(struct udphdr); + copy_size = info->l4DataOffset; + } else if (info->udp) { + info->xsumTcpOrUdp = true; + info->l4DataOffset = + info->l4HeaderOffset + + sizeof(struct udphdr); + copy_size = info->l4DataOffset; } else { - ctx->l4_hdr_size = 0; + info->xsumTcpOrUdp = false; + copy_size = info->l4HeaderOffset; } } else { + info->xsumTcpOrUdp = false; /* for simplicity, don't copy L4 headers */ - ctx->l4_hdr_size = 0; + copy_size = info->l4HeaderOffset; } - ctx->copy_size = ctx->eth_ip_hdr_size + - ctx->l4_hdr_size; } else { - ctx->eth_ip_hdr_size = 0; - ctx->l4_hdr_size = 0; + info->xsumTcpOrUdp = false; /* copy as much as allowed */ - ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE - , skb_headlen(skb)); + copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE, + skb_headlen(skb)); } - /* make sure headers are accessible directly */ - if (unlikely(!pskb_may_pull(skb, ctx->copy_size))) + if (unlikely(!pskb_may_pull(skb, copy_size))) goto err; } - if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) { + if (unlikely(copy_size > VMXNET3_HDR_COPY_SIZE)) { tq->stats.oversized_hdr++; - ctx->copy_size = 0; return 0; } - tdd = tq->data_ring.base + tq->tx_ring.next2fill; + tdd = tq->data_ring.base + tq->data_ring.next2fill; + BUG_ON(copy_size > skb_headlen(skb)); - memcpy(tdd->data, skb->data, ctx->copy_size); - dev_dbg(&adapter->netdev->dev, - "copy %u bytes to dataRing[%u]\n", - ctx->copy_size, tq->tx_ring.next2fill); - return 1; + memcpy(tdd->data, skb->data, copy_size); + return copy_size; err: return -1; } static void -vmxnet3_prepare_tso(struct sk_buff *skb, - struct vmxnet3_tx_ctx *ctx) +vmxnet3_prepare_tso(struct sk_buff *skb, struct Plugin_SendInfo *info) { struct tcphdr *tcph = (struct tcphdr *)skb_transport_header(skb); - if (ctx->ipv4) { + if (info->ipv4) { struct 
iphdr *iph = (struct iphdr *)skb_network_header(skb); iph->check = 0; tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, @@ -848,24 +775,20 @@ static int vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter, struct net_device *netdev) { - int ret; + int copy_size; u32 count; unsigned long flags; - struct vmxnet3_tx_ctx ctx; - union Vmxnet3_GenericDesc *gdesc; -#ifdef __BIG_ENDIAN_BITFIELD - /* Use temporary descriptor to avoid touching bits multiple times */ - union Vmxnet3_GenericDesc tempTxDesc; -#endif + u32 shadow_idx; + bool lastPktHint; + int i; /* conservatively estimate # of descriptors to use */ count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + skb_shinfo(skb)->nr_frags + 1; - - ctx.ipv4 = (skb->protocol == __constant_ntohs(ETH_P_IP)); - - ctx.mss = skb_shinfo(skb)->gso_size; - if (ctx.mss) { + tq->info.ipv4 = (skb->protocol == __constant_ntohs(ETH_P_IP)); + tq->info.ipv6 = (skb->protocol == __constant_ntohs(ETH_P_IPV6)); + tq->info.tsoMss = skb_shinfo(skb)->gso_size; + if (tq->info.tsoMss) { if (skb_header_cloned(skb)) { if (unlikely(pskb_expand_head(skb, 0, 0, GFP_ATOMIC) != 0)) { @@ -874,7 +797,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } tq->stats.copy_skb_header++; } - vmxnet3_prepare_tso(skb, &ctx); + vmxnet3_prepare_tso(skb, &tq->info); } else { if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) { @@ -892,18 +815,17 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } } - ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter); - if (ret >= 0) { - BUG_ON(ret <= 0 && ctx.copy_size != 0); + copy_size = vmxnet3_parse_and_copy_hdr(skb, tq, &tq->info, adapter); + if (copy_size >= 0) { /* hdrs parsed, check against other limits */ - if (ctx.mss) { - if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size > + if (tq->info.tsoMss) { + if (unlikely(tq->info.l4DataOffset > VMXNET3_MAX_TX_BUF_SIZE)) { goto hdr_too_big; } } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - 
if (unlikely(ctx.eth_ip_hdr_size + + if (unlikely(tq->info.l4HeaderOffset + skb->csum_offset > VMXNET3_MAX_CSUM_OFFSET)) { goto hdr_too_big; @@ -916,82 +838,83 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } spin_lock_irqsave(&tq->tx_lock, flags); - - if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) { + /* Convert all deb_dbg to dprink */ + if (vmxnet3_tx_data_ring_desc_avail(&tq->data_ring) < 1) { tq->stats.tx_ring_full++; - dev_dbg(&adapter->netdev->dev, - "tx queue stopped on %s, next2comp %u" - " next2fill %u\n", adapter->netdev->name, - tq->tx_ring.next2comp, tq->tx_ring.next2fill); + dev_dbg(&adapter->pdev->dev, "tx queue stopped on %s, data ring" + " next2comp %u next2fill %u\n", adapter->netdev->name, + tq->data_ring.next2comp, tq->data_ring.next2fill); vmxnet3_tq_stop(tq, adapter); spin_unlock_irqrestore(&tq->tx_lock, flags); return NETDEV_TX_BUSY; } - /* fill tx descs related to addr & len */ - vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter); + if (count > vmxnet3_tx_shadow_ring_desc_avail(&tq->shadow_ring)) { + tq->stats.tx_ring_full++; + dev_dbg(&adapter->pdev->dev, "tx queue stopped on %s, shadow " + " ring next2comp %u next2fill %u\n", + adapter->netdev->name, + tq->shadow_ring.next2comp, tq->shadow_ring.next2fill); - /* setup the EOP desc */ - ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP); + vmxnet3_tq_stop(tq, adapter); + spin_unlock_irqrestore(&tq->tx_lock, flags); + return NETDEV_TX_BUSY; + } - /* setup the SOP desc */ -#ifdef __BIG_ENDIAN_BITFIELD - gdesc = &tempTxDesc; - gdesc->dword[2] = ctx.sop_txd->dword[2]; - gdesc->dword[3] = ctx.sop_txd->dword[3]; -#else - gdesc = ctx.sop_txd; -#endif - if (ctx.mss) { - gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_TSO; - gdesc->txd.msscof = ctx.mss; - le32_add_cpu(&tq->shared->txNumDeferred, (skb->len - - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss); - } else { - if (skb->ip_summed == CHECKSUM_PARTIAL) { - 
gdesc->txd.hlen = ctx.eth_ip_hdr_size; - gdesc->txd.om = VMXNET3_OM_CSUM; - gdesc->txd.msscof = ctx.eth_ip_hdr_size + - skb->csum_offset; + /* fill shadow ring and populate sg_list with addr & len */ + shadow_idx = tq->shadow_ring.next2fill; + vmxnet3_map_pkt(skb, copy_size, tq, adapter); + if (tq->info.tsoMss) + tq->shared->txNumDeferred += (skb->len - copy_size + + tq->info.tsoMss - 1) / tq->info.tsoMss; + else + tq->shared->txNumDeferred += 1; + + if (!adapter->passthru) { + if (le32_to_cpu(tq->shared->txNumDeferred) >= + le32_to_cpu(tq->shared->txThreshold)) { + tq->shared->txNumDeferred = 0; + lastPktHint = true; } else { - gdesc->txd.om = 0; - gdesc->txd.msscof = 0; + lastPktHint = false; } - le32_add_cpu(&tq->shared->txNumDeferred, 1); + } else { + lastPktHint = true; } if (vlan_tx_tag_present(skb)) { - gdesc->txd.ti = 1; - gdesc->txd.tci = vlan_tx_tag_get(skb); + tq->info.vlan = true; + tq->info.vlanTag = vlan_tx_tag_get(skb); } - /* finally flips the GEN bit of the SOP desc. */ - gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ - VMXNET3_TXD_GEN); -#ifdef __BIG_ENDIAN_BITFIELD - /* Finished updating in bitfields of Tx Desc, so write them in original - * place. 
- */ - vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc, - (struct Vmxnet3_TxDesc *)ctx.sop_txd); - gdesc = ctx.sop_txd; -#endif - dev_dbg(&adapter->netdev->dev, - "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n", - (u32)((union Vmxnet3_GenericDesc *)ctx.sop_txd - - tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr), - le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3])); + if (Plugin_AddFrameToTxRing(adapter, tq->qid, &tq->info, &tq->sg_list, + lastPktHint) != 0) { + tq->stats.tx_ring_full++; + dev_dbg(&adapter->pdev->dev, "tx queue stopped on %s, plugin " + "ring: full\n", adapter->netdev->name); + + /* roll back shadow ring and unmap pkt */ + for (i = shadow_idx; i < tq->shadow_ring.next2fill; i++) { + vmxnet3_unmap_tx_buf(tq->shadow_ring.base + i, + adapter->pdev); + tq->shadow_ring.base[i].skb = NULL; + } + tq->shadow_ring.next2fill = shadow_idx; + tq->sg_list.numElements = 0; + tq->sg_list.totalLength = 0; + + vmxnet3_tq_stop(tq, adapter); + spin_unlock_irqrestore(&tq->tx_lock, flags); + return NETDEV_TX_BUSY; + } + wmb(); + + vmxnet3_tx_data_ring_adv_next2fill(&tq->data_ring); spin_unlock_irqrestore(&tq->tx_lock, flags); - if (le32_to_cpu(tq->shared->txNumDeferred) >= - le32_to_cpu(tq->shared->txThreshold)) { - tq->shared->txNumDeferred = 0; - VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_TXPROD, - tq->tx_ring.next2fill); - } + netdev->trans_start = jiffies; return NETDEV_TX_OK; @@ -1008,331 +931,68 @@ static netdev_tx_t vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - return vmxnet3_tq_xmit(skb, &adapter->tx_queue, adapter, netdev); } -static void -vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, - struct sk_buff *skb, - union Vmxnet3_GenericDesc *gdesc) -{ - if (!gdesc->rcd.cnc && adapter->rxcsum) { - /* typical case: TCP/UDP over IP and both csums are correct */ - if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) == - VMXNET3_RCD_CSUM_OK) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - 
BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); - BUG_ON(!(gdesc->rcd.v4 || gdesc->rcd.v6)); - BUG_ON(gdesc->rcd.frg); - } else { - if (gdesc->rcd.csum) { - skb->csum = htons(gdesc->rcd.csum); - skb->ip_summed = CHECKSUM_PARTIAL; - } else { - skb->ip_summed = CHECKSUM_NONE; - } - } - } else { - skb->ip_summed = CHECKSUM_NONE; - } -} - - -static void -vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd, - struct vmxnet3_rx_ctx *ctx, struct vmxnet3_adapter *adapter) -{ - rq->stats.drop_err++; - if (!rcd->fcs) - rq->stats.drop_fcs++; - - rq->stats.drop_total++; - - /* - * We do not unmap and chain the rx buffer to the skb. - * We basically pretend this buffer is not used and will be recycled - * by vmxnet3_rq_alloc_rx_buf() - */ - - /* - * ctx->skb may be NULL if this is the first and the only one - * desc for the pkt - */ - if (ctx->skb) - dev_kfree_skb_irq(ctx->skb); - - ctx->skb = NULL; -} - - -static int -vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, - struct vmxnet3_adapter *adapter, int quota) -{ - static u32 rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; - u32 num_rxd = 0; - struct Vmxnet3_RxCompDesc *rcd; - struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; -#ifdef __BIG_ENDIAN_BITFIELD - struct Vmxnet3_RxDesc rxCmdDesc; - struct Vmxnet3_RxCompDesc rxComp; -#endif - vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, - &rxComp); - while (rcd->gen == rq->comp_ring.gen) { - struct vmxnet3_rx_buf_info *rbi; - struct sk_buff *skb; - int num_to_alloc; - struct Vmxnet3_RxDesc *rxd; - u32 idx, ring_idx; - - if (num_rxd >= quota) { - /* we may stop even before we see the EOP desc of - * the current pkt - */ - break; - } - num_rxd++; - - idx = rcd->rxdIdx; - ring_idx = rcd->rqID == rq->qid ? 
0 : 1; - vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd, - &rxCmdDesc); - rbi = rq->buf_info[ring_idx] + idx; - - BUG_ON(rxd->addr != rbi->dma_addr || - rxd->len != rbi->len); - - if (unlikely(rcd->eop && rcd->err)) { - vmxnet3_rx_error(rq, rcd, ctx, adapter); - goto rcd_done; - } - - if (rcd->sop) { /* first buf of the pkt */ - BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD || - rcd->rqID != rq->qid); - - BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB); - BUG_ON(ctx->skb != NULL || rbi->skb == NULL); - - if (unlikely(rcd->len == 0)) { - /* Pretend the rx buffer is skipped. */ - BUG_ON(!(rcd->sop && rcd->eop)); - dev_dbg(&adapter->netdev->dev, - "rxRing[%u][%u] 0 length\n", - ring_idx, idx); - goto rcd_done; - } - - ctx->skb = rbi->skb; - rbi->skb = NULL; - - pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len, - PCI_DMA_FROMDEVICE); - - skb_put(ctx->skb, rcd->len); - } else { - BUG_ON(ctx->skb == NULL); - /* non SOP buffer must be type 1 in most cases */ - if (rbi->buf_type == VMXNET3_RX_BUF_PAGE) { - BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY); - - if (rcd->len) { - pci_unmap_page(adapter->pdev, - rbi->dma_addr, rbi->len, - PCI_DMA_FROMDEVICE); - - vmxnet3_append_frag(ctx->skb, rcd, rbi); - rbi->page = NULL; - } - } else { - /* - * The only time a non-SOP buffer is type 0 is - * when it's EOP and error flag is raised, which - * has already been handled. 
- */ - BUG_ON(true); - } - } - - skb = ctx->skb; - if (rcd->eop) { - skb->len += skb->data_len; - skb->truesize += skb->data_len; - - vmxnet3_rx_csum(adapter, skb, - (union Vmxnet3_GenericDesc *)rcd); - skb->protocol = eth_type_trans(skb, adapter->netdev); - - if (unlikely(adapter->vlan_grp && rcd->ts)) { - vlan_hwaccel_receive_skb(skb, - adapter->vlan_grp, rcd->tci); - } else { - netif_receive_skb(skb); - } - - ctx->skb = NULL; - } - -rcd_done: - /* device may skip some rx descs */ - rq->rx_ring[ring_idx].next2comp = idx; - VMXNET3_INC_RING_IDX_ONLY(rq->rx_ring[ring_idx].next2comp, - rq->rx_ring[ring_idx].size); - - /* refill rx buffers frequently to avoid starving the h/w */ - num_to_alloc = vmxnet3_cmd_ring_desc_avail(rq->rx_ring + - ring_idx); - if (unlikely(num_to_alloc > VMXNET3_RX_ALLOC_THRESHOLD(rq, - ring_idx, adapter))) { - vmxnet3_rq_alloc_rx_buf(rq, ring_idx, num_to_alloc, - adapter); - - /* if needed, update the register */ - if (unlikely(rq->shared->updateRxProd)) { - VMXNET3_WRITE_BAR0_REG(adapter, - rxprod_reg[ring_idx] + rq->qid * 8, - rq->rx_ring[ring_idx].next2fill); - rq->uncommitted[ring_idx] = 0; - } - } - - vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring); - vmxnet3_getRxComp(rcd, - &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp); - } - - return num_rxd; -} - +static void vmxnet3_shell_free_buffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset); static void vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) { - u32 i, ring_idx; - struct Vmxnet3_RxDesc *rxd; - - for (ring_idx = 0; ring_idx < 2; ring_idx++) { - for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { -#ifdef __BIG_ENDIAN_BITFIELD - struct Vmxnet3_RxDesc rxDesc; -#endif - vmxnet3_getRxDesc(rxd, - &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc); - - if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD && - rq->buf_info[ring_idx][i].skb) { - pci_unmap_single(adapter->pdev, rxd->addr, - rxd->len, PCI_DMA_FROMDEVICE); - 
dev_kfree_skb(rq->buf_info[ring_idx][i].skb); - rq->buf_info[ring_idx][i].skb = NULL; - } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY && - rq->buf_info[ring_idx][i].page) { - pci_unmap_page(adapter->pdev, rxd->addr, - rxd->len, PCI_DMA_FROMDEVICE); - put_page(rq->buf_info[ring_idx][i].page); - rq->buf_info[ring_idx][i].page = NULL; - } - } + struct vmxnet3_rx_buf_info *rbi; + u32 i; - rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN; - rq->rx_ring[ring_idx].next2fill = - rq->rx_ring[ring_idx].next2comp = 0; - rq->uncommitted[ring_idx] = 0; + for (i = 0; i < rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE; i++) { + rbi = rq->buf_info + i; + if (rbi->buf_type != VMXNET3_RX_BUF_NONE) + vmxnet3_shell_free_buffer((struct Shell_RxQueueHandle *) + rq, i); } - - rq->comp_ring.gen = VMXNET3_INIT_GEN; - rq->comp_ring.next2proc = 0; + BUG_ON(rq->avail_skbs != 0); } - -void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, - struct vmxnet3_adapter *adapter) +void +vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, + struct vmxnet3_adapter *adapter) { - int i; - int j; - - /* all rx buffers must have already been freed */ - for (i = 0; i < 2; i++) { - if (rq->buf_info[i]) { - for (j = 0; j < rq->rx_ring[i].size; j++) - BUG_ON(rq->buf_info[i][j].page != NULL); - } + if (rq->plugin_rq->ringBaseVA) { + pci_free_consistent(adapter->pdev, rq->plugin_rq->ringLength, + rq->plugin_rq->ringBaseVA, + rq->plugin_rq->ringBasePA); + rq->plugin_rq->ringBaseVA = NULL; + rq->plugin_rq->ringBasePA = 0; } - - kfree(rq->buf_info[0]); - - for (i = 0; i < 2; i++) { - if (rq->rx_ring[i].base) { - pci_free_consistent(adapter->pdev, rq->rx_ring[i].size - * sizeof(struct Vmxnet3_RxDesc), - rq->rx_ring[i].base, - rq->rx_ring[i].basePA); - rq->rx_ring[i].base = NULL; - } - rq->buf_info[i] = NULL; - } - - if (rq->comp_ring.base) { - pci_free_consistent(adapter->pdev, rq->comp_ring.size * - sizeof(struct Vmxnet3_RxCompDesc), - rq->comp_ring.base, rq->comp_ring.basePA); - rq->comp_ring.base = 
NULL; + if (rq->buf_info) { + vfree(rq->buf_info); + rq->buf_info = NULL; } } - static int vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) { + struct vmxnet3_rx_buf_info *rbi; int i; - /* initialize buf_info */ - for (i = 0; i < rq->rx_ring[0].size; i++) { - - /* 1st buf for a pkt is skbuff */ - if (i % adapter->rx_buf_per_pkt == 0) { - rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB; - rq->buf_info[0][i].len = adapter->skb_buf_size; - } else { /* subsequent bufs for a pkt is frag */ - rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE; - rq->buf_info[0][i].len = PAGE_SIZE; - } - } - for (i = 0; i < rq->rx_ring[1].size; i++) { - rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE; - rq->buf_info[1][i].len = PAGE_SIZE; - } - - /* reset internal state and allocate buffers for both rings */ - for (i = 0; i < 2; i++) { - rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0; - rq->uncommitted[i] = 0; + BUG_ON(adapter->rx_buf_per_pkt <= 0 || + rq->plugin_rq->ringSize % adapter->rx_buf_per_pkt != 0); - memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size * - sizeof(struct Vmxnet3_RxDesc)); - rq->rx_ring[i].gen = VMXNET3_INIT_GEN; - } - if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1, - adapter) == 0) { - /* at least has 1 rx buffer for the 1st ring */ - return -ENOMEM; + /* initialize buf_info */ + for (i = 0; i < rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE; i++) { + rbi = rq->buf_info + i; + rbi->buf_type = VMXNET3_RX_BUF_NONE; + rbi->skb = NULL; + rbi->page = NULL; } - vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter); - - /* reset the comp ring */ - rq->comp_ring.next2proc = 0; - memset(rq->comp_ring.base, 0, rq->comp_ring.size * - sizeof(struct Vmxnet3_RxCompDesc)); - rq->comp_ring.gen = VMXNET3_INIT_GEN; - /* reset rxctx */ - rq->rx_ctx.skb = NULL; + rq->avail_skbs = 0; /* stats are not reset */ return 0; @@ -1342,41 +1002,45 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, static int 
vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) { - int i; - size_t sz; - struct vmxnet3_rx_buf_info *bi; + u32 ring_length; - for (i = 0; i < 2; i++) { - sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc); - rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz, - &rq->rx_ring[i].basePA); - if (!rq->rx_ring[i].base) { - printk(KERN_ERR "%s: failed to allocate rx ring %d\n", - adapter->netdev->name, i); - goto err; - } - } + BUG_ON(rq->plugin_rq->ringSize == 0); + BUG_ON((rq->plugin_rq->ringSize & VMXNET3_RING_SIZE_MASK) != 0); + BUG_ON(rq->plugin_rq->ringBaseVA || rq->buf_info); + BUG_ON(rq->plugin_rq->ringSize % adapter->rx_buf_per_pkt != 0); - sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc); - rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz, - &rq->comp_ring.basePA); - if (!rq->comp_ring.base) { - printk(KERN_ERR "%s: failed to allocate rx comp ring\n", + /* + * We don't know the underlying hardware's descriptor size, + * thus use the maximum allowed descriptor size. + */ + ring_length = rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_MAX_DESC_SIZE_BYTES; + /* Add room for potential alignment */ + ring_length += PLUGIN_SHARED_AREA_RX_ALLOCATION_ALIGN - 1; + /* + * Again, we don't know the underlying hardware's mode of + * operation, so let's give room for multiple rings. 
+ */ + rq->plugin_rq->ringLength = PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE * + ring_length + PLUGIN_SHARED_AREA_RX_EXTRA_ALLOCATION; + rq->plugin_rq->ringBaseVA = pci_alloc_consistent(adapter->pdev, + rq->plugin_rq->ringLength, + (dma_addr_t *)&rq->plugin_rq->ringBasePA); + if (!rq->plugin_rq->ringBaseVA) { + printk(KERN_ERR "%s: failed to allocate rx ring\n", adapter->netdev->name); goto err; } - sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size + - rq->rx_ring[1].size); - bi = kzalloc(sz, GFP_KERNEL); - if (!bi) { + rq->buf_info = vmalloc(rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE * + sizeof(struct vmxnet3_rx_buf_info)); + if (!rq->buf_info) { printk(KERN_ERR "%s: failed to allocate rx bufinfo\n", adapter->netdev->name); goto err; } - rq->buf_info[0] = bi; - rq->buf_info[1] = bi + rq->rx_ring[0].size; return 0; @@ -1392,8 +1056,11 @@ vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget) if (unlikely(adapter->shared->ecr)) vmxnet3_process_events(adapter); - vmxnet3_tq_tx_complete(&adapter->tx_queue, adapter); - return vmxnet3_rq_rx_complete(&adapter->rx_queue, adapter, budget); + Plugin_CheckTxRing(adapter, 0); + adapter->rx_queue.rxd_done = 0; + if (Plugin_CheckRxRing(adapter, 0, budget)) + Plugin_AddBuffersToRxRing(adapter, 0); + return adapter->rx_queue.rxd_done; } @@ -1495,8 +1162,8 @@ vmxnet3_request_irqs(struct vmxnet3_adapter *adapter) adapter->intr.mod_levels[i] = UPT1_IML_ADAPTIVE; /* next setup intr index for all intr sources */ - adapter->tx_queue.comp_ring.intr_idx = 0; - adapter->rx_queue.comp_ring.intr_idx = 0; + adapter->tx_queue.intr_idx = 0; + adapter->rx_queue.intr_idx = 0; adapter->intr.event_intr_idx = 0; printk(KERN_INFO "%s: intr type %u, mode %u, %u vectors " @@ -1747,7 +1414,10 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) struct Vmxnet3_DSDevRead *devRead = &shared->devRead; struct Vmxnet3_TxQueueConf *tqc; struct Vmxnet3_RxQueueConf *rqc; - int i; + struct vmxnet3_tx_queue 
*tq; + struct vmxnet3_rx_queue *rq; + dma_addr_t pa; + int i, ring1_size; memset(shared, 0, sizeof(*shared)); @@ -1785,37 +1455,52 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) sizeof(struct Vmxnet3_TxQueueDesc) + sizeof(struct Vmxnet3_RxQueueDesc)); - /* tx queue settings */ - BUG_ON(adapter->tx_queue.tx_ring.base == NULL); - devRead->misc.numTxQueues = 1; tqc = &adapter->tqd_start->conf; - tqc->txRingBasePA = cpu_to_le64(adapter->tx_queue.tx_ring.basePA); - tqc->dataRingBasePA = cpu_to_le64(adapter->tx_queue.data_ring.basePA); - tqc->compRingBasePA = cpu_to_le64(adapter->tx_queue.comp_ring.basePA); - tqc->ddPA = cpu_to_le64(virt_to_phys( - adapter->tx_queue.buf_info)); - tqc->txRingSize = cpu_to_le32(adapter->tx_queue.tx_ring.size); - tqc->dataRingSize = cpu_to_le32(adapter->tx_queue.data_ring.size); - tqc->compRingSize = cpu_to_le32(adapter->tx_queue.comp_ring.size); - tqc->ddLen = cpu_to_le32(sizeof(struct vmxnet3_tx_buf_info) * - tqc->txRingSize); - tqc->intrIdx = adapter->tx_queue.comp_ring.intr_idx; + tq = &adapter->tx_queue; + BUG_ON(tq->plugin_tq->ringBaseVA == NULL); + BUG_ON(tq->plugin_tq->ringBasePA == 0); + pa = tq->plugin_tq->ringBasePA; + tqc->txRingBasePA = ALIGN(pa, VMXNET3_RING_BA_ALIGN); + tqc->dataRingBasePA = tq->data_ring.basePA; + pa += tq->plugin_tq->ringSize * sizeof(struct Vmxnet3_TxDesc); + tqc->compRingBasePA = ALIGN(pa, VMXNET3_RING_BA_ALIGN); + tqc->ddPA = virt_to_phys(tq->shadow_ring.base); + tqc->txRingSize = tq->plugin_tq->ringSize; + tqc->dataRingSize = tq->data_ring.size; + tqc->compRingSize = tq->plugin_tq->ringSize; + tqc->ddLen = sizeof(struct vmxnet3_tx_buf_info) * + tq->shadow_ring.size; + tqc->intrIdx = tq->intr_idx; /* rx queue settings */ + if (adapter->lro || + adapter->netdev->mtu > SHELL_SMALL_RECV_BUFFER_SIZE) { + ring1_size = adapter->rx_queue.plugin_rq->ringSize; + } else { + /* same as in plugin and windows shell */ + ring1_size = 32; + } + devRead->misc.numRxQueues = 1; + rq = &adapter->rx_queue; 
+ + BUG_ON(rq->plugin_rq->ringBaseVA == NULL); + BUG_ON(rq->plugin_rq->ringBasePA == 0); rqc = &adapter->rqd_start->conf; - rqc->rxRingBasePA[0] = cpu_to_le64(adapter->rx_queue.rx_ring[0].basePA); - rqc->rxRingBasePA[1] = cpu_to_le64(adapter->rx_queue.rx_ring[1].basePA); - rqc->compRingBasePA = cpu_to_le64(adapter->rx_queue.comp_ring.basePA); - rqc->ddPA = cpu_to_le64(virt_to_phys( - adapter->rx_queue.buf_info)); - rqc->rxRingSize[0] = cpu_to_le32(adapter->rx_queue.rx_ring[0].size); - rqc->rxRingSize[1] = cpu_to_le32(adapter->rx_queue.rx_ring[1].size); - rqc->compRingSize = cpu_to_le32(adapter->rx_queue.comp_ring.size); - rqc->ddLen = cpu_to_le32(sizeof(struct vmxnet3_rx_buf_info) * - (rqc->rxRingSize[0] + rqc->rxRingSize[1])); - rqc->intrIdx = adapter->rx_queue.comp_ring.intr_idx; + pa = rq->plugin_rq->ringBasePA; + rqc->rxRingBasePA[0] = ALIGN(pa, VMXNET3_RING_BA_ALIGN); + pa += rq->plugin_rq->ringSize * sizeof(struct Vmxnet3_RxDesc); + rqc->rxRingBasePA[1] = ALIGN(pa, VMXNET3_RING_BA_ALIGN); + pa += ring1_size * sizeof(struct Vmxnet3_RxDesc); + rqc->compRingBasePA = ALIGN(pa, VMXNET3_RING_BA_ALIGN); + rqc->ddPA = virt_to_phys(rq->buf_info); + rqc->rxRingSize[0] = rq->plugin_rq->ringSize; + rqc->rxRingSize[1] = ring1_size; + rqc->compRingSize = rq->plugin_rq->ringSize + ring1_size; + rqc->ddLen = sizeof(struct vmxnet3_rx_buf_info) * + (rq->plugin_rq->ringSize + ring1_size); + rqc->intrIdx = rq->intr_idx; /* intr settings */ devRead->intrConf.autoMask = adapter->intr.mask_mode == @@ -1832,55 +1517,214 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) /* the rest are already zeroed */ } +/* + * This function asks the Hypervisor to load the HW plugin inside the guest. + * + * First we look for an available region to load the code, then we + * populate the NPA_PluginConf before issuing the CMD_LOAD_PLUGIN. + * After this, we set the MMIO address, copy the init opaque data and + * retrieve the entry point of the plugin. 
+ */ -int -vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) +static NPA_PluginMainFunc * +vmxnet3_load_plugin(struct vmxnet3_adapter *adapter) +{ + struct NPA_PluginConf *plugin_conf = adapter->plugin_conf; + u8 *plugin_code_region; + int ret; + int i; + + /* look for an available code region */ + spin_lock(&vmxnet3_plugin_code_lock); + for (i = 0; i < NPA_MAX_PLUGINS_PER_VM; i++) + if (!vmxnet3_plugin_code_used[i]) + break; + if (i == NPA_MAX_PLUGINS_PER_VM) { + spin_unlock(&vmxnet3_plugin_code_lock); + printk(KERN_ERR "Failed to allocated code section on %s\n", + adapter->netdev->name); + return NULL; + } + vmxnet3_plugin_code_used[i] = true; + spin_unlock(&vmxnet3_plugin_code_lock); + adapter->plugin_region_idx = i; + plugin_code_region = &vmxnet3_plugin_code_mem[NPA_PLUGIN_NUMPAGES * + PAGE_SIZE * i]; + + /* construct the plugin_conf */ + memset(plugin_conf, 0, sizeof(*plugin_conf)); + BUG_ON(((uintptr_t)plugin_code_region & ~PAGE_MASK)); + plugin_conf->pluginPages.vaddr = (uintptr_t)plugin_code_region; + plugin_conf->pluginPages.numPages = NPA_PLUGIN_NUMPAGES; + for (i = 0; i < NPA_PLUGIN_NUMPAGES; i++) { + plugin_conf->pluginPages.pages[i] = + page_to_pfn(vmalloc_to_page(plugin_code_region + + i * PAGE_SIZE)); + } + + plugin_conf->memioPages.startPPN = ALIGN(adapter->plugin_memio_pa, + PAGE_SIZE) / PAGE_SIZE; + plugin_conf->memioPages.numPages = NPA_MEMIO_NUMPAGES; + plugin_conf->sharedPages.startPPN = ALIGN(adapter->plugin_shared_pa, + PAGE_SIZE) / PAGE_SIZE; + plugin_conf->sharedPages.numPages = NPA_SHARED_NUMPAGES; + + adapter->shared->devRead.pluginConfDesc.confVer = 1; + adapter->shared->devRead.pluginConfDesc.confLen = sizeof(*plugin_conf); + adapter->shared->devRead.pluginConfDesc.confPA = + virt_to_phys(plugin_conf); + + dev_dbg(&adapter->pdev->dev, "%s: pluginConf: %d 0x%llx 0x%llx" + " 0x%llx\n", adapter->netdev->name, + adapter->shared->devRead.pluginConfDesc.confLen, + adapter->shared->devRead.pluginConfDesc.confPA, + 
plugin_conf->pluginPages.vaddr, + plugin_conf->pluginPages.pages[0]); + + /* issue command to load the plugin */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_LOAD_PLUGIN); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + if (ret == VMXNET3_NPA_CMD_SUCCESS) { + adapter->plugin.memioAddr = + (void *)ALIGN((uintptr_t)adapter->plugin_memio, + PAGE_SIZE); + memcpy(adapter->plugin.deviceInfo, plugin_conf->deviceInfo, + sizeof(adapter->plugin.deviceInfo)); + return (NPA_PluginMainFunc *)(uintptr_t)plugin_conf->entryVA; + } else { + spin_lock(&vmxnet3_plugin_code_lock); + vmxnet3_plugin_code_used[adapter->plugin_region_idx] = false; + spin_unlock(&vmxnet3_plugin_code_lock); + return NULL; + } +} + + + int +vmxnet3_activate_dev(struct vmxnet3_adapter *adapter, bool load_plugin) { int err; u32 ret; dev_dbg(&adapter->netdev->dev, "%s: skb_buf_size %d, rx_buf_per_pkt %d, ring sizes" - " %u %u %u\n", adapter->netdev->name, adapter->skb_buf_size, - adapter->rx_buf_per_pkt, adapter->tx_queue.tx_ring.size, - adapter->rx_queue.rx_ring[0].size, - adapter->rx_queue.rx_ring[1].size); + " %u %u %u\n", adapter->netdev->name, + adapter->skb_buf_size, adapter->rx_buf_per_pkt, + adapter->tx_queue.plugin_tq->ringSize, + adapter->tx_queue.shadow_ring.size, + adapter->rx_queue.plugin_rq->ringSize); vmxnet3_tq_init(&adapter->tx_queue, adapter); err = vmxnet3_rq_init(&adapter->rx_queue, adapter); if (err) { printk(KERN_ERR "Failed to init rx queue for %s: error %d\n", - adapter->netdev->name, err); + adapter->netdev->name, err); goto rq_err; } err = vmxnet3_request_irqs(adapter); if (err) { printk(KERN_ERR "Failed to setup irq for %s: error %d\n", - adapter->netdev->name, err); + adapter->netdev->name, err); goto irq_err; } vmxnet3_setup_driver_shared(adapter); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO( - adapter->shared_pa)); + adapter->shared_pa)); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI( - 
adapter->shared_pa)); - VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, - VMXNET3_CMD_ACTIVATE_DEV); - ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); - - if (ret != 0) { - printk(KERN_ERR "Failed to activate dev %s: error %u\n", - adapter->netdev->name, ret); - err = -EINVAL; - goto activate_err; + adapter->shared_pa)); + if (!load_plugin) { + NPA_PluginMain(&adapter->plugin_api); + adapter->plugin.memioAddr = adapter->hw_addr0; + memset(adapter->plugin.deviceInfo, 0, + sizeof(adapter->plugin.deviceInfo)); + adapter->plugin.shared = NULL; + adapter->plugin.sharedLen = 0; + printk(KERN_ERR "Using s/w api for %s\n", + adapter->netdev->name); + } else { + NPA_PluginMainFunc *plugin_main; + plugin_main = vmxnet3_load_plugin(adapter); + /* plugin memioAddr and deviceInfo are set in load_plugin */ + adapter->plugin.shared = + (void *)ALIGN((uintptr_t)adapter->plugin_shared, + PAGE_SIZE); + adapter->plugin.sharedLen = NPA_SHARED_NUMPAGES * PAGE_SIZE; + if (plugin_main == NULL) { + printk(KERN_ERR "Failed to load plugin for %s\n", + adapter->netdev->name); + err = -EINVAL; + goto load_plugin_err; + } + printk(KERN_ERR "Using h/w api %p for %s\n", plugin_main, + adapter->netdev->name); + plugin_main(&adapter->plugin_api); + } + + dev_dbg(&adapter->pdev->dev, + "%s: Plugin API:\n" + "swInit: %p\n" + "reinitTxRing: %p\n" + "reinitRxRing: %p\n" + "enableInterrupt: %p\n" + "disableInterrupt: %p\n" + "addFrameToTxRing: %p\n" + "checkTxRing: %p\n" + "checkRxRing: %p\n" + "addBuffersToRxRing: %p\n", + adapter->netdev->name, + adapter->plugin_api.swInit, + adapter->plugin_api.reinitTxRing, + adapter->plugin_api.reinitRxRing, + adapter->plugin_api.enableInterrupt, + adapter->plugin_api.disableInterrupt, + adapter->plugin_api.addFrameToTxRing, + adapter->plugin_api.checkTxRing, + adapter->plugin_api.checkRxRing, + adapter->plugin_api.addBuffersToRxRing); + + BUG_ON(!adapter->plugin_api.swInit); + BUG_ON(!adapter->plugin_api.reinitTxRing); + 
BUG_ON(!adapter->plugin_api.reinitRxRing); + BUG_ON(!adapter->plugin_api.enableInterrupt); + BUG_ON(!adapter->plugin_api.disableInterrupt); + BUG_ON(!adapter->plugin_api.addFrameToTxRing); + BUG_ON(!adapter->plugin_api.checkTxRing); + BUG_ON(!adapter->plugin_api.checkRxRing); + BUG_ON(!adapter->plugin_api.addBuffersToRxRing); + + Plugin_SwInit(adapter); + + Plugin_ReinitTxRing(adapter, 0); + Plugin_ReinitRxRing(adapter, 0); + + if (!load_plugin) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_ACTIVATE_DEV); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + if (ret != 0) { + printk(KERN_ERR "Failed to activate dev %s: error %u\n", + adapter->netdev->name, ret); + err = -EINVAL; + goto activate_err; + } + } else { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_ACTIVATE_VF); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + if (ret != VMXNET3_NPA_CMD_SUCCESS) { + printk(KERN_ERR "Failed to activate vf %s: error %u\n", + adapter->netdev->name, ret); + err = -EINVAL; + goto activate_err; + } } - VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD, - adapter->rx_queue.rx_ring[0].next2fill); - VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD2, - adapter->rx_queue.rx_ring[1].next2fill); + + adapter->passthru = load_plugin; + Plugin_AddBuffersToRxRing(adapter, 0); /* Apply the rx filter settins last. 
*/ vmxnet3_set_mc(adapter->netdev); @@ -1897,6 +1741,12 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) return 0; activate_err: + if (load_plugin) { + spin_lock(&vmxnet3_plugin_code_lock); + vmxnet3_plugin_code_used[adapter->plugin_region_idx] = false; + spin_unlock(&vmxnet3_plugin_code_lock); + } +load_plugin_err: VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0); vmxnet3_free_irqs(adapter); @@ -1914,18 +1764,41 @@ vmxnet3_reset_dev(struct vmxnet3_adapter *adapter) VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV); } +/* + * soft_quiesce indicates to quiesce the software (emulated) + * device. It doesn't completely stop the vmxnet3 backend. It has to + * be used when switching to passthrough. + */ int -vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter) +vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter, bool soft_quiesce) { if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)) return 0; + if (soft_quiesce) { + u32 result; - - VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, - VMXNET3_CMD_QUIESCE_DEV); + BUG_ON(adapter->passthru); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_STOP_EMULATION); + result = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + if (result != VMXNET3_NPA_CMD_SUCCESS) { + printk(KERN_INFO "%s: failed to stop emulation 0x%x\n", + adapter->netdev->name, result); + clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); + return 1; + } + } else { + if (adapter->passthru) { + spin_lock(&vmxnet3_plugin_code_lock); + vmxnet3_plugin_code_used[adapter->plugin_region_idx] = + false; + spin_unlock(&vmxnet3_plugin_code_lock); + } + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_QUIESCE_DEV); + } vmxnet3_disable_all_intrs(adapter); - napi_disable(&adapter->napi); netif_tx_disable(adapter->netdev); adapter->link_speed = 0; @@ -2056,54 +1929,63 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) { size_t 
sz; - if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - - VMXNET3_MAX_ETH_HDR_SIZE) { - adapter->skb_buf_size = adapter->netdev->mtu + - VMXNET3_MAX_ETH_HDR_SIZE; + if (adapter->netdev->mtu <= SHELL_SMALL_RECV_BUFFER_SIZE) { + if (!adapter->lro) { + adapter->skb_buf_size = adapter->netdev->mtu + + VMXNET3_MAX_ETH_HDR_SIZE; + } else { + adapter->skb_buf_size = SHELL_SMALL_RECV_BUFFER_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + } if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; adapter->rx_buf_per_pkt = 1; } else { - adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; - sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + - VMXNET3_MAX_ETH_HDR_SIZE; - adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + adapter->skb_buf_size = SHELL_SMALL_RECV_BUFFER_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + sz = adapter->netdev->mtu - adapter->skb_buf_size; + adapter->rx_buf_per_pkt = + 1 + (sz + SHELL_LARGE_RECV_BUFFER_SIZE - 1) / + SHELL_LARGE_RECV_BUFFER_SIZE; } /* - * for simplicity, force the ring0 size to be a multiple of + * for simplicity, force the ring size to be a multiple of * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN */ sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN; - adapter->rx_queue.rx_ring[0].size = (adapter->rx_queue.rx_ring[0].size + - sz - 1) / sz * sz; - adapter->rx_queue.rx_ring[0].size = min_t(u32, - adapter->rx_queue.rx_ring[0].size, - VMXNET3_RX_RING_MAX_SIZE / sz * sz); + adapter->rx_queue.plugin_rq->ringSize = + (adapter->rx_queue.plugin_rq->ringSize + sz - 1) + / sz * sz; + adapter->rx_queue.plugin_rq->ringSize = min_t(u32, + adapter->rx_queue.plugin_rq->ringSize, + VMXNET3_RX_RING_MAX_SIZE / sz * sz); } int vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, - u32 rx_ring_size, u32 rx_ring2_size) + u32 rx_ring_size) { - int err; + int err = 0; - adapter->tx_queue.tx_ring.size = tx_ring_size; + adapter->tx_queue.adapter = adapter; + adapter->tx_queue.plugin_tq = 
adapter->plugin.txQueues; + adapter->tx_queue.plugin_tq->ringSize = tx_ring_size; adapter->tx_queue.data_ring.size = tx_ring_size; - adapter->tx_queue.comp_ring.size = tx_ring_size; adapter->tx_queue.shared = &adapter->tqd_start->ctrl; adapter->tx_queue.stopped = true; + adapter->tx_queue.qid = 0; err = vmxnet3_tq_create(&adapter->tx_queue, adapter); if (err) return err; - adapter->rx_queue.rx_ring[0].size = rx_ring_size; - adapter->rx_queue.rx_ring[1].size = rx_ring2_size; + adapter->rx_queue.adapter = adapter; + adapter->rx_queue.plugin_rq = &adapter->plugin.rxQueues[0]; + + adapter->rx_queue.plugin_rq->ringSize = rx_ring_size; vmxnet3_adjust_rx_ring_size(adapter); - adapter->rx_queue.comp_ring.size = adapter->rx_queue.rx_ring[0].size + - adapter->rx_queue.rx_ring[1].size; adapter->rx_queue.qid = 0; adapter->rx_queue.qid2 = 1; adapter->rx_queue.shared = &adapter->rqd_start->ctrl; @@ -2114,23 +1996,273 @@ vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, return err; } + +/* + * Vmxnet3 Shell APIs + */ + +static void +vmxnet3_shell_log(size_t nargs, const char *str, ...) 
+{ + va_list va; + + va_start(va, str); + vprintk(str, va); /* a va_list must go through vprintk(); printk() would treat it as one vararg */ + va_end(va); +} + + +static void +vmxnet3_shell_complete_send(struct Shell_TxQueueHandle *handle, u32 numPkts) +{ + struct vmxnet3_tx_queue *tq = (struct vmxnet3_tx_queue *)handle; + struct vmxnet3_adapter *adapter = tq->adapter; + int i; + + /* do in-order completion only */ + for (i = 0; i < numPkts; i++) { + vmxnet3_unmap_pkt(tq, adapter->pdev, adapter); + vmxnet3_tx_data_ring_adv_next2comp(&tq->data_ring); + } + + spin_lock(&tq->tx_lock); + /* + * XXX: PR 531329, we should wake the queue based on plugin + * ring and not shadow ring + */ + if (unlikely(vmxnet3_tq_stopped(tq, adapter) && + (vmxnet3_tx_shadow_ring_desc_avail(&tq->shadow_ring) > + VMXNET3_WAKE_QUEUE_SHADOW_THRESHOLD(tq) && + vmxnet3_tx_data_ring_desc_avail(&tq->data_ring) > + VMXNET3_WAKE_QUEUE_DATA_THRESHOLD(tq)) && + netif_carrier_ok(adapter->netdev))) { + vmxnet3_tq_wake(tq, adapter); + } + spin_unlock(&tq->tx_lock); +} + + +static u64 +vmxnet3_shell_alloc_small_buffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset) +{ + struct vmxnet3_rx_queue *rq = (struct vmxnet3_rx_queue *)handle; + struct vmxnet3_adapter *adapter = rq->adapter; + struct vmxnet3_rx_buf_info *rbi = rq->buf_info + ringOffset; + + BUG_ON(ringOffset >= rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE); + + if (rbi->buf_type != VMXNET3_RX_BUF_NONE) { + dev_dbg(&adapter->pdev->dev, "%s: alloc_small_buffer:[%u] %u\n", + adapter->netdev->name, ringOffset, rbi->buf_type); + rq->stats.rx_buf_cookie_error++; + return 0; + } + + rbi->len = adapter->skb_buf_size; + rbi->skb = dev_alloc_skb(rbi->len + NET_IP_ALIGN); + if (unlikely(rbi->skb == NULL)) { + rq->stats.rx_buf_alloc_failure++; + return 0; + } + skb_reserve(rbi->skb, NET_IP_ALIGN); + + rbi->skb->dev = adapter->netdev; + rbi->dma_addr = pci_map_single(adapter->pdev, rbi->skb->data, rbi->len, + PCI_DMA_FROMDEVICE); + rbi->buf_type = VMXNET3_RX_BUF_SKB; + + rq->avail_skbs++; + return 
rbi->dma_addr; +} + + +static u64 +vmxnet3_shell_alloc_large_buffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset) +{ + struct vmxnet3_rx_queue *rq = (struct vmxnet3_rx_queue *)handle; + struct vmxnet3_adapter *adapter = rq->adapter; + struct vmxnet3_rx_buf_info *rbi = rq->buf_info + ringOffset; + + + BUG_ON(ringOffset >= rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE); + + if (rbi->buf_type != VMXNET3_RX_BUF_NONE) { + dev_dbg(&adapter->pdev->dev, "%s:alloc_large_buffer: [%u] %u\n", + adapter->netdev->name, ringOffset, rbi->buf_type); + rq->stats.rx_buf_cookie_error++; + return 0; + } + + BUILD_BUG_ON(SHELL_LARGE_RECV_BUFFER_SIZE != PAGE_SIZE); + rbi->len = SHELL_LARGE_RECV_BUFFER_SIZE; + rbi->page = alloc_page(GFP_ATOMIC); + + if (unlikely(rbi->page == NULL)) { + rq->stats.rx_buf_alloc_failure++; + return 0; + } + rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + rbi->buf_type = VMXNET3_RX_BUF_PAGE; + + return rbi->dma_addr; +} + + + static void +vmxnet3_shell_free_buffer(struct Shell_RxQueueHandle *handle, + u32 ringOffset) +{ + struct vmxnet3_rx_queue *rq = (struct vmxnet3_rx_queue *)handle; + struct vmxnet3_adapter *adapter = rq->adapter; + struct vmxnet3_rx_buf_info *rbi = rq->buf_info + ringOffset; + + BUG_ON(ringOffset >= rq->plugin_rq->ringSize * + PLUGIN_SHARED_AREA_RX_ALLOCATION_MULTIPLE); + BUG_ON(rbi->buf_type == VMXNET3_RX_BUF_NONE); + + if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { + pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(rbi->skb); + rq->avail_skbs--; + rbi->skb = NULL; + } else if (rbi->buf_type == VMXNET3_RX_BUF_PAGE) { + pci_unmap_page(adapter->pdev, rbi->dma_addr, rbi->len, + PCI_DMA_FROMDEVICE); + put_page(rbi->page); + rbi->page = NULL; + } + rbi->buf_type = VMXNET3_RX_BUF_NONE; +} + + +static u32 +vmxnet3_shell_indicate_recv(struct Shell_RxQueueHandle *handle, + struct Shell_RecvFrame *frame) +{ + struct 
vmxnet3_rx_queue *rq = (struct vmxnet3_rx_queue *)handle; + struct vmxnet3_adapter *adapter = rq->adapter; + struct vmxnet3_rx_buf_info *rbi; + struct sk_buff *skb; + int i; + + rbi = rq->buf_info + frame->sg[0].ringOffset; + BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB); + skb = rbi->skb; + BUG_ON(frame->sgLength == 0); + rq->avail_skbs--; + rbi->skb = NULL; + pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len, + PCI_DMA_FROMDEVICE); + + skb_reserve(skb, 0); + skb_put(skb, frame->sg[0].length); + rbi->buf_type = VMXNET3_RX_BUF_NONE; + + for (i = 1; i < frame->sgLength; i++) { + rbi = rq->buf_info + frame->sg[i].ringOffset; + BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE); + + pci_unmap_page(rq->adapter->pdev, rbi->dma_addr, + rbi->len, PCI_DMA_FROMDEVICE); + vmxnet3_append_frag(skb, frame->sg + i, rbi); + rbi->page = NULL; + rbi->buf_type = VMXNET3_RX_BUF_NONE; + } + + skb->len += skb->data_len; + skb->truesize += skb->data_len; + + skb->ip_summed = CHECKSUM_NONE; + if (adapter->rxcsum && (frame->ipv4 || frame->ipv6)) { + if (frame->ipXsum != SHELL_XSUM_CORRECT) + skb->ip_summed = CHECKSUM_NONE; + else if ((frame->tcp && + frame->tcpXsum != SHELL_XSUM_CORRECT) || + (frame->udp && + frame->udpXsum != SHELL_XSUM_CORRECT)) + skb->ip_summed = CHECKSUM_NONE; + else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } + + skb->protocol = eth_type_trans(skb, adapter->netdev); + + if (unlikely(adapter->vlan_grp && frame->vlan)) { + vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, + frame->vlanTag); + } else { + netif_receive_skb(skb); + } + + rq->rxd_done++; + adapter->netdev->last_rx = jiffies; + + return 0; +} + + + + static int vmxnet3_open(struct net_device *netdev) { struct vmxnet3_adapter *adapter; int err; + struct Plugin_State *plugin; adapter = netdev_priv(netdev); - + plugin = &adapter->plugin; + + plugin->size = sizeof(*plugin); + plugin->majorVersion = 1; + plugin->minorVersion = 0; + plugin->offsetToPrivateSpace = offsetof(struct Plugin_State, + privateSpace); 
+ + plugin->shellApi.allocSmallBuffer = vmxnet3_shell_alloc_small_buffer; + plugin->shellApi.allocLargeBuffer = vmxnet3_shell_alloc_large_buffer; + plugin->shellApi.freeBuffer = vmxnet3_shell_free_buffer; + plugin->shellApi.completeSend = vmxnet3_shell_complete_send; + plugin->shellApi.indicateRecv = vmxnet3_shell_indicate_recv; + plugin->shellApi.log = vmxnet3_shell_log; + + plugin->mtu = adapter->netdev->mtu; + + plugin->numTxQueues = 1; + plugin->txQueues->handle = (struct Shell_TxQueueHandle *) + &adapter->tx_queue; spin_lock_init(&adapter->tx_queue.tx_lock); + plugin->numRxQueues = 1; + plugin->rxQueues->handle = (struct Shell_RxQueueHandle *) + &adapter->rx_queue; + + if (adapter->lro) + plugin->features = PLUGIN_FEATURES_LRO; + err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE, - VMXNET3_DEF_RX_RING_SIZE, VMXNET3_DEF_RX_RING_SIZE); if (err) goto queue_err; - - err = vmxnet3_activate_dev(adapter); + dev_dbg(&adapter->pdev->dev, "rxQueues[0] %p %llu %u %u\n", + plugin->rxQueues[0].ringBaseVA, + plugin->rxQueues[0].ringBasePA, + plugin->rxQueues[0].ringLength, + plugin->rxQueues[0].ringSize); + dev_dbg(&adapter->pdev->dev, "txQueues[0] %p %llu %u %u\n", + plugin->txQueues[0].ringBaseVA, + plugin->txQueues[0].ringBasePA, + plugin->txQueues[0].ringLength, + plugin->txQueues[0].ringSize); + + err = vmxnet3_activate_dev(adapter, false); if (err) goto activate_err; @@ -2156,7 +2288,7 @@ vmxnet3_close(struct net_device *netdev) while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) msleep(1); - vmxnet3_quiesce_dev(adapter); + vmxnet3_quiesce_dev(adapter, false); vmxnet3_rq_destroy(&adapter->rx_queue, adapter); vmxnet3_tq_destroy(&adapter->tx_queue, adapter); @@ -2205,15 +2337,12 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) msleep(1); if (netif_running(netdev)) { - vmxnet3_quiesce_dev(adapter); + vmxnet3_quiesce_dev(adapter, false); vmxnet3_reset_dev(adapter); /* we need to re-create the rx queue based on the new mtu */ 
vmxnet3_rq_destroy(&adapter->rx_queue, adapter); vmxnet3_adjust_rx_ring_size(adapter); - adapter->rx_queue.comp_ring.size = - adapter->rx_queue.rx_ring[0].size + - adapter->rx_queue.rx_ring[1].size; err = vmxnet3_rq_create(&adapter->rx_queue, adapter); if (err) { printk(KERN_ERR "%s: failed to re-create rx queue," @@ -2221,7 +2350,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) goto out; } - err = vmxnet3_activate_dev(adapter); + err = vmxnet3_activate_dev(adapter, false); if (err) { printk(KERN_ERR "%s: failed to re-activate, error %d. " "Closing it\n", netdev->name, err); @@ -2249,7 +2378,6 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER | NETIF_F_TSO | - NETIF_F_TSO6 | NETIF_F_LRO; printk(KERN_INFO "features: sg csum vlan jf tso tsoIPv6 lro"); @@ -2258,6 +2386,11 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) adapter->jumbo_frame = true; adapter->lro = true; +#ifdef NETIF_F_TSO6 + netdev->features |= NETIF_F_TSO6; + printk(KERN_INFO " tsoIPv6"); +#endif + if (dma64) { netdev->features |= NETIF_F_HIGHDMA; printk(" highDMA"); @@ -2294,6 +2427,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) adapter->intr.type = cfg & 0x3; adapter->intr.mask_mode = (cfg >> 2) & 0x3; +#ifdef CONFIG_PCI_MSI if (adapter->intr.type == VMXNET3_IT_AUTO) { int err; @@ -2316,6 +2450,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) } } +#endif adapter->intr.type = VMXNET3_IT_INTX; /* INT-X related setting */ @@ -2358,11 +2493,12 @@ vmxnet3_reset_work(struct work_struct *data) return; /* if the device is closed, we must leave it alone */ - if (netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev) && + (adapter->netdev->flags & IFF_UP)) { printk(KERN_INFO "%s: resetting\n", adapter->netdev->name); - vmxnet3_quiesce_dev(adapter); + vmxnet3_quiesce_dev(adapter, false); vmxnet3_reset_dev(adapter); - vmxnet3_activate_dev(adapter); + 
vmxnet3_activate_dev(adapter, false); } else { printk(KERN_INFO "%s: already closed\n", adapter->netdev->name); } @@ -2370,6 +2506,53 @@ vmxnet3_reset_work(struct work_struct *data) clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); } +static void +vmxnet3_passthru_work(struct work_struct *data) +{ + struct vmxnet3_adapter *adapter; + + adapter = container_of(data, struct vmxnet3_adapter, passthru_work); + + /* if another thread is resetting the device, wait for it to complete */ + while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) + msleep(1); + + /* if the device is closed, we must leave it alone */ + if (netif_running(adapter->netdev)) { + if (vmxnet3_quiesce_dev(adapter, true) == 0) { + if (vmxnet3_activate_dev(adapter, true) == 0) { + printk(KERN_ERR "%s: passthru mode\n", + adapter->netdev->name); + } else { + printk(KERN_INFO "%s: activate dev failed\n", + adapter->netdev->name); + /* + * We already have quiesced the + * adapter in the guest; tell the + * device BE to do a hard quiesce + */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_QUIESCE_DEV); + vmxnet3_reset_dev(adapter); + vmxnet3_activate_dev(adapter, false); + printk(KERN_ERR "%s: emulation mode\n", + adapter->netdev->name); + } + } else { + printk(KERN_INFO "%s: soft quiesce failed\n", + adapter->netdev->name); + vmxnet3_quiesce_dev(adapter, false); + vmxnet3_reset_dev(adapter); + vmxnet3_activate_dev(adapter, false); + printk(KERN_ERR "%s: emulation mode\n", + adapter->netdev->name); + } + } else { + printk(KERN_INFO "%s: already closed\n", adapter->netdev->name); + } + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); +} + static int __devinit vmxnet3_probe_device(struct pci_dev *pdev, @@ -2442,6 +2625,33 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_pm; } + adapter->plugin_conf = kmalloc(sizeof(struct NPA_PluginConf), + GFP_KERNEL); + if (adapter->plugin_conf == NULL) { + printk(KERN_ERR "Failed to allocate memory for %s\n", + 
pci_name(pdev)); + err = -ENOMEM; + goto err_alloc_plugin_conf; + } + + adapter->plugin_memio = + pci_alloc_consistent(adapter->pdev, + (NPA_MEMIO_NUMPAGES + 1) * PAGE_SIZE, + &adapter->plugin_memio_pa); + if (!adapter->plugin_memio) { + err = -ENOMEM; + goto err_alloc_plugin_mmio; + } + + adapter->plugin_shared = + pci_alloc_consistent(adapter->pdev, + (NPA_SHARED_NUMPAGES + 1) * PAGE_SIZE, + &adapter->plugin_shared_pa); + if (!adapter->plugin_shared) { + err = -ENOMEM; + goto err_alloc_plugin_shared; + } + err = vmxnet3_alloc_pci_resources(adapter, &dma64); if (err < 0) goto err_alloc_pci; @@ -2479,8 +2689,10 @@ vmxnet3_probe_device(struct pci_dev *pdev, vmxnet3_set_ethtool_ops(netdev); INIT_WORK(&adapter->work, vmxnet3_reset_work); + INIT_WORK(&adapter->passthru_work, vmxnet3_passthru_work); netif_napi_add(netdev, &adapter->napi, vmxnet3_poll, 64); + SET_NETDEV_DEV(netdev, &pdev->dev); err = register_netdev(netdev); @@ -2499,6 +2711,16 @@ err_register: err_ver: vmxnet3_free_pci_resources(adapter); err_alloc_pci: + pci_free_consistent(adapter->pdev, + (NPA_SHARED_NUMPAGES + 1) * PAGE_SIZE, + adapter->plugin_shared, adapter->plugin_shared_pa); +err_alloc_plugin_shared: + pci_free_consistent(adapter->pdev, + (NPA_MEMIO_NUMPAGES + 1) * PAGE_SIZE, + adapter->plugin_memio, adapter->plugin_memio_pa); +err_alloc_plugin_mmio: + kfree(adapter->plugin_conf); +err_alloc_plugin_conf: kfree(adapter->pm_conf); err_alloc_pm: pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) + @@ -2526,6 +2748,13 @@ vmxnet3_remove_device(struct pci_dev *pdev) vmxnet3_free_intr_resources(adapter); vmxnet3_free_pci_resources(adapter); + pci_free_consistent(adapter->pdev, + (NPA_SHARED_NUMPAGES + 1) * PAGE_SIZE, + adapter->plugin_shared, adapter->plugin_shared_pa); + pci_free_consistent(adapter->pdev, + (NPA_MEMIO_NUMPAGES + 1) * PAGE_SIZE, + adapter->plugin_memio, adapter->plugin_memio_pa); + kfree(adapter->plugin_conf); kfree(adapter->pm_conf); 
pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) + sizeof(struct Vmxnet3_RxQueueDesc), @@ -2703,8 +2932,14 @@ static struct pci_driver vmxnet3_driver = { static int __init vmxnet3_init_module(void) { + int i; + printk(KERN_INFO "%s - version %s\n", VMXNET3_DRIVER_DESC, VMXNET3_DRIVER_VERSION_REPORT); + spin_lock_init(&vmxnet3_plugin_code_lock); + for (i = 0; i < NPA_MAX_PLUGINS_PER_VM; i++) + vmxnet3_plugin_code_used[i] = false; + return pci_register_driver(&vmxnet3_driver); } diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 3935c44..236ca88 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -127,12 +127,10 @@ vmxnet3_rq_driver_stats[] = { /* description, offset */ { "drv dropped rx total", offsetof(struct vmxnet3_rq_driver_stats, drop_total) }, - { " err", offsetof(struct vmxnet3_rq_driver_stats, - drop_err) }, - { " fcs", offsetof(struct vmxnet3_rq_driver_stats, - drop_fcs) }, { "rx buf alloc fail", offsetof(struct vmxnet3_rq_driver_stats, rx_buf_alloc_failure) }, + { "rx buf bad cookie", offsetof(struct vmxnet3_rq_driver_stats, + rx_buf_cookie_error) }, }; /* gloabl stats maintained by the driver */ @@ -213,7 +211,7 @@ vmxnet3_get_sset_count(struct net_device *netdev, int sset) static int vmxnet3_get_regs_len(struct net_device *netdev) { - return 20 * sizeof(u32); + return 16 * sizeof(u32); } @@ -347,32 +345,26 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) regs->version = 1; /* Update vmxnet3_get_regs_len if we want to dump more registers */ - /* make each ring use multiple of 16 bytes */ - buf[0] = adapter->tx_queue.tx_ring.next2fill; - buf[1] = adapter->tx_queue.tx_ring.next2comp; - buf[2] = adapter->tx_queue.tx_ring.gen; + buf[0] = adapter->tx_queue.plugin_tq->ringSize; + buf[1] = 0; + buf[2] = adapter->tx_queue.stopped; buf[3] = 0; - buf[4] = adapter->tx_queue.comp_ring.next2proc; - buf[5] = 
adapter->tx_queue.comp_ring.gen; - buf[6] = adapter->tx_queue.stopped; - buf[7] = 0; + buf[4] = adapter->tx_queue.shadow_ring.next2fill; + buf[5] = adapter->tx_queue.shadow_ring.next2comp; + buf[6] = adapter->tx_queue.data_ring.next2fill; + buf[7] = adapter->tx_queue.data_ring.next2comp; - buf[8] = adapter->rx_queue.rx_ring[0].next2fill; - buf[9] = adapter->rx_queue.rx_ring[0].next2comp; - buf[10] = adapter->rx_queue.rx_ring[0].gen; + buf[8] = adapter->rx_queue.plugin_rq->ringSize; + buf[9] = 0; + buf[10] = adapter->rx_queue.avail_skbs; buf[11] = 0; - buf[12] = adapter->rx_queue.rx_ring[1].next2fill; - buf[13] = adapter->rx_queue.rx_ring[1].next2comp; - buf[14] = adapter->rx_queue.rx_ring[1].gen; + buf[12] = adapter->passthru; + buf[13] = adapter->passthru ? adapter->plugin_region_idx : 0; + buf[14] = 0; buf[15] = 0; - - buf[16] = adapter->rx_queue.comp_ring.next2proc; - buf[17] = adapter->rx_queue.comp_ring.gen; - buf[18] = 0; - buf[19] = 0; } @@ -437,8 +429,8 @@ vmxnet3_get_ringparam(struct net_device *netdev, param->rx_mini_max_pending = 0; param->rx_jumbo_max_pending = 0; - param->rx_pending = adapter->rx_queue.rx_ring[0].size; - param->tx_pending = adapter->tx_queue.tx_ring.size; + param->rx_pending = adapter->rx_queue.plugin_rq->ringSize; + param->tx_pending = adapter->tx_queue.plugin_tq->ringSize; param->rx_mini_pending = 0; param->rx_jumbo_pending = 0; } @@ -467,9 +459,16 @@ vmxnet3_set_ringparam(struct net_device *netdev, ~VMXNET3_RING_SIZE_MASK; new_tx_ring_size = min_t(u32, new_tx_ring_size, VMXNET3_TX_RING_MAX_SIZE); - if (new_tx_ring_size > VMXNET3_TX_RING_MAX_SIZE || (new_tx_ring_size % - VMXNET3_RING_SIZE_ALIGN) != 0) + + sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN; + new_rx_ring_size = (param->rx_pending + sz - 1) / sz * sz; + new_rx_ring_size = min_t(u32, new_rx_ring_size, + VMXNET3_RX_RING_MAX_SIZE / sz * sz); + + if (new_tx_ring_size > VMXNET3_TX_RING_MAX_SIZE || + (new_tx_ring_size % 
VMXNET3_RING_SIZE_ALIGN) != 0) { return -EINVAL; + } /* ring0 has to be a multiple of * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN @@ -482,8 +481,8 @@ vmxnet3_set_ringparam(struct net_device *netdev, sz) != 0) return -EINVAL; - if (new_tx_ring_size == adapter->tx_queue.tx_ring.size && - new_rx_ring_size == adapter->rx_queue.rx_ring[0].size) { + if (new_tx_ring_size == adapter->tx_queue.plugin_tq->ringSize && + new_rx_ring_size == adapter->rx_queue.plugin_rq->ringSize) { return 0; } @@ -495,7 +494,7 @@ vmxnet3_set_ringparam(struct net_device *netdev, msleep(1); if (netif_running(netdev)) { - vmxnet3_quiesce_dev(adapter); + vmxnet3_quiesce_dev(adapter, false); vmxnet3_reset_dev(adapter); /* recreate the rx queue and the tx queue based on the @@ -504,7 +503,7 @@ vmxnet3_set_ringparam(struct net_device *netdev, vmxnet3_rq_destroy(&adapter->rx_queue, adapter); err = vmxnet3_create_queues(adapter, new_tx_ring_size, - new_rx_ring_size, VMXNET3_DEF_RX_RING_SIZE); + new_rx_ring_size); if (err) { /* failed, most likely because of OOM, try default * size */ @@ -512,7 +511,6 @@ vmxnet3_set_ringparam(struct net_device *netdev, " default ones\n", netdev->name); err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE, - VMXNET3_DEF_RX_RING_SIZE, VMXNET3_DEF_RX_RING_SIZE); if (err) { printk(KERN_ERR "%s: failed to create queues " @@ -522,7 +520,7 @@ vmxnet3_set_ringparam(struct net_device *netdev, } } - err = vmxnet3_activate_dev(adapter); + err = vmxnet3_activate_dev(adapter, false); if (err) printk(KERN_ERR "%s: failed to re-activate, error %d." 
" Closing it\n", netdev->name, err); diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 34f392f..d14bff1 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -29,6 +29,7 @@ #include <linux/ethtool.h> #include <linux/delay.h> +#include <linux/if_link.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/compiler.h> @@ -55,8 +56,10 @@ #include <linux/if_vlan.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> +#include <net/dst.h> #include "vmxnet3_defs.h" +#include "npa_plugin_api.h" #ifdef DEBUG # define VMXNET3_DRIVER_VERSION_REPORT VMXNET3_DRIVER_VERSION_STRING"-NAPI(debug)" @@ -117,77 +120,82 @@ enum { #define MAX_ETHERNET_CARDS 10 #define MAX_PCI_PASSTHRU_DEVICE 6 -struct vmxnet3_cmd_ring { - union Vmxnet3_GenericDesc *base; - u32 size; - u32 next2fill; - u32 next2comp; - u8 gen; - dma_addr_t basePA; + +struct vmxnet3_tx_data_ring { + struct Vmxnet3_TxDataDesc *base; + u32 size; + u32 next2fill; + u32 next2comp; + dma_addr_t basePA; +}; + +enum vmxnet3_buf_map_type { + VMXNET3_MAP_INVALID = 0, + VMXNET3_MAP_NONE, + VMXNET3_MAP_SINGLE, + VMXNET3_MAP_PAGE, +}; + +struct vmxnet3_tx_buf_info { + u32 map_type; + u16 len; + u16 eop_idx; + dma_addr_t dma_addr; + struct sk_buff *skb; +}; + +/* + * we have no idea how much data we can put in a TXD, so for the + * bookkeeping let's allocate 8 times more descriptors + */ +#define VMXNET3_TX_SHADOW_RING_SIZE(_ringSize) ((_ringSize) * 8) + +struct vmxnet3_tx_shadow_ring { + struct vmxnet3_tx_buf_info *base; + u32 size; + u32 next2fill; + u32 next2comp; }; static inline void -vmxnet3_cmd_ring_adv_next2fill(struct vmxnet3_cmd_ring *ring) +vmxnet3_tx_shadow_ring_adv_next2comp(struct vmxnet3_tx_shadow_ring *ring) { - ring->next2fill++; - if (unlikely(ring->next2fill == ring->size)) { - ring->next2fill = 0; - VMXNET3_FLIP_RING_GEN(ring->gen); - } + VMXNET3_INC_RING_IDX_ONLY(ring->next2comp, ring->size); } static inline void 
-vmxnet3_cmd_ring_adv_next2comp(struct vmxnet3_cmd_ring *ring) +vmxnet3_tx_shadow_ring_adv_next2fill(struct vmxnet3_tx_shadow_ring *ring) { - VMXNET3_INC_RING_IDX_ONLY(ring->next2comp, ring->size); + VMXNET3_INC_RING_IDX_ONLY(ring->next2fill, ring->size); } static inline int -vmxnet3_cmd_ring_desc_avail(struct vmxnet3_cmd_ring *ring) +vmxnet3_tx_shadow_ring_desc_avail(struct vmxnet3_tx_shadow_ring *ring) { return (ring->next2comp > ring->next2fill ? 0 : ring->size) + ring->next2comp - ring->next2fill - 1; } -struct vmxnet3_comp_ring { - union Vmxnet3_GenericDesc *base; - u32 size; - u32 next2proc; - u8 gen; - u8 intr_idx; - dma_addr_t basePA; -}; - static inline void -vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring) +vmxnet3_tx_data_ring_adv_next2comp(struct vmxnet3_tx_data_ring *ring) { - ring->next2proc++; - if (unlikely(ring->next2proc == ring->size)) { - ring->next2proc = 0; - VMXNET3_FLIP_RING_GEN(ring->gen); - } + VMXNET3_INC_RING_IDX_ONLY(ring->next2comp, ring->size); } -struct vmxnet3_tx_data_ring { - struct Vmxnet3_TxDataDesc *base; - u32 size; - dma_addr_t basePA; -}; -enum vmxnet3_buf_map_type { - VMXNET3_MAP_INVALID = 0, - VMXNET3_MAP_NONE, - VMXNET3_MAP_SINGLE, - VMXNET3_MAP_PAGE, -}; +static inline void +vmxnet3_tx_data_ring_adv_next2fill(struct vmxnet3_tx_data_ring *ring) +{ + VMXNET3_INC_RING_IDX_ONLY(ring->next2fill, ring->size); +} + +static inline int +vmxnet3_tx_data_ring_desc_avail(struct vmxnet3_tx_data_ring *ring) +{ + return (ring->next2comp > ring->next2fill ? 
0 : ring->size) + + ring->next2comp - ring->next2fill - 1; +} -struct vmxnet3_tx_buf_info { - u32 map_type; - u16 len; - u16 sop_idx; - dma_addr_t dma_addr; - struct sk_buff *skb; -}; struct vmxnet3_tq_driver_stats { u64 drop_total; /* # of pkts dropped by the driver, the @@ -205,29 +213,23 @@ struct vmxnet3_tq_driver_stats { u64 oversized_hdr; }; -struct vmxnet3_tx_ctx { - bool ipv4; - u16 mss; - u32 eth_ip_hdr_size; /* only valid for pkts requesting tso or csum - * offloading - */ - u32 l4_hdr_size; /* only valid if mss != 0 */ - u32 copy_size; /* # of bytes copied into the data ring */ - union Vmxnet3_GenericDesc *sop_txd; - union Vmxnet3_GenericDesc *eop_txd; -}; +struct vmxnet3_adapter; struct vmxnet3_tx_queue { + struct vmxnet3_adapter *adapter; spinlock_t tx_lock; - struct vmxnet3_cmd_ring tx_ring; - struct vmxnet3_tx_buf_info *buf_info; + struct Plugin_SendInfo info; + struct Plugin_SgList sg_list; + struct Plugin_TxQueueState *plugin_tq; + struct vmxnet3_tx_shadow_ring shadow_ring; struct vmxnet3_tx_data_ring data_ring; - struct vmxnet3_comp_ring comp_ring; - struct Vmxnet3_TxQueueCtrl *shared; + u8 intr_idx; + struct Vmxnet3_TxQueueCtrl *shared; struct vmxnet3_tq_driver_stats stats; bool stopped; int num_stop; /* # of times the queue is * stopped */ + int qid; } __attribute__((__aligned__(SMP_CACHE_BYTES))); enum vmxnet3_rx_buf_type { @@ -246,29 +248,26 @@ struct vmxnet3_rx_buf_info { dma_addr_t dma_addr; }; -struct vmxnet3_rx_ctx { - struct sk_buff *skb; - u32 sop_idx; -}; - struct vmxnet3_rq_driver_stats { u64 drop_total; - u64 drop_err; - u64 drop_fcs; u64 rx_buf_alloc_failure; + u64 rx_buf_cookie_error; }; struct vmxnet3_rx_queue { - struct vmxnet3_cmd_ring rx_ring[2]; - struct vmxnet3_comp_ring comp_ring; - struct vmxnet3_rx_ctx rx_ctx; - u32 qid; /* rqID in RCD for buffer from 1st ring */ - u32 qid2; /* rqID in RCD for buffer from 2nd ring */ - u32 uncommitted[2]; /* # of buffers allocated since last RXPROD - * update */ - struct vmxnet3_rx_buf_info 
*buf_info[2]; - struct Vmxnet3_RxQueueCtrl *shared; + struct vmxnet3_adapter *adapter; +#ifdef VMXNET3_NAPI + struct napi_struct napi; +#endif + struct Plugin_RxQueueState *plugin_rq; + struct vmxnet3_rx_buf_info *buf_info; + struct Vmxnet3_RxQueueCtrl *shared; struct vmxnet3_rq_driver_stats stats; + u8 intr_idx; + u8 qid; + u8 qid2; + u32 avail_skbs; + u32 rxd_done; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define VMXNET3_LINUX_MAX_MSIX_VECT 1 @@ -296,6 +295,10 @@ struct vmxnet3_adapter { struct Vmxnet3_DriverShared *shared; struct Vmxnet3_PMConf *pm_conf; + struct Plugin_State plugin; + struct Plugin_Api plugin_api; + + struct NPA_PluginConf *plugin_conf; struct Vmxnet3_TxQueueDesc *tqd_start; /* first tx queue desc */ struct Vmxnet3_RxQueueDesc *rqd_start; /* first rx queue desc */ struct net_device *netdev; @@ -304,6 +307,14 @@ struct vmxnet3_adapter { u8 *hw_addr0; /* for BAR 0 */ u8 *hw_addr1; /* for BAR 1 */ + u8 *plugin_memio; + dma_addr_t plugin_memio_pa; + + u8 *plugin_shared; + dma_addr_t plugin_shared_pa; + + int plugin_region_idx; + /* feature control */ bool rxcsum; bool lro; @@ -323,10 +334,12 @@ struct vmxnet3_adapter { u64 tx_timeout_count; struct work_struct work; + struct work_struct passthru_work; unsigned long state; /* VMXNET3_STATE_BIT_xxx */ int dev_number; + bool passthru; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ @@ -339,13 +352,20 @@ struct vmxnet3_adapter { #define VMXNET3_READ_BAR1_REG(adapter, reg) \ le32_to_cpu(readl((adapter)->hw_addr1 + (reg))) -#define VMXNET3_WAKE_QUEUE_THRESHOLD(tq) (5) -#define VMXNET3_RX_ALLOC_THRESHOLD(rq, ring_idx, adapter) \ - ((rq)->rx_ring[ring_idx].size >> 3) + +#define VMXNET3_WAKE_QUEUE_SHADOW_THRESHOLD(tq) (5) +#define VMXNET3_WAKE_QUEUE_DATA_THRESHOLD(tq) (5) #define VMXNET3_GET_ADDR_LO(dma) ((u32)(dma)) #define VMXNET3_GET_ADDR_HI(dma) ((u32)(((u64)(dma)) >> 32)) +/* + * the way we process packet is: 1 SG for header, 1 SG for linear part + * and 1 SG per frag + */ +#define 
VMXNET3_SGLIST_MAX (2 + MAX_SKB_FRAGS) + + /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 #define VMXNET3_DEF_RX_RING_SIZE 256 @@ -357,11 +377,40 @@ void set_flag_le16(__le16 *data, u16 flag); void set_flag_le64(__le64 *data, u64 flag); void reset_flag_le64(__le64 *data, u64 flag); +#define Plugin_SwInit(_adapter) \ + ((_adapter)->plugin_api.swInit(&(_adapter)->plugin)) +#define Plugin_ReinitTxRing(_adapter, _queue) \ + ((_adapter)->plugin_api.reinitTxRing(&(_adapter)->plugin, \ + (_queue))) +#define Plugin_ReinitRxRing(_adapter, _queue) \ + ((_adapter)->plugin_api.reinitRxRing(&(_adapter)->plugin, \ + (_queue))) +#define Plugin_EnableInterrupt(_adapter, _idx) \ + ((_adapter)->plugin_api.enableInterrupt(&(_adapter)->plugin, \ + (_idx))) +#define Plugin_DisableInterrupt(_adapter, _idx) \ + ((_adapter)->plugin_api.disableInterrupt(&(_adapter)->plugin, \ + (_idx))) +#define Plugin_AddFrameToTxRing(_adapter, _queue, _info, _frame, _lastPkt)\ + ((_adapter)->plugin_api.addFrameToTxRing(&(_adapter)->plugin, \ + (_queue), (_info), \ + (_frame), (_lastPkt))) +#define Plugin_CheckTxRing(_adapter, _queue) \ + ((_adapter)->plugin_api.checkTxRing(&(_adapter)->plugin, \ + (_queue))) +#define Plugin_CheckRxRing(_adapter, _queue, _budget) \ + ((_adapter)->plugin_api.checkRxRing(&(_adapter)->plugin, \ + (_queue), (_budget))) +#define Plugin_AddBuffersToRxRing(_adapter, _queue) \ + ((_adapter)->plugin_api.addBuffersToRxRing(&(_adapter)->plugin, \ + (_queue))) + + int -vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter); +vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter, bool soft); int -vmxnet3_activate_dev(struct vmxnet3_adapter *adapter); +vmxnet3_activate_dev(struct vmxnet3_adapter *adapter, bool load_plugin); void vmxnet3_force_close(struct vmxnet3_adapter *adapter); @@ -379,7 +428,7 @@ vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, int vmxnet3_create_queues(struct vmxnet3_adapter *adapter, - u32 tx_ring_size, u32 rx_ring_size, u32 
rx_ring2_size); + u32 tx_ring_size, u32 rx_ring_size); extern void vmxnet3_set_ethtool_ops(struct net_device *netdev); extern struct net_device_stats *vmxnet3_get_stats(struct net_device *netdev); diff --git a/drivers/net/vmxnet3/vmxnet3_plugin.c b/drivers/net/vmxnet3/vmxnet3_plugin.c new file mode 100644 index 0000000..49b5bf2 --- /dev/null +++ b/drivers/net/vmxnet3/vmxnet3_plugin.c @@ -0,0 +1,1221 @@ +/* + * NPA plugin for vmxnet3 driver. + * + * Copyright (C) 2008-2010, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@xxxxxxxxxx> + * + */ + +/* + * vmxnet3Plugin.c -- + * + * Implements a plugin for vmxnet3 rings. + */ + +#include <linux/types.h> +#include "vmxnet3_int.h" +#include "vmxnet3_defs.h" +#include "npa_plugin_api.h" + +/* + * Log & loglevel. Can change at runtime via debugger. + */ +static u32 logLevel; +static int logEnabled; + + +/* + * Easy shell API calling macros. 
+ */ +#define Shell_AllocSmallBuffer(_state, _handle, _ringOffset) \ + ((_state)->shellApi.allocSmallBuffer((_handle), (_ringOffset))) +#define Shell_AllocLargeBuffer(_state, _handle, _ringOffset) \ + ((_state)->shellApi.allocLargeBuffer((_handle), (_ringOffset))) +#define Shell_FreeBuffer(_state, _handle, _ringOffset) \ + ((_state)->shellApi.freeBuffer((_handle), (_ringOffset))) +#define Shell_CompleteSend(_state, _handle, _numPkt) \ + ((_state)->shellApi.completeSend((_handle), (_numPkt))) +#define Shell_IndicateRecv(_state, _handle, _frame) \ + ((_state)->shellApi.indicateRecv((_handle), (_frame))) +#define Shell_Log(_state, _loglevel, _n, _fmt, ...) \ + do { \ + if (logEnabled && (_loglevel) <= (u32)logLevel) { \ + (_state)->shellApi.log((_n) + 1, \ + "%s: " _fmt, \ + __func__, \ +##__VA_ARGS__); \ + } \ + } while (0) + + +/* + * Some standard definitions + */ +#ifndef NULL +#define NULL (void *)0 +#endif + + +/* + * Utility macro to write a register's value (BAR0) + */ +#define VMXNET3_WRITE_REG(_state, _offset, _value) \ + (*(u32 *)((u8 *)(_state)->memioAddr + (_offset)) = \ + (_value)) + + +/* + * Utility macro to align a virtual address + */ +#define ALIGN_VA(_ptr, _align) ((void *)(((uintptr_t)(_ptr) + ((_align) - 1)) &\ + ~((_align) - 1))) + + +/* + * TCP and UDP checksum offset + */ +#define TCP_CSUM_OFFSET (16) +#define UDP_CSUM_OFFSET (6) + + +/* + * Vmxnet3 TX queue + */ +struct Vmxnet3PluginTxQueue { + u32 txProdOffset; /* offset of txProd register */ + u32 ringSize; /* size in desc, aligned correctly */ + + u32 hwCmdInsert; /* last cmd insert we told hardware */ + u32 nextCmdInsert; /* index of next txd to fill */ + u32 nextCmdRemove; /* index of next txd to clean */ + u32 nextCompleteRemove; /* index of next to complete */ + u8 genCmd; /* current value for gen bit on tx ring */ + u8 genComplete; /* current value for gen bit on comp ring */ + + struct Vmxnet3_TxDesc *txCmdVirt; + struct Vmxnet3_TxCompDesc *txCompleteVirt; +}; + + +/* + * Vmxnet3 RX 
ring + */ +struct Vmxnet3PluginRxCmdRing { + u32 rxProdOffset; /* offset of register */ + u32 cookieOffset; /* 1st ring = 0, 2nd ring = (size of 1st ring) */ + u32 ringSize; /* size in desc, copied from adapter->rxRingLength */ + + u32 nextCmdInsert; + u32 nextCmdRemove; + + u8 genBit; + + struct Vmxnet3_RxDesc *ring; +}; + + +/* + * Vmxnet3 RX queue + */ +struct Vmxnet3PluginRxQueue { + struct Vmxnet3PluginRxCmdRing cmdRing[2]; + + u32 ringCompleteSize; + struct Vmxnet3_RxCompDesc *rxCompleteVirt; + + struct Shell_RecvFrame frame; + + u32 nextCompleteRemove; + u8 genComplete; +}; + +/* + * Vmxnet3 Plugin state + */ +struct Vmxnet3PluginCustomState { + struct Vmxnet3PluginTxQueue txQueues[PLUGIN_MAX_TX_QUEUES]; + struct Vmxnet3PluginRxQueue rxQueues[PLUGIN_MAX_RX_QUEUES]; + u32 maxSgLength; +}; + +#define VMXNET3_PLUGIN_STATE(state) \ + ((struct Vmxnet3PluginCustomState *)PLUGIN_PRIVATE((state))) + + +static INLINE void +MoveMemory(void *dst, + void *src, + size_t length) +{ + size_t i; + for (i = 0; i < length; ++i) + ((u8 *)dst)[i] = ((u8 *)src)[i]; +} + +static INLINE void +ZeroMemory(void *memory, + size_t length) +{ + size_t i; + for (i = 0; i < length; ++i) + ((u8 *)memory)[i] = 0; +} + + +/* + * Init any private software state. Returns 0 on success and 1 otherwise. 
+ */ + +static u32 +Vmxnet3Plugin_SwInit(struct Plugin_State *state) +{ + struct Vmxnet3PluginCustomState *customState = VMXNET3_PLUGIN_STATE( + state); + u32 i; + + if (state->majorVersion != 1 || state->size < sizeof(*state)) + return 1; + + for (i = 0; i < state->numRxQueues; ++i) { + struct Vmxnet3PluginRxQueue *rxQueue = + &(customState->rxQueues[i]); + u32 j; + + /* check ring size & adjust 2nd ring size */ + rxQueue->cmdRing[0].ringSize = state->rxQueues[i].ringSize; + if ((state->features & PLUGIN_FEATURES_LRO) || + state->mtu > SHELL_SMALL_RECV_BUFFER_SIZE) { + rxQueue->cmdRing[1].ringSize = + state->rxQueues[i].ringSize; + } else { + rxQueue->cmdRing[1].ringSize = 32; + } + rxQueue->cmdRing[0].cookieOffset = 0; + rxQueue->cmdRing[1].cookieOffset = rxQueue->cmdRing[0].ringSize; + BUG_ON(rxQueue->cmdRing[0].ringSize == 0); + BUG_ON((rxQueue->cmdRing[0].ringSize & + VMXNET3_RING_SIZE_MASK) != 0); + BUG_ON(rxQueue->cmdRing[1].ringSize == 0); + BUG_ON((rxQueue->cmdRing[1].ringSize & + VMXNET3_RING_SIZE_MASK) != 0); + + for (j = 0; j < 2; ++j) { + struct Vmxnet3PluginRxCmdRing *cmdRing = + rxQueue->cmdRing + j; + + /* initialize command ring management & gen values */ + cmdRing->nextCmdInsert = 0; + cmdRing->nextCmdRemove = 0; + cmdRing->genBit = VMXNET3_INIT_GEN; + } + /* setup the two command rings */ + rxQueue->cmdRing[0].ring = + ALIGN_VA(state->rxQueues[i].ringBaseVA, + VMXNET3_RING_BA_ALIGN); + rxQueue->cmdRing[1].ring = + ALIGN_VA((u8 *)rxQueue->cmdRing[0].ring + + rxQueue->cmdRing[0].ringSize * + sizeof(struct Vmxnet3_RxDesc), + VMXNET3_RING_BA_ALIGN); + + /* RX completion ring follows second RX command ring */ + rxQueue->ringCompleteSize = rxQueue->cmdRing[0].ringSize + + rxQueue->cmdRing[1].ringSize; + rxQueue->rxCompleteVirt = + ALIGN_VA((u8 *)rxQueue->cmdRing[1].ring + + rxQueue->cmdRing[1].ringSize * + sizeof(struct Vmxnet3_RxDesc), + VMXNET3_RING_BA_ALIGN); + + /* check for overflow */ + if (((u8 *)rxQueue->rxCompleteVirt) + + sizeof(struct 
Vmxnet3_RxCompDesc) * + rxQueue->ringCompleteSize > state->rxQueues[i].ringBaseVA + + state->rxQueues[i].ringLength) { + Shell_Log(state, 1, 0, + "rx shared area size is too small\n"); + return 1; + } + + /* initialize completion ring management & gen values */ + rxQueue->nextCompleteRemove = 0; + rxQueue->genComplete = VMXNET3_INIT_GEN; + + rxQueue->cmdRing[0].rxProdOffset = VMXNET3_REG_RXPROD + + (VMXNET3_REG_ALIGN * i); + rxQueue->cmdRing[1].rxProdOffset = VMXNET3_REG_RXPROD2 + + (VMXNET3_REG_ALIGN * i); + + ZeroMemory(&rxQueue->frame, sizeof(struct Shell_RecvFrame)); + + Shell_Log(state, 1, 8, "rxQueue[%u] %p cmdRing[0] %p %u " + "cmdRing[1] %p %u compRing %p %u\n", i, rxQueue, + rxQueue->cmdRing[0].ring, + rxQueue->cmdRing[0].ringSize, + rxQueue->cmdRing[1].ring, + rxQueue->cmdRing[1].ringSize, + rxQueue->rxCompleteVirt, + rxQueue->ringCompleteSize); + } + + for (i = 0; i < state->numTxQueues; i++) { + struct Vmxnet3PluginTxQueue *txQueue = + &customState->txQueues[i]; + + /* check ring size */ + txQueue->ringSize = state->txQueues[i].ringSize; + BUG_ON(txQueue->ringSize == 0); + BUG_ON((txQueue->ringSize & VMXNET3_RING_SIZE_MASK) != 0); + + txQueue->txCmdVirt = ALIGN_VA(state->txQueues[i].ringBaseVA, + VMXNET3_RING_BA_ALIGN); + + /* TX completion ring follows the TX command ring */ + txQueue->txCompleteVirt = ALIGN_VA((u8 *)txQueue->txCmdVirt + + txQueue->ringSize * + sizeof(struct Vmxnet3_TxDesc), + VMXNET3_RING_BA_ALIGN); + + /* check for overflow */ + if (((u8 *)txQueue->txCompleteVirt) + + sizeof(struct Vmxnet3_TxCompDesc) * txQueue->ringSize > + state->txQueues[i].ringBaseVA + + state->txQueues[i].ringLength) { + Shell_Log(state, 1, 0, + "tx shared area size is too small\n"); + return 1; + } + + /* initialize ring management & gen values */ + txQueue->hwCmdInsert = 0; + txQueue->nextCmdInsert = 0; + txQueue->nextCmdRemove = 0; + txQueue->nextCompleteRemove = 0; + txQueue->genCmd = VMXNET3_INIT_GEN; + txQueue->genComplete = VMXNET3_INIT_GEN; + + 
txQueue->txProdOffset = VMXNET3_REG_TXPROD + + (VMXNET3_REG_ALIGN * i); + + Shell_Log(state, 1, 5, + "txQueue[%u] %p cmdRing %p %u compRing %p\n", + i, txQueue, txQueue->txCmdVirt, txQueue->ringSize, + txQueue->txCompleteVirt); + } + + /* setup max number of SGs per received frame */ + if (state->features & PLUGIN_FEATURES_LRO) + customState->maxSgLength = SHELL_MAX_LRO_RECV_SG_LEN; + else + customState->maxSgLength = SHELL_MAX_RECV_SG_LEN; + + return 0; +} + + +/* + * Reset and clear RX ring(s) for the specified queue. + */ + +static u32 +Vmxnet3Plugin_ReinitRxRing(struct Plugin_State *state, + u32 queueNum) +{ + struct Vmxnet3PluginCustomState *customState = + VMXNET3_PLUGIN_STATE(state); + struct Vmxnet3PluginRxQueue *rxQueue = &customState->rxQueues[queueNum]; + u32 i; + + for (i = 0; i < 2; ++i) { + struct Vmxnet3PluginRxCmdRing *cmdRing = rxQueue->cmdRing + i; + + /* + * Can't BUG_ON(nextCmdInsert != nextCmdRemove) since these + * aren't updated when we garbage collected the buffers from + * the ring. + */ +#ifdef VMX86_DEBUG + if (cmdRing->nextCmdInsert != cmdRing->nextCmdRemove) { + Shell_Log(state, 2, 2, "cmdInsert %u != cmdRemove %u\n", + cmdRing->nextCmdInsert, + cmdRing->nextCmdRemove); + } +#endif + cmdRing->nextCmdInsert = 0; + cmdRing->nextCmdRemove = 0; + cmdRing->genBit = VMXNET3_INIT_GEN; + + Shell_Log(state, 1, 3, "cmdRing[%u] %p %u\n", i, cmdRing, + cmdRing->ringSize); + BUG_ON(!cmdRing->ringSize); + BUG_ON(!cmdRing->ring); + ZeroMemory(cmdRing->ring, sizeof(struct Vmxnet3_RxDesc) * + cmdRing->ringSize); + } + BUG_ON(!rxQueue->rxCompleteVirt); + BUG_ON(!rxQueue->ringCompleteSize); + ZeroMemory(rxQueue->rxCompleteVirt, + sizeof(struct Vmxnet3_RxCompDesc) * + rxQueue->ringCompleteSize); + rxQueue->nextCompleteRemove = 0; + rxQueue->genComplete = VMXNET3_INIT_GEN; + + return 0; +} + + +/* + * Reset and clear TX ring for the specified queue. 
+ */ + +static u32 +Vmxnet3Plugin_ReinitTxRing(struct Plugin_State *state, + u32 queueNum) +{ + struct Vmxnet3PluginCustomState *customState = + VMXNET3_PLUGIN_STATE(state); + struct Vmxnet3PluginTxQueue *txQueue = &customState->txQueues[queueNum]; + + txQueue->hwCmdInsert = 0; + txQueue->nextCmdInsert = 0; + txQueue->nextCmdRemove = 0; + txQueue->nextCompleteRemove = 0; + txQueue->genCmd = VMXNET3_INIT_GEN; + txQueue->genComplete = VMXNET3_INIT_GEN; + + ZeroMemory(txQueue->txCmdVirt, + sizeof(struct Vmxnet3_TxDesc) * txQueue->ringSize); + ZeroMemory(txQueue->txCompleteVirt, + sizeof(struct Vmxnet3_TxCompDesc) * txQueue->ringSize); + return 0; +} + + +/* + * Adds an offset (which must be < ringSize) to a ring index value, wrapping + * around to the beginning of the rx ring if needed. Returns index in the ring. + */ + +static u32 +ComputeRingIndex(struct Vmxnet3PluginRxCmdRing *ring, u32 base, u32 offset) +{ + u32 result = base + offset; + + BUG_ON(offset >= ring->ringSize); + if (result >= ring->ringSize) + result -= ring->ringSize; + return result; +} + + +static u32 +Vmxnet3Plugin_AddBuffersToRxRing(struct Plugin_State *state, + u32 queueNum) +{ + struct Vmxnet3PluginCustomState *customState = + VMXNET3_PLUGIN_STATE(state); + struct Shell_RxQueueHandle *handle = state->rxQueues[queueNum].handle; + struct Vmxnet3PluginRxQueue *rxQueue = &customState->rxQueues[queueNum]; + struct Vmxnet3PluginRxCmdRing *cmdRing0 = &rxQueue->cmdRing[0]; + struct Vmxnet3PluginRxCmdRing *cmdRing1 = &rxQueue->cmdRing[1]; + u32 oldInsert1; + u32 oldInsert2; + + oldInsert1 = rxQueue->cmdRing[0].nextCmdInsert; + oldInsert2 = rxQueue->cmdRing[1].nextCmdInsert; + + if (state->mtu <= SHELL_SMALL_RECV_BUFFER_SIZE) { + u32 nextCmd; + + nextCmd = ComputeRingIndex(cmdRing0, cmdRing0->nextCmdInsert, + 1); + Shell_Log(state, 2, 2, "nextCmd %u, nextCmdRemove %u\n", + nextCmd, cmdRing0->nextCmdRemove); + + /* fill the ring with 2k skb buffers */ + while (nextCmd != cmdRing0->nextCmdRemove) { + u64 
buffer; + struct Vmxnet3_RxDesc *desc0 = cmdRing0->ring + + cmdRing0->nextCmdInsert; + + BUG_ON(cmdRing0->cookieOffset != 0); + buffer = Shell_AllocSmallBuffer(state, handle, + cmdRing0->nextCmdInsert); + if (buffer == 0) + break; + + desc0->addr = buffer; + desc0->len = SHELL_SMALL_RECV_BUFFER_SIZE; + desc0->btype = VMXNET3_RXD_BTYPE_HEAD; + desc0->dtype = 0; + desc0->rsvd = 0; + desc0->ext1 = 0; + desc0->gen = cmdRing0->genBit; + + Shell_Log(state, 2, 4, "desc0[%u] addr:%lu len:%u " + "gen:%u\n", cmdRing0->nextCmdInsert, + desc0->addr, desc0->len, desc0->gen); + + cmdRing0->nextCmdInsert = nextCmd; + if (cmdRing0->nextCmdInsert == 0) { /* we've wrapped */ + VMXNET3_FLIP_RING_GEN(cmdRing0->genBit); + } + nextCmd = ComputeRingIndex(cmdRing0, + cmdRing0->nextCmdInsert, 1); + } + + /* + * We're not using the large buffer queue or the + * second ring unless LPD is enabled + */ + BUG_ON(!(state->features & PLUGIN_FEATURES_LRO) && + cmdRing1->nextCmdInsert != 0); + BUG_ON(!(state->features & PLUGIN_FEATURES_LRO) && + cmdRing1->nextCmdRemove != 0); + } else { + /* + * When jumbo frames are used, nextCmdRemove might + * point to the 2k buffer or either of the 4k buffers, + * depending on whether one or both of the 4k buffers + * were needed to receive a frame. So, this loop + * needs to check for +1, +2, and +3 when it comes to + * buffer occupancy. The alternative is to have the + * code that walks the completion ring detect when the + * 4k buffer(s) weren't used and skip it, but offhand + * I think that approach would be more overhead + * compared to having an additional check in this + * function (simpler, and this function ideally won't + * run as often). 
+ */ + + Shell_Log(state, 2, 3, "nextCmd %u-%u, nextCmdRemove %u\n", + ComputeRingIndex(cmdRing0, cmdRing0->nextCmdInsert, 1), + ComputeRingIndex(cmdRing0, cmdRing0->nextCmdInsert, 3), + cmdRing0->nextCmdRemove); + + while (ComputeRingIndex(cmdRing0, cmdRing0->nextCmdInsert, 1) != + cmdRing0->nextCmdRemove && + ComputeRingIndex(cmdRing0, cmdRing0->nextCmdInsert, 2) != + cmdRing0->nextCmdRemove && + ComputeRingIndex(cmdRing0, cmdRing0->nextCmdInsert, 3) != + cmdRing0->nextCmdRemove) { + struct Vmxnet3_RxDesc *desc[3]; + u32 bufferOffset[3]; + u8 genBit[3]; + u64 bufferPA[3]; + + genBit[0] = cmdRing0->genBit; + genBit[1] = cmdRing0->genBit; + genBit[2] = cmdRing0->genBit; + + BUG_ON(cmdRing0->cookieOffset != 0); + /* + * Compute next ring entries and gen values + * for these entries + */ + bufferOffset[0] = cmdRing0->nextCmdInsert; + bufferOffset[1] = bufferOffset[0] + 1; + if (bufferOffset[1] >= cmdRing0->ringSize) { + bufferOffset[1] = 0; + bufferOffset[2] = 1; + VMXNET3_FLIP_RING_GEN(genBit[1]); + VMXNET3_FLIP_RING_GEN(genBit[2]); + } else { + bufferOffset[2] = bufferOffset[1] + 1; + if (bufferOffset[2] >= cmdRing0->ringSize) { + bufferOffset[2] = 0; + VMXNET3_FLIP_RING_GEN(genBit[2]); + } + } + + desc[0] = cmdRing0->ring + bufferOffset[0]; + desc[1] = cmdRing0->ring + bufferOffset[1]; + desc[2] = cmdRing0->ring + bufferOffset[2]; + + /* allocate 2k + 4k + 4k buffers */ + bufferPA[0] = Shell_AllocSmallBuffer(state, handle, + bufferOffset[0]); + if (!bufferPA[0]) + break; + + bufferPA[1] = Shell_AllocLargeBuffer(state, handle, + bufferOffset[1]); + if (!bufferPA[1]) { + Shell_FreeBuffer(state, handle, + bufferOffset[0]); + break; + } + + bufferPA[2] = Shell_AllocLargeBuffer(state, handle, + bufferOffset[2]); + if (!bufferPA[2]) { + Shell_FreeBuffer(state, handle, + bufferOffset[0]); + Shell_FreeBuffer(state, handle, + bufferOffset[1]); + break; + } + + /* setup the descriptors */ + desc[0]->addr = bufferPA[0]; + desc[0]->len = SHELL_SMALL_RECV_BUFFER_SIZE; + 
desc[0]->btype = VMXNET3_RXD_BTYPE_HEAD; + desc[0]->dtype = 0; + desc[0]->rsvd = 0; + desc[0]->ext1 = 0; + + desc[1]->addr = bufferPA[1]; + desc[1]->len = SHELL_LARGE_RECV_BUFFER_SIZE; + desc[1]->btype = VMXNET3_RXD_BTYPE_BODY; + desc[1]->dtype = 0; + desc[1]->rsvd = 0; + desc[1]->ext1 = 0; + + desc[2]->addr = bufferPA[2]; + desc[2]->len = SHELL_LARGE_RECV_BUFFER_SIZE; + desc[2]->btype = VMXNET3_RXD_BTYPE_BODY; + desc[2]->dtype = 0; + desc[2]->rsvd = 0; + desc[2]->ext1 = 0; + + desc[2]->gen = genBit[2]; + desc[1]->gen = genBit[1]; + desc[0]->gen = genBit[0]; + +#ifdef VMX86_DEBUG + { + int i; + for (i = 0; i < 3; i++) { + Shell_Log(state, 2, 5, "desc%d[%u] " + "addr:%lu len:%u gen:%u\n", i, + (cmdRing0->nextCmdInsert + i)% + cmdRing0->ringSize, + desc[i]->addr, desc[i]->len, + desc[i]->gen); + } + } +#endif + + cmdRing0->nextCmdInsert += 3; + if (cmdRing0->nextCmdInsert >= cmdRing0->ringSize) { + cmdRing0->nextCmdInsert -= cmdRing0->ringSize; + VMXNET3_FLIP_RING_GEN(cmdRing0->genBit); + } + } + } + + if ((state->features & PLUGIN_FEATURES_LRO) || + state->mtu > SHELL_SMALL_RECV_BUFFER_SIZE) { + + Shell_Log(state, 2, 2, "nextCmd %u, nextCmdRemove %u\n", + ComputeRingIndex(cmdRing1, cmdRing1->nextCmdInsert, 1), + cmdRing1->nextCmdRemove); + + /* fill the 2nd ring with 4k buffers */ + while (ComputeRingIndex(cmdRing1, cmdRing1->nextCmdInsert, 1) != + cmdRing1->nextCmdRemove) { + u64 bufferPA; + + struct Vmxnet3_RxDesc *desc = cmdRing1->ring + + cmdRing1->nextCmdInsert; + + bufferPA = Shell_AllocLargeBuffer(state, handle, + cmdRing1->cookieOffset + + cmdRing1->nextCmdInsert); + if (!bufferPA) + break; + + desc->addr = bufferPA; + desc->len = SHELL_LARGE_RECV_BUFFER_SIZE; + desc->btype = VMXNET3_RXD_BTYPE_BODY; + desc->dtype = 0; + desc->rsvd = 0; + desc->ext1 = 0; + + desc->gen = cmdRing1->genBit; + + Shell_Log(state, 2, 4, "desc[%u] addr:%lu len:%u" + " gen:%u\n", cmdRing1->nextCmdInsert, + desc->addr, desc->len, desc->gen); + + ++cmdRing1->nextCmdInsert; + if 
(cmdRing1->nextCmdInsert >= cmdRing1->ringSize) {
+				cmdRing1->nextCmdInsert = 0;
+				VMXNET3_FLIP_RING_GEN(cmdRing1->genBit);
+			}
+		}
+	}
+
+	if (state->updateRxProd) {	/* write a producer register only if that ring's insert index moved */
+		if (oldInsert1 != rxQueue->cmdRing[0].nextCmdInsert) {
+			VMXNET3_WRITE_REG(state,
+					  rxQueue->cmdRing[0].rxProdOffset,
+					  rxQueue->cmdRing[0].nextCmdInsert);
+		}
+
+		if (oldInsert2 != rxQueue->cmdRing[1].nextCmdInsert) {
+			VMXNET3_WRITE_REG(state,
+					  rxQueue->cmdRing[1].rxProdOffset,
+					  rxQueue->cmdRing[1].nextCmdInsert);
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Checks rx ring(s) for received frame, returns non-zero if we need to
+ * feed the ring with buffers.
+ *
+ * Walks the rx completion ring of queue 'queueNum' starting at
+ * nextCompleteRemove for as long as the descriptor's gen bit matches
+ * genComplete and fewer than 'maxPackets' frames have been counted.
+ * Each completion descriptor names a command ring (rqID) and an index
+ * in it (rxdIdx); command-ring buffers between nextCmdRemove and that
+ * index are freed, then the completed buffer is either discarded or
+ * appended to the scatter-gather list in rxQueue->frame.  On an EOP
+ * descriptor the vlan, RSS, protocol and checksum fields of the frame
+ * are filled in and the frame is handed to Shell_IndicateRecv(); if
+ * that call fails the frame's buffers are freed and the loop is ended.
+ *
+ * Returns 1 when at least one completion descriptor was consumed and
+ * 0 otherwise.
+ */
+
+static u32
+Vmxnet3Plugin_CheckRxRing(struct Plugin_State *state,
+			  u32 queueNum,
+			  u32 maxPackets)
+{
+	struct Vmxnet3PluginCustomState *customState =
+		VMXNET3_PLUGIN_STATE(state);
+	struct Shell_RxQueueHandle *handle = state->rxQueues[queueNum].handle;
+	struct Vmxnet3PluginRxQueue *rxQueue = &customState->rxQueues[queueNum];
+	struct Shell_RecvFrame *frame = &rxQueue->frame;
+	u8 rxBufferWasCompleted = false;
+	u32 packetsFound = 0;
+
+	ZeroMemory(frame, sizeof *frame);	/* sg state is not carried over from a previous call */
+
+	Shell_Log(state, 1, 3, "desc[%u].gen %u q.gen %u\n",
+		  rxQueue->nextCompleteRemove,
+		  rxQueue->rxCompleteVirt[rxQueue->nextCompleteRemove].gen,
+		  rxQueue->genComplete);
+	/* while we have descriptors to process */
+	while (rxQueue->rxCompleteVirt[rxQueue->nextCompleteRemove].gen ==
+	       rxQueue->genComplete && packetsFound < maxPackets) {
+		struct Vmxnet3_RxCompDesc *currDesc;
+		u32 index;
+		u32 queueID;
+		u8 firstRing; /* first ring vs. second ring */
+		struct Vmxnet3PluginRxCmdRing *cmdRing;
+		u8 discardStoredMDLs = false;
+		u8 discardCurrentDesc = false;
+		u32 currDescCookie;
+
+		rxBufferWasCompleted = true;
+
+		currDesc = rxQueue->rxCompleteVirt +
+			rxQueue->nextCompleteRemove;
+		index = currDesc->rxdIdx;
+		queueID = currDesc->rqID;
+		Shell_Log(state, 1, 2, "got queue %u index %u\n", queueID,
+			  index);
+		BUG_ON(queueID != queueNum &&
+		       queueID != queueNum + state->numRxQueues);
+		firstRing = (queueID < state->numRxQueues) ? true : false;
+
+		cmdRing = rxQueue->cmdRing + (firstRing ? 0 : 1);
+		currDescCookie = cmdRing->cookieOffset + index;
+
+		/* reclaim any buffers that were skipped by device */
+		while (cmdRing->nextCmdRemove != index) {
+
+			Shell_FreeBuffer(state, handle, cmdRing->cookieOffset +
+					 cmdRing->nextCmdRemove);
+
+			cmdRing->nextCmdRemove =
+				ComputeRingIndex(cmdRing,
+						 cmdRing->nextCmdRemove, 1);
+		}
+		/*
+		 * If we got an SOP but have buffers from prior descriptors,
+		 * then free them
+		 */
+		if (currDesc->sop && frame->sgLength > 0)
+			discardStoredMDLs = true;
+
+		/*
+		 * if we got non-sop, but we don't have prior MDLs, then skip
+		 * this descriptor
+		 */
+		if (!currDesc->sop && frame->sgLength == 0)
+			discardCurrentDesc = true;
+
+		/*
+		 * if ran out of room to store frame, then discard prior and
+		 * current desc
+		 *
+		 * NOTE(review): the message is emitted through both
+		 * state->shellApi.log() and Shell_Log(); the same pairing
+		 * appears twice more below - confirm both calls are intended.
+		 */
+		if (frame->sgLength >= customState->maxSgLength) {
+			state->shellApi.log(2, "sgLength exceeded: %u %u\n",
+					    frame->sgLength,
+					    customState->maxSgLength);
+			Shell_Log(state, 1, 2, "sgLength exceeded: %u %u\n",
+				  frame->sgLength, customState->maxSgLength);
+			discardStoredMDLs = true;
+			discardCurrentDesc = true;
+		}
+
+		/* Make sure that err isn't set on non-eop frame */
+		BUG_ON(!currDesc->eop && currDesc->err);
+
+		if (currDesc->eop && currDesc->err) {
+			state->shellApi.log(1, "Got error on EOP descriptor: "
+					    "fcs %u\n", currDesc->fcs);
+			Shell_Log(state, 1, 1, "Got error on EOP descriptor: "
+				  "fcs %u\n", currDesc->fcs);
+			discardStoredMDLs = true;
+			discardCurrentDesc = true;
+		}
+
+		/*
+		 * if no length, then don't need to bother to add descriptor
+		 * to frame
+		 */
+		if (currDesc->len == 0)
+			discardCurrentDesc = true;
+
+		if (discardStoredMDLs) {
+			u32 i;
+			state->shellApi.log(0, "Discarding stored MDLs\n");
+			Shell_Log(state, 1, 0, "Discarding stored MDLs\n");
+			for (i = 0; i < frame->sgLength; ++i) {
+				Shell_FreeBuffer(state, handle,
+						 frame->sg[i].ringOffset);
+			}
+			frame->sgLength = 0;
+			frame->byteLength = 0;
+		}
+
+		if (discardCurrentDesc) {
+			Shell_FreeBuffer(state, handle, currDescCookie);
+			goto nextEntry;
+		}
+
+		BUG_ON(frame->sgLength >= customState->maxSgLength);
+
+		/* add MDL to list and set/increment the length */
+		BUG_ON(currDesc->len <= 0);
+		frame->sg[frame->sgLength].ringOffset = currDescCookie;
+		frame->sg[frame->sgLength].length = currDesc->len;
+		frame->byteLength += currDesc->len;
+		++frame->sgLength;
+
+		if (currDesc->eop) {
+			if (currDesc->ts) {
+				frame->vlan = true;
+				frame->vlanTag = (u16)currDesc->tci;
+			} else {
+				frame->vlan = false;
+				frame->vlanTag = 0;
+			}
+
+			if (currDesc->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
+
+				frame->rssHashFunction =
+					SHELL_RECV_HASH_FUNCTION_TOEPLITZ;
+				frame->rssHashValue = currDesc->rssHash;
+
+				switch (currDesc->rssType) {
+				case VMXNET3_RCD_RSS_TYPE_IPV4:
+					frame->rssHashType =
+						SHELL_RECV_HASH_TYPE_IPV4;
+					break;
+				case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
+					frame->rssHashType =
+						SHELL_RECV_HASH_TYPE_TCPIPV4;
+					break;
+				case VMXNET3_RCD_RSS_TYPE_IPV6:
+					frame->rssHashType =
+						SHELL_RECV_HASH_TYPE_IPV6;
+					break;
+				case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
+					frame->rssHashType =
+						SHELL_RECV_HASH_TYPE_TCPIPV6;
+					break;
+				default:
+					BUG_ON(1);
+					frame->rssHashType =
+						SHELL_RECV_HASH_TYPE_NONE;
+					break;
+				}
+			} else {
+				frame->rssHashFunction =
+					SHELL_RECV_HASH_FUNCTION_NONE;
+				frame->rssHashValue = 0;
+				frame->rssHashType = SHELL_RECV_HASH_TYPE_NONE;
+			}
+
+			/*
+			 * check on V4 vs V6. Validity of bits is not based
+			 * on CNC.
+			 */
+			if (currDesc->v4) {
+				frame->ipv4 = true;
+				frame->ipv6 = false;
+				frame->nonIp = false;
+			} else if (currDesc->v6) {
+				frame->ipv4 = false;
+				frame->ipv6 = true;
+				frame->nonIp = false;
+			} else {
+				frame->ipv4 = false;
+				frame->ipv6 = false;
+				frame->nonIp = true;
+			}
+
+			/*
+			 * check on TCP vs UDP. Validity of bits is not based
+			 * on CNC, but on v4 or v6.
+			 */
+			if (currDesc->v4 || currDesc->v6) {
+				if (currDesc->tcp) {
+					frame->tcp = true;
+					frame->udp = false;
+				} else if (currDesc->udp) {
+					frame->tcp = false;
+					frame->udp = true;
+				} else {
+					frame->tcp = false;
+					frame->udp = false;
+				}
+			} else {
+				frame->tcp = false;
+				frame->udp = false;
+			}
+
+			/* if checksum calculated */
+			if (!currDesc->cnc) {
+				/* ignore csum and frg */
+				if (currDesc->v4) {
+					if (currDesc->ipc) {
+						frame->ipXsum =
+							SHELL_XSUM_CORRECT;
+					} else {
+						frame->ipXsum =
+							SHELL_XSUM_INCORRECT;
+					}
+				} else {
+					frame->ipXsum = SHELL_XSUM_UNKNOWN;
+				}
+
+				if (!currDesc->frg &&
+				    (currDesc->v4 || currDesc->v6)) {
+					if (currDesc->tcp) {
+						if (currDesc->tuc) {
+							frame->tcpXsum =
+							    SHELL_XSUM_CORRECT;
+						} else {
+							frame->tcpXsum =
+							    SHELL_XSUM_INCORRECT;
+						}
+						frame->udpXsum =
+							SHELL_XSUM_UNKNOWN;
+					} else if (currDesc->udp) {
+						if (currDesc->tuc) {
+							frame->udpXsum =
+							    SHELL_XSUM_CORRECT;
+						} else {
+							frame->udpXsum =
+							    SHELL_XSUM_INCORRECT;
+						}
+						frame->tcpXsum =
+							SHELL_XSUM_UNKNOWN;
+					} else {
+						frame->tcpXsum =
+							SHELL_XSUM_UNKNOWN;
+						frame->udpXsum =
+							SHELL_XSUM_UNKNOWN;
+					}
+				} else { /* ipv4 or ipv6 */
+					frame->tcpXsum = SHELL_XSUM_UNKNOWN;
+					frame->udpXsum = SHELL_XSUM_UNKNOWN;
+				}
+			} else { /* cnc */
+				frame->tcpXsum = SHELL_XSUM_UNKNOWN;
+				frame->udpXsum = SHELL_XSUM_UNKNOWN;
+				frame->ipXsum = SHELL_XSUM_UNKNOWN;
+			}
+
+			++packetsFound;
+			if (Shell_IndicateRecv(state, handle, frame) != 0) {
+				/*
+				 * for now free buffers, since would
+				 * need to handle case where the EOP
+				 * descriptor is processed again the
+				 * next time this poll function is
+				 * called.
+				 */
+				u32 i;
+				for (i = 0; i < frame->sgLength; ++i) {
+					Shell_FreeBuffer(state, handle,
+						frame->sg[i].ringOffset);
+				}
+				/* breaks the loop cleanly */
+				packetsFound = maxPackets;
+			}
+			frame->sgLength = 0;
+			frame->byteLength = 0;
+		}
+
+nextEntry:
+
+		/* we processed this command descriptor, so move to the next */
+		BUG_ON(index != cmdRing->nextCmdRemove);
+		cmdRing->nextCmdRemove = ComputeRingIndex(cmdRing,
+						cmdRing->nextCmdRemove, 1);
+
+		/* we processed this completion desc, so move to the next */
+		if (++rxQueue->nextCompleteRemove >=
+		    rxQueue->ringCompleteSize) {
+			rxQueue->nextCompleteRemove = 0;
+			VMXNET3_FLIP_RING_GEN(rxQueue->genComplete);
+		}
+	}
+
+	return rxBufferWasCompleted == true ? 1 : 0;
+}
+
+/*
+ * Reaps tx completions for queue 'queueNum'.
+ *
+ * Consumes completion descriptors while their gen bit matches
+ * genComplete.  For each one, nextCmdRemove is moved to the slot after
+ * the completed command descriptor (txdIdx), wrapping at ringSize, and
+ * the completion index is advanced, flipping genComplete on wrap.  If
+ * any descriptor was consumed the count is reported through
+ * Shell_CompleteSend().  Always returns 0.
+ *
+ * NOTE(review): the completion index also wraps at txQueue->ringSize,
+ * whereas the rx path uses a separate ringCompleteSize - presumably the
+ * tx command and completion rings are the same size; confirm.
+ */
+
+static u32
+Vmxnet3Plugin_CheckTxRing(struct Plugin_State *state,
+			  u32 queueNum)
+{
+	struct Vmxnet3PluginCustomState *customState =
+		VMXNET3_PLUGIN_STATE(state);
+	struct Shell_TxQueueHandle *handle = state->txQueues[queueNum].handle;
+	struct Vmxnet3PluginTxQueue *txQueue = &customState->txQueues[queueNum];
+	u32 numCompleted = 0;
+	u32 index;
+	u32 nextRemove;
+
+	while (txQueue->txCompleteVirt[txQueue->nextCompleteRemove].gen ==
+	       txQueue->genComplete) {
+		BUG_ON(txQueue->txCompleteVirt[txQueue->nextCompleteRemove].rsvd
+		       != 0);
+		BUG_ON(txQueue->txCompleteVirt[txQueue->nextCompleteRemove].type
+		       != 0);
+
+		index = txQueue->txCompleteVirt[
+			txQueue->nextCompleteRemove].txdIdx;
+		BUG_ON(!txQueue->txCmdVirt[index].eop);
+
+		++numCompleted;
+
+		nextRemove = index + 1;
+		if (nextRemove >= txQueue->ringSize)
+			nextRemove = 0;
+
+		txQueue->nextCmdRemove = nextRemove;
+
+		txQueue->nextCompleteRemove++;
+		if (txQueue->nextCompleteRemove >= txQueue->ringSize) {
+			txQueue->nextCompleteRemove = 0;
+			VMXNET3_FLIP_RING_GEN(txQueue->genComplete);
+		}
+	}
+
+	if (numCompleted > 0) {
+		Shell_Log(state, 1, 1, "numCompleted: %u\n", numCompleted);
+		Shell_CompleteSend(state, handle, numCompleted);
+	}
+
+	return 0;
+}
+
+static u32
+Vmxnet3Plugin_AddFrameToTxRing(struct Plugin_State *state,
+			       u32 queueNum,
+			       const struct Plugin_SendInfo *info,
+			       const struct Plugin_SgList *frame,
+			       bool lastFrame)
+{
+	/*
+	 * Queues one frame on tx queue 'queueNum'.
+	 *
+	 * state     - plugin state; VMXNET3_PLUGIN_STATE() yields the
+	 *             vmxnet3-specific part
+	 * queueNum  - index into customState->txQueues
+	 * info      - per-frame offload settings (vlan, tso, checksum)
+	 * frame     - scatter-gather list; frame->totalLength bytes are
+	 *             queued starting at frame->elements
+	 * lastFrame - when true the tx producer register is written if
+	 *             the insert index has moved since the last write
+	 *
+	 * Every descriptor of the frame except the first is written with
+	 * the current generation; the first keeps the "wrong" generation
+	 * until the whole frame is in place and is flipped last.
+	 *
+	 * Returns 0 if the whole frame was placed in the ring and 1 if the
+	 * ring filled up first.  In the latter case nextCmdInsert and
+	 * genCmd are left unchanged.
+	 */
+	struct Vmxnet3PluginCustomState *customState =
+		VMXNET3_PLUGIN_STATE(state);
+	struct Vmxnet3PluginTxQueue *txQueue = &customState->txQueues[queueNum];
+	u32 bytesRemainInFrame = frame->totalLength;
+	struct Vmxnet3_TxDesc descTemplate = {0};
+	/* can't update nextCmdInsert until success */
+	u32 insertOffset = txQueue->nextCmdInsert;
+	/* firstDesc[GenBit] used to set the gen bit as the last operation */
+	struct Vmxnet3_TxDesc *firstDesc = txQueue->txCmdVirt + insertOffset;
+	u8 firstDescGenBit = txQueue->genCmd;
+	const struct Plugin_SgElement *currSg = frame->elements;
+	u32 currSgOffset = 0;
+	/* can't update genCmd until success */
+	u8 currentGen = txQueue->genCmd;
+	/* number of descriptors filled in for this frame */
+	u32 numDescs = 0;
+
+	/* set up a template descriptor used for all entries for the frame */
+	descTemplate.gen = !currentGen; /* start with "wrong" generation */
+	if (info->vlan) {
+		descTemplate.ti = 1;
+		descTemplate.tci = info->vlanTag;
+	}
+
+	if (info->tso) {
+		descTemplate.msscof = info->tsoMss;
+		descTemplate.om = VMXNET3_OM_TSO;
+		/* end of tcp header */
+		descTemplate.hlen = (u16)info->l4DataOffset;
+	} else if (info->xsumTcpOrUdp) {
+		descTemplate.msscof = info->l4HeaderOffset + (info->tcp ?
+							      TCP_CSUM_OFFSET :
+							      UDP_CSUM_OFFSET);
+		descTemplate.om = VMXNET3_OM_CSUM;
+		/* end of ip header */
+		descTemplate.hlen = (u16)info->l4HeaderOffset;
+	}
+
+	/* loop to stick buffers in the ring */
+	while (bytesRemainInFrame) {
+		struct Vmxnet3_TxDesc *currDesc = txQueue->txCmdVirt +
+			insertOffset;
+		u32 nextOffset;
+		u32 bytesInSg;
+
+		/*
+		 * make sure we always leave at least one empty
+		 * descriptor when the ring gets full
+		 */
+		nextOffset = insertOffset + 1;
+		if (nextOffset >= txQueue->ringSize)
+			nextOffset = 0;
+
+		if (nextOffset == txQueue->nextCmdRemove) {
+			Shell_Log(state, 4, 2,
+				  "full ring since nextOffset %u == "
+				  "txQueue->nextCmdRemove %u\n",
+				  nextOffset, txQueue->nextCmdRemove);
+			break;
+		}
+
+		/* copy the template and patch in the address/length info */
+		MoveMemory(currDesc, &descTemplate, sizeof descTemplate);
+
+		currDesc->addr = currSg->pa + currSgOffset;
+		bytesInSg = currSg->length - currSgOffset;
+
+		if (bytesInSg < VMXNET3_MAX_TX_BUF_SIZE) {
+			currDesc->len = bytesInSg;
+			++currSg;
+			currSgOffset = 0;
+		} else {
+			/*
+			 * a full-size chunk is written with len 0;
+			 * VMXNET3_MAX_TX_BUF_SIZE bytes are accounted for it
+			 */
+			currDesc->len = 0;
+			if (bytesInSg == VMXNET3_MAX_TX_BUF_SIZE) {
+				++currSg;
+				currSgOffset = 0;
+			} else {
+				/* don't advance to next SG element */
+				currSgOffset += VMXNET3_MAX_TX_BUF_SIZE;
+			}
+			bytesRemainInFrame -= VMXNET3_MAX_TX_BUF_SIZE;
+		}
+
+		bytesRemainInFrame -= currDesc->len;
+		++numDescs;
+
+		/* set EOP/CQ in the last descriptor */
+		if (bytesRemainInFrame == 0) {
+			currDesc->eop = 1;
+			currDesc->cq = 1;
+		}
+
+		/* write gen in all descriptors but the first one */
+		if (currDesc != firstDesc)
+			currDesc->gen = currentGen;
+
+		Shell_Log(state, 4, 4,
+			  "txdesc[%u] sgOffset: %u len: %u gen: %u\n",
+			  insertOffset, currSgOffset,
+			  currDesc->len, currDesc->gen);
+
+		/* advance to the next desc */
+		++insertOffset;
+		if (insertOffset >= txQueue->ringSize) {
+			insertOffset = 0;
+			/* update with new "wrong" generation */
+			descTemplate.gen = currentGen;
+			VMXNET3_FLIP_RING_GEN(currentGen);
+		}
+	}
+
+	/*
+	 * if frame successfully added, then update locations.
+	 *
+	 * A zero-length frame writes no descriptor at all; in that case
+	 * firstDesc still holds whatever was left in the slot, so its gen
+	 * bit must not be flipped or an unfilled descriptor would be
+	 * published.
+	 */
+	if (bytesRemainInFrame == 0 && numDescs > 0) {
+		/* set the correct gen bit of the first descriptor */
+		firstDesc->gen = firstDescGenBit;
+
+		/* update state stored in tx queue */
+		txQueue->nextCmdInsert = insertOffset;
+		txQueue->genCmd = currentGen;
+	}
+
+	/*
+	 * Update the device register when we're told it's the
+	 * last frame. The assumption/expectation is that for
+	 * non-vmxnet3 plugins 'lastFrame' will really be based
+	 * on the last frame, whereas for the vmxnet3 plugin the
+	 * shell will use the usual vmxnet3 logic/interaction
+	 * with the shared memory and use 'lastFrame' to tell
+	 * us if we should touch the device register.
+	 * It might be more straightforward for the shell to
+	 * just touch it for the plugin.
+	 *
+	 * Also update the register when we run out of
+	 * descriptors. This may force the device to process packets.
+	 */
+
+	if ((lastFrame || bytesRemainInFrame != 0) &&
+	    txQueue->hwCmdInsert != txQueue->nextCmdInsert) {
+		VMXNET3_WRITE_REG(state, txQueue->txProdOffset,
+				  txQueue->nextCmdInsert);
+		txQueue->hwCmdInsert = txQueue->nextCmdInsert;
+	}
+
+	return (bytesRemainInFrame == 0) ? 0 : 1;
+}
+
+
+/*
+ * Writes 0 to the IMR register of interrupt message 'messageIndex'
+ * (registers are spaced 8 bytes apart from VMXNET3_REG_IMR).
+ * Always returns 0.
+ */
+static u32
+Vmxnet3Plugin_EnableInterrupt(struct Plugin_State *state,
+			      u32 messageIndex)
+{
+	VMXNET3_WRITE_REG(state, VMXNET3_REG_IMR + messageIndex * 8, 0);
+	return 0;
+}
+
+
+/*
+ * Writes 1 to the IMR register of interrupt message 'messageIndex'
+ * (registers are spaced 8 bytes apart from VMXNET3_REG_IMR).
+ * Always returns 0.
+ */
+static u32
+Vmxnet3Plugin_DisableInterrupt(struct Plugin_State *state,
+			       u32 messageIndex)
+{
+	VMXNET3_WRITE_REG(state, VMXNET3_REG_IMR + messageIndex * 8, 1);
+	return 0;
+}
+
+
+/*
+ * Plugin entry point: fills 'pluginApi' with the vmxnet3 plugin's
+ * handlers.  Always returns 0.
+ */
+u32
+NPA_PluginMain(struct Plugin_Api *pluginApi)
+{
+	pluginApi->swInit = Vmxnet3Plugin_SwInit;
+	pluginApi->reinitRxRing = Vmxnet3Plugin_ReinitRxRing;
+	pluginApi->reinitTxRing = Vmxnet3Plugin_ReinitTxRing;
+	pluginApi->addBuffersToRxRing = Vmxnet3Plugin_AddBuffersToRxRing;
+	pluginApi->addFrameToTxRing = Vmxnet3Plugin_AddFrameToTxRing;
+	pluginApi->checkRxRing = Vmxnet3Plugin_CheckRxRing;
+	pluginApi->checkTxRing = Vmxnet3Plugin_CheckTxRing;
+	pluginApi->enableInterrupt = Vmxnet3Plugin_EnableInterrupt;
+	pluginApi->disableInterrupt = Vmxnet3Plugin_DisableInterrupt;
+	return 0;
+}
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/virtualization