Skip to content

Commit 74ffbd2

Browse files
committed
Merge branch 'master' into gh
2 parents 0b2fee6 + 479705b commit 74ffbd2

4 files changed

Lines changed: 67 additions & 74 deletions

File tree

share/man/man4/netmap.4

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -498,8 +498,7 @@ can be delayed indefinitely.
498498
This flag helps detect
499499
when packets have been sent and a file descriptor can be closed.
500500
.It NS_FORWARD
501-
When a ring is in 'transparent' mode (see
502-
.Sx TRANSPARENT MODE ) ,
501+
When a ring is in 'transparent' mode,
503502
packets marked with this flag are forwarded to the other endpoint
504503
at the next system call, thus restoring (in a selective way)
505504
the connection between a NIC and the host stack.
@@ -958,7 +957,7 @@ rates, and use multiple send/receive threads and cores.
958957
is another test program which interconnects two
959958
.Nm
960959
ports.
961-
It can be used for transparent forwarding between
960+
It can be used for zero-copy forwarding between
962961
interfaces, as in
963962
.Dl bridge -i ix0 -i ix1
964963
or even connect the NIC to the host stack using netmap

sys/dev/netmap/netmap.c

Lines changed: 64 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ int netmap_mitigate = 1;
484484
int netmap_no_pendintr = 1;
485485
int netmap_txsync_retry = 2;
486486
int netmap_flags = 0; /* debug flags */
487-
static int netmap_fwd = 0; /* force transparent mode */
487+
static int netmap_fwd = 0; /* force transparent forwarding */
488488

489489
/*
490490
* netmap_admode selects the netmap mode to use.
@@ -1041,20 +1041,27 @@ netmap_dtor(void *data)
10411041
}
10421042

10431043

1044-
1045-
10461044
/*
1047-
* Handlers for synchronization of the queues from/to the host.
1048-
* Netmap has two operating modes:
1049-
* - in the default mode, the rings connected to the host stack are
1050-
* just another ring pair managed by userspace;
1051-
* - in transparent mode (XXX to be defined) incoming packets
1052-
* (from the host or the NIC) are marked as NS_FORWARD upon
1053-
* arrival, and the user application has a chance to reset the
1054-
* flag for packets that should be dropped.
1055-
* On the RXSYNC or poll(), packets in RX rings between
1056-
* kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
1057-
* to the other side.
1045+
* Handlers for synchronization of the rings from/to the host stack.
1046+
* These are associated with a network interface and are just another
1047+
* ring pair managed by userspace.
1048+
*
1049+
* Netmap also supports transparent forwarding (NS_FORWARD and NR_FORWARD
1050+
* flags):
1051+
*
1052+
* - Before releasing buffers on hw RX rings, the application can mark
1053+
* them with the NS_FORWARD flag. During the next RXSYNC or poll(), they
1054+
* will be forwarded to the host stack, similarly to what would happen if
1055+
* the application moved them to the host TX ring.
1056+
*
1057+
* - Before releasing buffers on the host RX ring, the application can
1058+
* mark them with the NS_FORWARD flag. During the next RXSYNC or poll(),
1059+
* they will be forwarded to the hw TX rings, saving the application
1060+
* from doing the same task in user-space.
1061+
*
1062+
* Transparent forwarding can be enabled per-ring, by setting the NR_FORWARD
1063+
* flag, or globally with the netmap_fwd sysctl.
1064+
*
10581065
* The transfer NIC --> host is relatively easy, just encapsulate
10591066
* into mbufs and we are done. The host --> NIC side is slightly
10601067
* harder because there might not be room in the tx ring so it
@@ -1063,16 +1070,18 @@ netmap_dtor(void *data)
10631070

10641071

10651072
/*
1066-
* pass a chain of buffers to the host stack as coming from 'dst'
1073+
* Pass a whole queue of mbufs to the host stack as coming from 'dst'
10671074
* We do not need to lock because the queue is private.
1075+
* After this call the queue is empty.
10681076
*/
10691077
static void
10701078
netmap_send_up(struct ifnet *dst, struct mbq *q)
10711079
{
10721080
struct mbuf *m;
10731081
struct mbuf *head = NULL, *prev = NULL;
10741082

1075-
/* send packets up, outside the lock */
1083+
/* Send packets up, outside the lock; head/prev machinery
1084+
* is only useful for Windows. */
10761085
while ((m = mbq_dequeue(q)) != NULL) {
10771086
if (netmap_verbose & NM_VERB_HOST)
10781087
D("sending up pkt %p size %d", m, MBUF_LEN(m));
@@ -1087,9 +1096,9 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
10871096

10881097

10891098
/*
1090-
* put a copy of the buffers marked NS_FORWARD into an mbuf chain.
1091-
* Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
1092-
* and pass them up. Drop remaining packets in the unlikely event
1099+
* Scan the buffers from hwcur to ring->head, and put a copy of those
1100+
* marked NS_FORWARD (or all of them if forced) into a queue of mbufs.
1101+
* Drop remaining packets in the unlikely event
10931102
* of an mbuf shortage.
10941103
*/
10951104
static void
@@ -1159,6 +1168,8 @@ netmap_sw_to_nic(struct netmap_adapter *na)
11591168

11601169
/* scan rings to find space, then fill as much as possible */
11611170
for (i = 0; i < na->num_tx_rings; i++) {
1171+
/* XXX some krings may not be in netmap mode,
1172+
* buffers may not be there */
11621173
struct netmap_kring *kdst = &na->tx_rings[i];
11631174
struct netmap_ring *rdst = kdst->ring;
11641175
u_int const dst_lim = kdst->nkr_num_slots - 1;
@@ -1186,6 +1197,8 @@ netmap_sw_to_nic(struct netmap_adapter *na)
11861197
dst->len = tmp.len;
11871198
dst->flags = NS_BUF_CHANGED;
11881199

1200+
/* XXX is it safe to write head/cur concurrently to
1201+
* the userspace application? */
11891202
rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
11901203
}
11911204
/* if (sent) XXX txsync ? */
@@ -1209,9 +1222,7 @@ netmap_txsync_to_host(struct netmap_kring *kring, int flags)
12091222
struct mbq q;
12101223

12111224
/* Take packets from hwcur to head and pass them up.
1212-
* force head = cur since netmap_grab_packets() stops at head
1213-
* In case of no buffers we give up. At the end of the loop,
1214-
* the queue is drained in all cases.
1225+
* Force hwcur = head since netmap_grab_packets() stops at head
12151226
*/
12161227
mbq_init(&q);
12171228
netmap_grab_packets(kring, &q, 1 /* force */);
@@ -1259,7 +1270,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
12591270
uint32_t stop_i;
12601271

12611272
nm_i = kring->nr_hwtail;
1262-
stop_i = nm_prev(nm_i, lim);
1273+
stop_i = nm_prev(kring->nr_hwcur, lim);
12631274
while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
12641275
int len = MBUF_LEN(m);
12651276
struct netmap_slot *slot = &ring->slot[nm_i];
@@ -2481,9 +2492,11 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
24812492
*/
24822493
int retry_tx = 1, retry_rx = 1;
24832494

2484-
/* transparent mode: send_down is 1 if we have found some
2485-
* packets to forward during the rx scan and we have not
2486-
* sent them down to the nic yet
2495+
/* Transparent mode: send_down is 1 if we have found some
2496+
* packets to forward (host RX ring --> NIC) during the rx
2497+
* scan and we have not sent them down to the NIC yet.
2498+
* Transparent mode requires binding all rings to a single
2499+
* file descriptor.
24872500
*/
24882501
int send_down = 0;
24892502

@@ -2648,21 +2661,24 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
26482661
/* now we can use kring->rcur, rtail */
26492662

26502663
/*
2651-
* transparent mode support: collect packets
2652-
* from the rxring(s).
2664+
* transparent mode support: collect packets from
2665+
* hw rxring(s) that have been released by the user
26532666
*/
26542667
if (nm_may_forward_up(kring)) {
2655-
ND(10, "forwarding some buffers up %d to %d",
2656-
kring->nr_hwcur, ring->cur);
2668+
ND(2, "forwarding some buffers up %d to %d",
2669+
kring->nr_hwcur, ring->cur);
26572670
netmap_grab_packets(kring, &q, netmap_fwd);
26582671
}
26592672

2673+
/* Clear the NR_FORWARD flag anyway, it may be set by
2674+
* the nm_sync() below only for the host RX ring (see
2675+
* netmap_rxsync_from_host()). */
26602676
kring->nr_kflags &= ~NR_FORWARD;
26612677
if (kring->nm_sync(kring, 0))
26622678
revents |= POLLERR;
26632679
else
26642680
nm_sync_finalize(kring);
2665-
send_down |= (kring->nr_kflags & NR_FORWARD); /* host ring only */
2681+
send_down |= (kring->nr_kflags & NR_FORWARD);
26662682
if (netmap_no_timestamp == 0 ||
26672683
ring->flags & NR_TIMESTAMP) {
26682684
microtime(&ring->ts);
@@ -2680,7 +2696,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
26802696
nm_os_selrecord(sr, check_all_rx ?
26812697
&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
26822698
}
2683-
if (send_down > 0 || retry_rx) {
2699+
if (send_down || retry_rx) {
26842700
retry_rx = 0;
26852701
if (send_down)
26862702
goto flush_tx; /* and retry_rx */
@@ -2690,17 +2706,13 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
26902706
}
26912707

26922708
/*
2693-
* Transparent mode: marked bufs on rx rings between
2694-
* kring->nr_hwcur and ring->head
2695-
* are passed to the other endpoint.
2696-
*
2697-
* Transparent mode requires to bind all
2698-
* rings to a single file descriptor.
2709+
* Transparent mode: released bufs (i.e. between kring->nr_hwcur and
2710+
* ring->head) marked with NS_FORWARD on hw rx rings are passed up
2711+
* to the host stack.
26992712
*/
27002713

2701-
if (q.head && !nm_kr_tryget(&na->tx_rings[na->num_tx_rings], 1, &revents)) {
2714+
if (mbq_peek(&q)) {
27022715
netmap_send_up(na->ifp, &q);
2703-
nm_kr_put(&na->tx_rings[na->num_tx_rings]);
27042716
}
27052717

27062718
return (revents);
@@ -2729,22 +2741,6 @@ netmap_notify(struct netmap_kring *kring, int flags)
27292741
return NM_IRQ_COMPLETED;
27302742
}
27312743

2732-
#if 0
2733-
static int
2734-
netmap_notify(struct netmap_adapter *na, u_int n_ring,
2735-
enum txrx tx, int flags)
2736-
{
2737-
if (tx == NR_TX) {
2738-
KeSetEvent(notes->TX_EVENT, 0, FALSE);
2739-
}
2740-
else
2741-
{
2742-
KeSetEvent(notes->RX_EVENT, 0, FALSE);
2743-
}
2744-
return 0;
2745-
}
2746-
#endif
2747-
27482744
/* called by all routines that create netmap_adapters.
27492745
* provide some defaults and get a reference to the
27502746
* memory allocator
@@ -3064,7 +3060,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
30643060
u_int error = ENOBUFS;
30653061
unsigned int txr;
30663062
struct mbq *q;
3067-
int space;
3063+
int busy;
30683064

30693065
kring = &na->rx_rings[na->num_rx_rings];
30703066
// XXX [Linux] we do not need this lock
@@ -3097,28 +3093,27 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
30973093
}
30983094

30993095
if (nm_os_mbuf_has_offld(m)) {
3100-
RD(1, "%s drop mbuf requiring offloadings", na->name);
3096+
RD(1, "%s drop mbuf that needs offloadings", na->name);
31013097
goto done;
31023098
}
31033099

3104-
/* protect against rxsync_from_host(), netmap_sw_to_nic()
3100+
/* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
31053101
* and maybe other instances of netmap_transmit (the latter
31063102
* not possible on Linux).
3107-
* Also avoid overflowing the queue.
3103+
* We enqueue the mbuf only if we are sure there is going to be
3104+
* enough room in the host RX ring, otherwise we drop it.
31083105
*/
31093106
mbq_lock(q);
31103107

3111-
space = kring->nr_hwtail - kring->nr_hwcur;
3112-
if (space < 0)
3113-
space += kring->nkr_num_slots;
3114-
if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
3115-
RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
3116-
na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
3117-
len, m);
3108+
busy = kring->nr_hwtail - kring->nr_hwcur;
3109+
if (busy < 0)
3110+
busy += kring->nkr_num_slots;
3111+
if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
3112+
RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
3113+
kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
31183114
} else {
31193115
mbq_enqueue(q, m);
3120-
ND(10, "%s %d bufs in queue len %d m %p",
3121-
na->name, mbq_len(q), len, m);
3116+
ND(2, "%s %d bufs in queue", na->name, mbq_len(q));
31223117
/* notify outside the lock */
31233118
m = NULL;
31243119
error = 0;

sys/dev/netmap/netmap_freebsd.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,6 @@ nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
253253
void *
254254
nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
255255
{
256-
257256
NA(ifp)->if_input(ifp, m);
258257
return NULL;
259258
}

sys/dev/netmap/netmap_mbq.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ void mbq_purge(struct mbq *q);
6767
static inline struct mbuf *
6868
mbq_peek(struct mbq *q)
6969
{
70-
return q->head ? q->head : NULL;
70+
return q->head;
7171
}
7272

7373
static inline void

0 commit comments

Comments
 (0)