From 36cbd3dcc10384f813ec0814255f576c84f2bcd4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 5 Aug 2009 10:42:58 -0700 Subject: net: mark read-only arrays as const String literals are constant, and usually, we can also tag the array of pointers const too, moving it to the .rodata section. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- net/sctp/debug.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 7ff548a30cfb..bf24fa697de2 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -52,7 +52,7 @@ int sctp_debug_flag = 1; /* Initially enable DEBUG */ #endif /* SCTP_DEBUG */ /* These are printable forms of Chunk ID's from section 3.1. */ -static const char *sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { +static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { "DATA", "INIT", "INIT_ACK", @@ -97,7 +97,7 @@ const char *sctp_cname(const sctp_subtype_t cid) } /* These are printable forms of the states. */ -const char *sctp_state_tbl[SCTP_STATE_NUM_STATES] = { +const char *const sctp_state_tbl[SCTP_STATE_NUM_STATES] = { "STATE_EMPTY", "STATE_CLOSED", "STATE_COOKIE_WAIT", @@ -110,7 +110,7 @@ const char *sctp_state_tbl[SCTP_STATE_NUM_STATES] = { }; /* Events that could change the state of an association. */ -const char *sctp_evttype_tbl[] = { +const char *const sctp_evttype_tbl[] = { "EVENT_T_unknown", "EVENT_T_CHUNK", "EVENT_T_TIMEOUT", @@ -119,7 +119,7 @@ const char *sctp_evttype_tbl[] = { }; /* Return value of a state function */ -const char *sctp_status_tbl[] = { +const char *const sctp_status_tbl[] = { "DISPOSITION_DISCARD", "DISPOSITION_CONSUME", "DISPOSITION_NOMEM", @@ -132,7 +132,7 @@ const char *sctp_status_tbl[] = { }; /* Printable forms of primitives */ -static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { +static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { "PRIMITIVE_ASSOCIATE", "PRIMITIVE_SHUTDOWN", "PRIMITIVE_ABORT", @@ -149,7 +149,7 @@ const char *sctp_pname(const sctp_subtype_t id) return "unknown_primitive"; } -static const char *sctp_other_tbl[] = { +static const char *const sctp_other_tbl[] = { "NO_PENDING_TSN", "ICMP_PROTO_UNREACH", }; @@ -162,7 +162,7 @@ const char *sctp_oname(const sctp_subtype_t id) return "unknown 'other' event"; } -static const char *sctp_timer_tbl[] = { +static const char *const sctp_timer_tbl[] = { "TIMEOUT_NONE", "TIMEOUT_T1_COOKIE", "TIMEOUT_T1_INIT", -- cgit v1.2.3 From 425e0f685230986511b1fdf80340e2f28b214c5d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 16 Jun 2009 14:47:30 +0800 Subject: sctp: avoid overwrite the return value of sctp_process_asconf_ack() The return value of sctp_process_asconf_ack() may be overwritten while process parameters with no error. This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 61cc6075b0df..b7acc9ca793a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3104,7 +3104,7 @@ done: } /* Process a asconf parameter that is successfully acked. */ -static int sctp_asconf_param_success(struct sctp_association *asoc, +static void sctp_asconf_param_success(struct sctp_association *asoc, sctp_addip_param_t *asconf_param) { struct sctp_af *af; @@ -3113,7 +3113,6 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, union sctp_addr_param *addr_param; struct sctp_transport *transport; struct sctp_sockaddr_entry *saddr; - int retval = 0; addr_param = (union sctp_addr_param *) ((void *)asconf_param + sizeof(sctp_addip_param_t)); @@ -3136,7 +3135,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, break; case SCTP_PARAM_DEL_IP: local_bh_disable(); - retval = sctp_del_bind_addr(bp, &addr); + sctp_del_bind_addr(bp, &addr); local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { @@ -3148,8 +3147,6 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, default: break; } - - return retval; } /* Get the corresponding ASCONF response error code from the ASCONF_ACK chunk @@ -3266,7 +3263,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, switch (err_code) { case SCTP_ERROR_NO_ERROR: - retval = sctp_asconf_param_success(asoc, asconf_param); + sctp_asconf_param_success(asoc, asconf_param); break; case SCTP_ERROR_RSRC_LOW: -- cgit v1.2.3 From 44e65c1ef1e771b32c82546ebfba910137aa8871 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 16 Jun 2009 14:48:24 +0800 Subject: sctp: check the unrecognized ASCONF parameter before access it This patch fix to check the unrecognized ASCONF parameter before access it. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b7acc9ca793a..3d867ce0e5ec 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2861,6 +2861,11 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, addr_param = (union sctp_addr_param *) ((void *)asconf_param + sizeof(sctp_addip_param_t)); + if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && + asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && + asconf_param->param_hdr.type != SCTP_PARAM_SET_PRIMARY) + return SCTP_ERROR_UNKNOWN_PARAM; + switch (addr_param->v4.param_hdr.type) { case SCTP_PARAM_IPV6_ADDRESS: if (!asoc->peer.ipv6_address) @@ -2958,9 +2963,6 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, sctp_assoc_set_primary(asoc, peer); break; - default: - return SCTP_ERROR_UNKNOWN_PARAM; - break; } return SCTP_ERROR_NO_ERROR; -- cgit v1.2.3 From 3cd9749c0b758223a71e059fa44c2234547d9ee0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 16 Jun 2009 10:07:23 +0800 Subject: sctp: update the route for non-active transports after addresses are added Update the route and saddr entries for the non-active transports as some of the added addresses can be used as better source addresses, or may be there is a better route. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3d867ce0e5ec..9d881a61ac02 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3134,6 +3134,14 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, saddr->state = SCTP_ADDR_SRC; } local_bh_enable(); + list_for_each_entry(transport, &asoc->peer.transport_addr_list, + transports) { + if (transport->state == SCTP_ACTIVE) + continue; + dst_release(transport->dst); + sctp_transport_route(transport, NULL, + sctp_sk(asoc->base.sk)); + } break; case SCTP_PARAM_DEL_IP: local_bh_disable(); -- cgit v1.2.3 From 40187886bc31aee9c5c6f08f46cde4ab618e9736 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 Jun 2009 11:28:05 -0400 Subject: sctp: release cached route when the transport goes down. When the sctp transport is marked down, we can release the cached route and force a new lookup when attempting to use this transport for anything. This way, if a better route or source address is available, we'll try to use it. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 525864bf4f07..215b56951d76 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -810,11 +810,16 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, break; case SCTP_TRANSPORT_DOWN: - /* if the transort was never confirmed, do not transition it - * to inactive state. + /* If the transport was never confirmed, do not transition it + * to inactive state. Also, release the cached route since + * there may be a better route next time. */ if (transport->state != SCTP_UNCONFIRMED) transport->state = SCTP_INACTIVE; + else { + dst_release(transport->dst); + transport->dst = NULL; + } spc_state = SCTP_ADDR_UNREACHABLE; break; -- cgit v1.2.3 From af87b823ca2b05257192e8d48dc686db6173d7b2 Mon Sep 17 00:00:00 2001 From: Doug Graham Date: Wed, 29 Jul 2009 12:05:57 -0400 Subject: sctp: Fix piggybacked ACKs This patch corrects the conditions under which a SACK will be piggybacked on a DATA packet. The previous condition was incorrect due to a misinterpretation of RFC 4960 and/or RFC 2960. Specifically, the following paragraph from section 6.2 had not been implemented correctly: Before an endpoint transmits a DATA chunk, if any received DATA chunks have not been acknowledged (e.g., due to delayed ack), the sender should create a SACK and bundle it with the outbound DATA chunk, as long as the size of the final SCTP packet does not exceed the current MTU. See Section 6.2. When about to send a DATA chunk, the code now checks to see if the SACK timer is running. If it is, we know we have a SACK to send to the peer, so we append the SACK (assuming available space in the packet) and turn off the timer. For a simple request-response scenario, this will result in the SACK being bundled with the response, meaning the the SACK is received quickly by the client, and also meaning that no separate SACK packet needs to be sent by the server to acknowledge the request. Prior to this patch, a separate SACK packet would have been sent by the server SCTP only after its delayed-ACK timer had expired (usually 200ms). This is wasteful of bandwidth, and can also have a major negative impact on performance due the interaction of delayed ACKs with the Nagle algorithm. Signed-off-by: Doug Graham Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index b94c21190566..94c110dcaf1d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -234,18 +234,19 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, if (sctp_chunk_is_data(chunk) && !pkt->has_sack && !pkt->has_cookie_echo) { struct sctp_association *asoc; + struct timer_list *timer; asoc = pkt->transport->asoc; + timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (asoc->a_rwnd > asoc->rwnd) { + /* If the SACK timer is running, we have a pending SACK */ + if (timer_pending(timer)) { struct sctp_chunk *sack; asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { - struct timer_list *timer; retval = sctp_packet_append_chunk(pkt, sack); asoc->peer.sack_needed = 0; - timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); } } -- cgit v1.2.3 From bec9640bb0d451813b1bb1f2cc13a5bfb17c3e48 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 30 Jul 2009 18:08:28 -0400 Subject: sctp: Disallow new connection on a closing socket If a socket has a lot of association that are in the process of of being closed/aborted, it is possible for a remote to establish new associations during the time period that the old ones are shutting down. If this was a result of a close() call, there will be no socket and will cause a memory leak. We'll prevent this by setting the socket state to CLOSING and disallow new associations when in this state. Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 9 +++++++++ net/sctp/socket.c | 1 + 2 files changed, 10 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7288192f7df5..50225dd2e6dc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -334,6 +334,15 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + /* If the INIT is coming toward a closing socket, we'll send back + * and ABORT. Essentially, this catches the race of INIT being + * backloged to the socket at the same time as the user isses close(). + * Since the socket and all its associations are going away, we + * can treat this OOTB + */ + if (sctp_sstate(ep->base.sk, CLOSING)) + return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 971890dbfea0..a7e544e3f28a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1361,6 +1361,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) sctp_lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; + sk->sk_state = SCTP_SS_CLOSING; ep = sctp_sk(sk)->ep; -- cgit v1.2.3 From 3e62abf92f34d75fe22352d8d102e3cd2755804d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:56 -0400 Subject: sctp: Fix data segmentation with small frag_size Since an application may specify the maximum SCTP fragment size that all data should be fragmented to, we need to fix how we do segmentation. Right now, if a user specifies a small fragment size, the segment size can go negative in the presence of AUTH or COOKIE_ECHO bundling. What we need to do is track the largest possbile DATA chunk that can fit into the mtu. Then if the fragment size specified is bigger then this maximum length, we'll shrink it down. Otherwise, we just use the smaller segment size without changing it further. Signed-off-by: Vlad Yasevich --- net/sctp/chunk.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 1748ef90950c..9292294dbc12 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -158,6 +158,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, { int max, whole, i, offset, over, err; int len, first_len; + int max_data; struct sctp_chunk *chunk; struct sctp_datamsg *msg; struct list_head *pos, *temp; @@ -179,8 +180,14 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, __func__, msg, msg->expires_at, jiffies); } - max = asoc->frag_point; + /* This is the biggest possible DATA chunk that can fit into + * the packet + */ + max_data = asoc->pathmtu - + sctp_sk(asoc->base.sk)->pf->af->net_header_len - + sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); + max = asoc->frag_point; /* If the the peer requested that we authenticate DATA chunks * we need to accound for bundling of the AUTH chunks along with * DATA. @@ -189,23 +196,30 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + + max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + hmac_desc->hmac_len); } + /* Now, check if we need to reduce our max */ + if (max > max_data) + max = max_data; + whole = 0; first_len = max; /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) { - whole = msg_len / (max - SCTP_ARBITRARY_COOKIE_ECHO_LEN); + max_data -= SCTP_ARBITRARY_COOKIE_ECHO_LEN; - /* Account for the DATA to be bundled with the COOKIE-ECHO. */ - if (whole) { - first_len = max - SCTP_ARBITRARY_COOKIE_ECHO_LEN; - msg_len -= first_len; - whole = 1; - } + /* This is the biggesr first_len we can have */ + if (first_len > max_data) + first_len = max_data; + } + + /* Account for a different sized first fragment */ + if (msg_len >= first_len) { + msg_len -= first_len; + whole = 1; } /* How many full sized? How many bytes leftover? */ -- cgit v1.2.3 From e83963b769a2c38b436f5dcf82309f5cbc2f6012 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 7 Aug 2009 10:43:07 -0400 Subject: sctp: Generate SACKs when actually sending outbound DATA We are now trying to bundle SACKs when we have outbound DATA to send. However, there are situations where this outbound DATA will not be sent (due to congestion or available window). In such cases it's ok to wait for the timer to expire. This patch refactors the sending code so that betfore attempting to bundle the SACK we check to see if the DATA will actually be transmitted. Based on eirlier works for Doug Graham and Wei Youngjun . Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 142 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 57 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index 94c110dcaf1d..e25e2e20b63d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -61,8 +61,13 @@ #include /* Forward declarations for private helpers. */ -static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, +static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk); +static void sctp_packet_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk); +static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len); /* Config a packet. * This appears to be a followup set of initializations. @@ -262,13 +267,20 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, { sctp_xmit_t retval = SCTP_XMIT_OK; __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); - size_t psize; - size_t pmtu; - int too_big; SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, packet, chunk); + /* Data chunks are special. Before seeing what else we can + * bundle into this packet, check to see if we are allowed to + * send this DATA. + */ + if (sctp_chunk_is_data(chunk)) { + retval = sctp_packet_can_append_data(packet, chunk); + if (retval != SCTP_XMIT_OK) + goto finish; + } + /* Try to bundle AUTH chunk */ retval = sctp_packet_bundle_auth(packet, chunk); if (retval != SCTP_XMIT_OK) @@ -279,51 +291,16 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, if (retval != SCTP_XMIT_OK) goto finish; - psize = packet->size; - pmtu = ((packet->transport->asoc) ? - (packet->transport->asoc->pathmtu) : - (packet->transport->pathmtu)); - - too_big = (psize + chunk_len > pmtu); - - /* Decide if we need to fragment or resubmit later. */ - if (too_big) { - /* It's OK to fragmet at IP level if any one of the following - * is true: - * 1. The packet is empty (meaning this chunk is greater - * the MTU) - * 2. The chunk we are adding is a control chunk - * 3. The packet doesn't have any data in it yet and data - * requires authentication. - */ - if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || - (!packet->has_data && chunk->auth)) { - /* We no longer do re-fragmentation. - * Just fragment at the IP layer, if we - * actually hit this condition - */ - packet->ipfragok = 1; - goto append; - - } else { - retval = SCTP_XMIT_PMTU_FULL; - goto finish; - } - } - -append: - /* We believe that this chunk is OK to add to the packet (as - * long as we have the cwnd for it). - */ + /* Check to see if this chunk will fit into the packet */ + retval = sctp_packet_will_fit(packet, chunk, chunk_len); + if (retval != SCTP_XMIT_OK) + goto finish; - /* DATA is a special case since we must examine both rwnd and cwnd - * before we send DATA. - */ + /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: - retval = sctp_packet_append_data(packet, chunk); - if (SCTP_XMIT_OK != retval) - goto finish; + /* Account for the data being in the packet */ + sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; /* Disallow AUTH bundling after DATA */ @@ -633,16 +610,15 @@ nomem: * 2nd Level Abstractions ********************************************************************/ -/* This private function handles the specifics of appending DATA chunks. */ -static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, +/* This private function check to see if a chunk can be added */ +static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - size_t datasize, rwnd, inflight; + size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; - struct sctp_sock *sp = sctp_sk(asoc->base.sk); struct sctp_outq *q = &asoc->outqueue; /* RFC 2960 6.1 Transmission of DATA Chunks @@ -659,7 +635,8 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, */ rwnd = asoc->peer.rwnd; - inflight = asoc->outqueue.outstanding_bytes; + inflight = q->outstanding_bytes; + flight_size = transport->flight_size; datasize = sctp_data_size(chunk); @@ -682,8 +659,8 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * cwnd = flightsize + Max.Burst * MTU */ max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = transport->flight_size + max_burst_bytes; + if ((flight_size + max_burst_bytes) < transport->cwnd) { + transport->cwnd = flight_size + max_burst_bytes; SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " "transport: %p, cwnd: %d, " "ssthresh: %d, flight_size: %d, " @@ -708,7 +685,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * ignore the value of cwnd and SHOULD NOT delay retransmission. */ if (chunk->fast_retransmit != SCTP_NEED_FRTX) - if (transport->flight_size >= transport->cwnd) { + if (flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; } @@ -718,8 +695,8 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * if any previously transmitted data on the connection remains * unacknowledged. */ - if (!sp->nodelay && sctp_packet_empty(packet) && - q->outstanding_bytes && sctp_state(asoc, ESTABLISHED)) { + if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && + inflight && sctp_state(asoc, ESTABLISHED)) { unsigned len = datasize + q->out_qlen; /* Check whether this chunk and all the rest of pending @@ -732,6 +709,19 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, } } +finish: + return retval; +} + +/* This private function does management things when adding DATA chunk */ +static void sctp_packet_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk) +{ + struct sctp_transport *transport = packet->transport; + size_t datasize = sctp_data_size(chunk); + struct sctp_association *asoc = transport->asoc; + u32 rwnd = asoc->peer.rwnd; + /* Keep track of how many bytes are in flight over this transport. */ transport->flight_size += datasize; @@ -754,7 +744,45 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, /* Has been accepted for transmission. */ if (!asoc->peer.prsctp_capable) chunk->msg->can_abandon = 0; +} + +static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len) +{ + size_t psize; + size_t pmtu; + int too_big; + sctp_xmit_t retval = SCTP_XMIT_OK; + + psize = packet->size; + pmtu = ((packet->transport->asoc) ? + (packet->transport->asoc->pathmtu) : + (packet->transport->pathmtu)); + + too_big = (psize + chunk_len > pmtu); + + /* Decide if we need to fragment or resubmit later. */ + if (too_big) { + /* It's OK to fragmet at IP level if any one of the following + * is true: + * 1. The packet is empty (meaning this chunk is greater + * the MTU) + * 2. The chunk we are adding is a control chunk + * 3. The packet doesn't have any data in it yet and data + * requires authentication. + */ + if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || + (!packet->has_data && chunk->auth)) { + /* We no longer do re-fragmentation. + * Just fragment at the IP layer, if we + * actually hit this condition + */ + packet->ipfragok = 1; + } else { + retval = SCTP_XMIT_PMTU_FULL; + } + } -finish: return retval; } -- cgit v1.2.3 From 5d7ff261ef497c62f54c39effc259910a28b313d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 7 Aug 2009 13:23:28 -0400 Subject: sctp: Try to encourage SACK bundling with DATA. If the association has a SACK timer pending and now DATA queued to be send, we'll try to bundle the SACK with the next application send. As such, try encourage bundling by accounting for SACK in the size of the first chunk fragment. Signed-off-by: Vlad Yasevich --- net/sctp/chunk.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 9292294dbc12..7acaf15679b6 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -207,14 +207,25 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, whole = 0; first_len = max; + /* Check to see if we have a pending SACK and try to let it be bundled + * with this message. Do this if we don't have any data queued already. + * To check that, look at out_qlen and retransmit list. + * NOTE: we will not reduce to account for SACK, if the message would + * not have been fragmented. + */ + if (timer_pending(&asoc->timers[SCTP_EVENT_TIMEOUT_SACK]) && + asoc->outqueue.out_qlen == 0 && + list_empty(&asoc->outqueue.retransmit) && + msg_len > max) + max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t)); + /* Encourage Cookie-ECHO bundling. */ - if (asoc->state < SCTP_STATE_COOKIE_ECHOED) { + if (asoc->state < SCTP_STATE_COOKIE_ECHOED) max_data -= SCTP_ARBITRARY_COOKIE_ECHO_LEN; - /* This is the biggesr first_len we can have */ - if (first_len > max_data) - first_len = max_data; - } + /* Now that we adjusted completely, reset first_len */ + if (first_len > max_data) + first_len = max_data; /* Account for a different sized first fragment */ if (msg_len >= first_len) { -- cgit v1.2.3 From 9c5c62be2f794c7cee533d856f9f34c3cf21ff1b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 10 Aug 2009 13:51:03 -0400 Subject: sctp: Send user messages to the lower layer as one Currenlty, sctp breaks up user messages into fragments and sends each fragment to the lower layer by itself. This means that for each fragment we go all the way down the stack and back up. This also discourages bundling of multiple fragments when they can fit into a sigle packet (ex: due to user setting a low fragmentation threashold). We introduce a new command SCTP_CMD_SND_MSG and hand the whole message down state machine. The state machine and the side-effect parser will cork the queue, add all chunks from the message to the queue, and then un-cork the queue thus causing the chunks to get transmitted. Signed-off-by: Vlad Yasevich --- net/sctp/chunk.c | 13 +++++++++++++ net/sctp/sm_sideeffect.c | 29 ++++++++++++++++++++++++++++- net/sctp/sm_statefuns.c | 4 ++-- net/sctp/socket.c | 26 ++++++++++++++------------ 4 files changed, 57 insertions(+), 15 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7acaf15679b6..645577ddc33e 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -73,6 +73,19 @@ SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) return msg; } +void sctp_datamsg_free(struct sctp_datamsg *msg) +{ + struct sctp_chunk *chunk; + + /* This doesn't have to be a _safe vairant because + * sctp_chunk_free() only drops the refs. + */ + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_chunk_free(chunk); + + sctp_datamsg_put(msg); +} + /* Final destructruction of datamsg memory. */ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 86426aac1600..238adf7978e9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -931,6 +931,27 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, } +/* Send the whole message, chunk by chunk, to the outqueue. + * This way the whole message is queued up and bundling if + * encouraged for small fragments. + */ +static int sctp_cmd_send_msg(struct sctp_association *asoc, + struct sctp_datamsg *msg) +{ + struct sctp_chunk *chunk; + int error = 0; + + list_for_each_entry(chunk, &msg->chunks, frag_list) { + error = sctp_outq_tail(&asoc->outqueue, chunk); + if (error) + break; + } + + return error; +} + + + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1575,7 +1596,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_UPDATE_INITTAG: asoc->peer.i.init_tag = cmd->obj.u32; break; - + case SCTP_CMD_SEND_MSG: + if (!asoc->outqueue.cork) { + sctp_outq_cork(&asoc->outqueue); + local_cork = 1; + } + error = sctp_cmd_send_msg(asoc, cmd->obj.msg); + break; default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 50225dd2e6dc..910926906a3a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4555,9 +4555,9 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; + struct sctp_datamsg *msg = arg; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(chunk)); + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_MSG, SCTP_DATAMSG(msg)); return SCTP_DISPOSITION_CONSUME; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a7e544e3f28a..95a5623d79a0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1814,20 +1814,22 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_set_owner_w(chunk); chunk->transport = chunk_tp; - - /* Send it to the lower layers. Note: all chunks - * must either fail or succeed. The lower layer - * works that way today. Keep it that way or this - * breaks. - */ - err = sctp_primitive_SEND(asoc, chunk); - /* Did the lower layer accept the chunk? */ - if (err) - sctp_chunk_free(chunk); - SCTP_DEBUG_PRINTK("We sent primitively.\n"); } - sctp_datamsg_put(datamsg); + /* Send it to the lower layers. Note: all chunks + * must either fail or succeed. The lower layer + * works that way today. Keep it that way or this + * breaks. + */ + err = sctp_primitive_SEND(asoc, datamsg); + /* Did the lower layer accept the chunk? */ + if (err) + sctp_datamsg_free(datamsg); + else + sctp_datamsg_put(datamsg); + + SCTP_DEBUG_PRINTK("We sent primitively.\n"); + if (err) goto out_free; else -- cgit v1.2.3 From a2f36eec5647548fa94fb68e2843b00fb9c0d46b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 22 Aug 2009 11:24:00 +0800 Subject: sctp: drop SHUTDOWN chunk if the TSN is less than the CTSN If Cumulative TSN Ack field of SHUTDOWN chunk is less than the Cumulative TSN Ack Point then drop the SHUTDOWN chunk. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 910926906a3a..73bdeb2b6c62 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2570,6 +2570,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, chunk->subh.shutdown_hdr = sdh; ctsn = ntohl(sdh->cum_tsn_ack); + if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { + SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); + SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; + } + /* If Cumulative TSN Ack beyond the max tsn currently * send, terminating the association and respond to the * sender with an ABORT. @@ -2633,6 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; sctp_shutdownhdr_t *sdh; + __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2644,12 +2651,19 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, commands); sdh = (sctp_shutdownhdr_t *)chunk->skb->data; + ctsn = ntohl(sdh->cum_tsn_ack); + + if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { + SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); + SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; + } /* If Cumulative TSN Ack beyond the max tsn currently * send, terminating the association and respond to the * sender with an ABORT. */ - if (!TSN_lt(ntohl(sdh->cum_tsn_ack), asoc->next_tsn)) + if (!TSN_lt(ctsn, asoc->next_tsn)) return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); /* verify, by checking the Cumulative TSN Ack field of the -- cgit v1.2.3 From dadb50cc1ada2906594df83d991f0bc388039bb6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 22 Aug 2009 11:27:37 +0800 Subject: sctp: fix check the chunk length of received HEARTBEAT-ACK chunk The receiver of the HEARTBEAT should respond with a HEARTBEAT ACK that contains the Heartbeat Information field copied from the received HEARTBEAT chunk. So the received HEARTBEAT-ACK chunk must have a length of: sizeof(sctp_chunkhdr_t) + sizeof(sctp_sender_hb_info_t) A badly formatted HB-ACK chunk, it is possible that we may access invalid memory. We should really make sure that the chunk format is what we expect, before attempting to touch the data. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 73bdeb2b6c62..7fb08a694917 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1115,7 +1115,8 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, return sctp_sf_pdiscard(ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) + + sizeof(sctp_sender_hb_info_t))) return sctp_sf_violation_chunklen(ep, asoc, type, arg, commands); -- cgit v1.2.3 From d71a09ed555e52299b5d3ad8945bdf84f65423a6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 23 Aug 2009 23:11:36 +0400 Subject: sctp: use proc_create() create_proc_entry() is deprecated (not formally, though). Signed-off-by: Alexey Dobriyan Signed-off-by: Vlad Yasevich --- net/sctp/proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index f268910620be..d093cbfeaac4 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -512,10 +512,8 @@ int __init sctp_remaddr_proc_init(void) { struct proc_dir_entry *p; - p = create_proc_entry("remaddr", S_IRUGO, proc_net_sctp); + p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops); if (!p) return -ENOMEM; - p->proc_fops = &sctp_remaddr_seq_fops; - return 0; } -- cgit v1.2.3 From b9f8478682445c2a3e0b87718a0563ef543ad94e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 26 Aug 2009 09:36:25 -0400 Subject: sctp: Fix error count increments that were results of HEARTBEATS SCTP RFC 4960 states that unacknowledged HEARTBEATS count as errors agains a given transport or endpoint. As such, we should increment the error counts for only for unacknowledged HB, otherwise we detect failure too soon. This goes for both the overall error count and the path error count. Now, there is a difference in how the detection is done between the two. The path error detection is done after the increment, so to detect it properly, we actually need to exceed the path threshold. The overall error detection is done _BEFORE_ the increment. Thus to detect the failure, it's enough for the error count to match the threshold. This is why all the state functions use '>=' to detect failure, while path detection uses '>'. Thanks goes to Chunbo Luo who first proposed patches to fix this issue and made me re-read the spec and the code to figure out how this cruft really works. Signed-off-by: Vlad Yasevich --- net/sctp/sm_sideeffect.c | 20 ++++++++++++++++---- net/sctp/sm_statefuns.c | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 238adf7978e9..694f7491731d 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -440,14 +440,26 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ - /* When probing UNCONFIRMED addresses, the association overall - * error count is NOT incremented + /* We are here due to a timer expiration. If the timer was + * not a HEARTBEAT, then normal error tracking is done. + * If the timer was a heartbeat, we only increment error counts + * when we already have an outstanding HEARTBEAT that has not + * been acknowledged. + * Additionaly, some tranport states inhibit error increments. */ - if (transport->state != SCTP_UNCONFIRMED) + if (!is_hb) { asoc->overall_error_count++; + if (transport->state != SCTP_INACTIVE) + transport->error_count++; + } else if (transport->hb_sent) { + if (transport->state != SCTP_UNCONFIRMED) + asoc->overall_error_count++; + if (transport->state != SCTP_INACTIVE) + transport->error_count++; + } if (transport->state != SCTP_INACTIVE && - (transport->error_count++ >= transport->pathmaxrxt)) { + (transport->error_count > transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", " transport IP: port:%d failed.\n", asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7fb08a694917..45b8bcafd827 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -971,7 +971,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, { struct sctp_transport *transport = (struct sctp_transport *) arg; - if (asoc->overall_error_count > asoc->max_retrans) { + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ -- cgit v1.2.3 From 33ce828131ca6655b48bd2070dadd80f816dfe0d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:58 -0400 Subject: sctp: Clear fast_recovery on the transport when T3 timer expires. If T3 timer expires, we are retransmitting data due to timeout any any fast recovery is null and void. We can clear the fast recovery flag. Signed-off-by: Vlad Yasevich --- net/sctp/transport.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e5dde45c79d3..c256e4839316 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -503,6 +503,9 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, transport->ssthresh = max(transport->cwnd/2, 4*transport->asoc->pathmtu); transport->cwnd = transport->asoc->pathmtu; + + /* T3-rtx also clears fast recovery on the transport */ + transport->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: -- cgit v1.2.3 From 4d3c46e6833208428d366630aa708f6876e61fc1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:59 -0400 Subject: sctp: drop a_rwnd to 0 when receive buffer overflows. SCTP has a problem that when small chunks are used, it is possible to exhaust the receiver buffer without fully closing receive window. This happens due to all overhead that we have account for with small messages. To fix this, when receive buffer is exceeded, we'll drop the window to 0 and save the 'drop' portion. When application starts reading data and freeing up recevie buffer space, we'll wait until we've reached the 'drop' window and then add back this 'drop' one mtu at a time. This worked well in testing and under stress produced rather even recovery. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 215b56951d76..39c3821b7d3d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -202,6 +202,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->a_rwnd = asoc->rwnd; asoc->rwnd_over = 0; + asoc->rwnd_press = 0; /* Use my own max window until I learn something better. */ asoc->peer.rwnd = SCTP_DEFAULT_MAXWINDOW; @@ -1374,6 +1375,17 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned len) asoc->rwnd += len; } + /* If we had window pressure, start recovering it + * once our rwnd had reached the accumulated pressure + * threshold. The idea is to recover slowly, but up + * to the initial advertised window. + */ + if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) { + int change = min(asoc->pathmtu, asoc->rwnd_press); + asoc->rwnd += change; + asoc->rwnd_press -= change; + } + SCTP_DEBUG_PRINTK("%s: asoc %p rwnd increased by %d to (%u, %u) " "- %u\n", __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, asoc->a_rwnd); @@ -1406,17 +1418,38 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned len) /* Decrease asoc's rwnd by len. */ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) { + int rx_count; + int over = 0; + SCTP_ASSERT(asoc->rwnd, "rwnd zero", return); SCTP_ASSERT(!asoc->rwnd_over, "rwnd_over not zero", return); + + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + /* If we've reached or overflowed our receive buffer, announce + * a 0 rwnd if rwnd would still be positive. Store the + * the pottential pressure overflow so that the window can be restored + * back to original value. + */ + if (rx_count >= asoc->base.sk->sk_rcvbuf) + over = 1; + if (asoc->rwnd >= len) { asoc->rwnd -= len; + if (over) { + asoc->rwnd_press = asoc->rwnd; + asoc->rwnd = 0; + } } else { asoc->rwnd_over = len - asoc->rwnd; asoc->rwnd = 0; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u)\n", + SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u, %u)\n", __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over); + asoc->rwnd_over, asoc->rwnd_press); } /* Build the bind address list for the association based on info from the -- cgit v1.2.3 From d4d6fb5787a6ef6e1dab731d617ebda6be73d561 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:59 -0400 Subject: sctp: Try not to change a_rwnd when faking a SACK from SHUTDOWN. We currently set a_rwnd to 0 when faking a SACK from SHUTDOWN. This results in an hung association if the remote only uses SHUTDOWNs (which it's allowed to do) to acknowlege DATA when closing. The reason for that is that we simply honor the a_rwnd from the sack, but since we faked it to be 0, we enter 0-window probing. The fix is to use the peers old rwnd and add our flight size to it. Signed-off-by: Vlad Yasevich --- net/sctp/sm_sideeffect.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 694f7491731d..8674d4919556 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1533,7 +1533,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PROCESS_CTSN: /* Dummy up a SACK for processing. */ sackh.cum_tsn_ack = cmd->obj.be32; - sackh.a_rwnd = 0; + sackh.a_rwnd = asoc->peer.rwnd + + asoc->outqueue.outstanding_bytes; sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, @@ -1632,9 +1633,9 @@ out: */ if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { if (chunk->end_of_packet || chunk->singleton) - sctp_outq_uncork(&asoc->outqueue); + error = sctp_outq_uncork(&asoc->outqueue); } else if (local_cork) - sctp_outq_uncork(&asoc->outqueue); + error = sctp_outq_uncork(&asoc->outqueue); return error; nomem: error = -ENOMEM; -- cgit v1.2.3 From b29e7907288554db9c987c36facfd0304ee8ff5a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:59 -0400 Subject: sctp: Nagle delay should be based on path mtu The decision to delay due to Nagle should be based on the path mtu and future packet size. We currently incorrectly base it on 'frag_point' which is the SCTP DATA segment size, and also we do not count DATA chunk header overhead in the computation. This actuall allows situations where a user can set low 'frag_point', and then send small messages without delay. Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index e25e2e20b63d..d0b84f6eba4d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -697,13 +697,14 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && inflight && sctp_state(asoc, ESTABLISHED)) { - unsigned len = datasize + q->out_qlen; + unsigned max = transport->pathmtu - packet->overhead; + unsigned len = chunk->skb->len + q->out_qlen; /* Check whether this chunk and all the rest of pending * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->frag_point) { + if (len < max) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } -- cgit v1.2.3 From cb95ea32a457871f72752164de8d94fa20f4703c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:59 -0400 Subject: sctp: Don't do NAGLE delay on large writes that were fragmented small SCTP will delay the last part of a large write due to NAGLE, if that part is smaller then MTU. Since we are doing large writes, we might as well send the last portion now instead of waiting untill the next large write happens. The small portion will be sent as is regardless, so it's better to not delay it. This is a result of much discussions with Wei Yongjun and Doug Graham . Many thanks go out to them. Signed-off-by: Vlad Yasevich --- net/sctp/chunk.c | 2 ++ net/sctp/output.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 645577ddc33e..acf7c4d128f7 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -59,6 +59,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->can_abandon = 0; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); + msg->msg_size = 0; } /* Allocate and initialize datamsg. */ @@ -155,6 +156,7 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu { sctp_datamsg_hold(msg); chunk->msg = msg; + msg->msg_size += chunk->skb->len; } diff --git a/net/sctp/output.c b/net/sctp/output.c index d0b84f6eba4d..b801bc9fb639 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -703,8 +703,10 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, /* Check whether this chunk and all the rest of pending * data will fit or delay in hopes of bundling a full * sized packet. + * Don't delay large message writes that may have been + * fragmeneted into small peices. */ - if (len < max) { + if ((len < max) && (chunk->msg->msg_size < max)) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } -- cgit v1.2.3 From f68b2e05f326971cd76c65aa91a1a41771dd7485 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:21:00 -0400 Subject: sctp: Fix SCTP_MAXSEG socket option to comply to spec. We had a bug that we never stored the user-defined value for MAXSEG when setting the value on an association. Thus future PMTU events ended up re-writing the frag point and increasing it past user limit. Additionally, when setting the option on the socket/endpoint, we effect all current associations, which is against spec. Now, we store the user 'maxseg' value along with the computed 'frag_point'. We inherit 'maxseg' from the socket at association creation and use it as an upper limit for 'frag_point' when its set. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 6 +++--- net/sctp/socket.c | 11 +++-------- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 39c3821b7d3d..1f05b942564e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -112,6 +112,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) * 1000; asoc->frag_point = 0; + asoc->user_frag = sp->user_frag; /* Set the association max_retrans and RTO values from the * socket values. @@ -674,7 +675,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, "%d\n", asoc, asoc->pathmtu); peer->pmtu_pending = 0; - asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); + asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); /* The asoc->peer.port might not be meaningful yet, but * initialize the packet structure anyway. @@ -1330,9 +1331,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) } if (pmtu) { - struct sctp_sock *sp = sctp_sk(asoc->base.sk); asoc->pathmtu = pmtu; - asoc->frag_point = sctp_frag_point(sp, pmtu); + asoc->frag_point = sctp_frag_point(asoc, pmtu); } SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 95a5623d79a0..89af37a6c871 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2243,7 +2243,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; - sctp_frag_point(sp, params->spp_pathmtu); + sctp_frag_point(asoc, params->spp_pathmtu); } else { sp->pathmtu = params->spp_pathmtu; } @@ -2880,15 +2880,10 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl val -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk); } - - asoc->frag_point = val; + asoc->user_frag = val; + asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); } else { sp->user_frag = val; - - /* Update the frag_point of the existing associations. */ - list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { - asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); - } } return 0; -- cgit v1.2.3 From 31b02e1549406efa346534acad956a42bc3f28c4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:21:00 -0400 Subject: sctp: Failover transmitted list on transport delete Add-IP feature allows users to delete an active transport. If that transport has chunks in flight, those chunks need to be moved to another transport or association may get into unrecoverable state. Reported-by: Rafael Laufer Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 27 +++++++++++++++++++++++++++ net/sctp/outqueue.c | 47 ++++++++++++++++++++++++++++++++++------------- net/sctp/sm_statefuns.c | 6 ++++++ 3 files changed, 67 insertions(+), 13 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1f05b942564e..caba989f4e76 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -584,6 +584,33 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, asoc->addip_last_asconf->transport == peer) asoc->addip_last_asconf->transport = NULL; + /* If we have something on the transmitted list, we have to + * save it off. The best place is the active path. + */ + if (!list_empty(&peer->transmitted)) { + struct sctp_transport *active = asoc->peer.active_path; + struct sctp_chunk *ch; + + /* Reset the transport of each chunk on this list */ + list_for_each_entry(ch, &peer->transmitted, + transmitted_list) { + ch->transport = NULL; + ch->rtt_in_progress = 0; + } + + list_splice_tail_init(&peer->transmitted, + &active->transmitted); + + /* Start a T3 timer here in case it wasn't running so + * that these migrated packets have a chance to get + * retrnasmitted. + */ + if (!timer_pending(&active->T3_rtx_timer)) + if (!mod_timer(&active->T3_rtx_timer, + jiffies + active->rto)) + sctp_transport_hold(active); + } + asoc->peer.transport_count--; sctp_transport_free(peer); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d765fc53e74d..c9f20e28521b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -406,8 +406,9 @@ void sctp_retransmit_mark(struct sctp_outq *q, * not be retransmitted */ if (!chunk->tsn_gap_acked) { - chunk->transport->flight_size -= - sctp_data_size(chunk); + if (chunk->transport) + chunk->transport->flight_size -= + sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); q->asoc->peer.rwnd += (sctp_data_size(chunk) + sizeof(struct sk_buff)); @@ -443,7 +444,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, q->asoc->peer.rwnd += (sctp_data_size(chunk) + sizeof(struct sk_buff)); q->outstanding_bytes -= sctp_data_size(chunk); - transport->flight_size -= sctp_data_size(chunk); + if (chunk->transport) + transport->flight_size -= sctp_data_size(chunk); /* sctpimpguide-05 Section 2.8.2 * M5) If a T3-rtx timer expires, the @@ -1310,6 +1312,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, __u32 rtt; __u8 restart_timer = 0; int bytes_acked = 0; + int migrate_bytes = 0; /* These state variables are for coherent debug output. --xguo */ @@ -1343,8 +1346,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, * considering it as 'outstanding'. */ if (!tchunk->tsn_gap_acked) { - tchunk->transport->flight_size -= - sctp_data_size(tchunk); + if (tchunk->transport) + tchunk->transport->flight_size -= + sctp_data_size(tchunk); q->outstanding_bytes -= sctp_data_size(tchunk); } continue; @@ -1378,6 +1382,20 @@ static void sctp_check_transmitted(struct sctp_outq *q, rtt); } } + + /* If the chunk hasn't been marked as ACKED, + * mark it and account bytes_acked if the + * chunk had a valid transport (it will not + * have a transport if ASCONF had deleted it + * while DATA was outstanding). + */ + if (!tchunk->tsn_gap_acked) { + tchunk->tsn_gap_acked = 1; + bytes_acked += sctp_data_size(tchunk); + if (!tchunk->transport) + migrate_bytes += sctp_data_size(tchunk); + } + if (TSN_lte(tsn, sack_ctsn)) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -1391,8 +1409,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, restart_timer = 1; if (!tchunk->tsn_gap_acked) { - tchunk->tsn_gap_acked = 1; - bytes_acked += sctp_data_size(tchunk); /* * SFR-CACC algorithm: * 2) If the SACK contains gap acks @@ -1432,10 +1448,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, * older than that newly acknowledged DATA * chunk, are qualified as 'Stray DATA chunks'. */ - if (!tchunk->tsn_gap_acked) { - tchunk->tsn_gap_acked = 1; - bytes_acked += sctp_data_size(tchunk); - } list_add_tail(lchunk, &tlist); } @@ -1491,7 +1503,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, tsn); tchunk->tsn_gap_acked = 0; - bytes_acked -= sctp_data_size(tchunk); + if (tchunk->transport) + bytes_acked -= sctp_data_size(tchunk); /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -1561,6 +1574,14 @@ static void sctp_check_transmitted(struct sctp_outq *q, #endif /* SCTP_DEBUG */ if (transport) { if (bytes_acked) { + /* We may have counted DATA that was migrated + * to this transport due to DEL-IP operation. + * Subtract those bytes, since the were never + * send on this transport and shouldn't be + * credited to this transport. + */ + bytes_acked -= migrate_bytes; + /* 8.2. When an outstanding TSN is acknowledged, * the endpoint shall clear the error counter of * the destination transport address to which the @@ -1589,7 +1610,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, transport->flight_size -= bytes_acked; if (transport->flight_size == 0) transport->partial_bytes_acked = 0; - q->outstanding_bytes -= bytes_acked; + q->outstanding_bytes -= bytes_acked + migrate_bytes; } else { /* RFC 2960 6.1, sctpimpguide-06 2.15.2 * When a sender is doing zero window probing, it diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 45b8bcafd827..a7f18a352364 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3543,6 +3543,12 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, asconf_ack = sctp_assoc_lookup_asconf_ack(asoc, hdr->serial); if (!asconf_ack) return SCTP_DISPOSITION_DISCARD; + + /* Reset the transport so that we select the correct one + * this time around. This is to make sure that we don't + * accidentally use a stale transport that's been removed. + */ + asconf_ack->transport = NULL; } else { /* ADDIP 5.2 E5) Otherwise, the ASCONF Chunk is discarded since * it must be either a stale packet or from an attacker. -- cgit v1.2.3 From d521c08f4c16d27f193718da778503a6472501da Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 2 Sep 2009 13:05:33 +0800 Subject: sctp: fix to reset packet information after packet transmit The packet information does not reset after packet transmit, this may cause some problems such as following DATA chunk be sent without AUTH chunk, even if the authentication of DATA chunk has been requested by the peer. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index b801bc9fb639..1f9336177ee2 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,6 +136,17 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, return packet; } +static void sctp_packet_reset(struct sctp_packet *packet) +{ + packet->size = packet->overhead; + packet->has_cookie_echo = 0; + packet->has_sack = 0; + packet->has_data = 0; + packet->has_auth = 0; + packet->ipfragok = 0; + packet->auth = NULL; +} + /* Free a packet. */ void sctp_packet_free(struct sctp_packet *packet) { @@ -576,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) (*tp->af_specific->sctp_xmit)(nskb, tp); out: - packet->size = packet->overhead; + sctp_packet_reset(packet); return err; no_route: kfree_skb(nskb); -- cgit v1.2.3 From 4007cc88ceec8892b74792f0a10983b140beae72 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:21:00 -0400 Subject: sctp: Correctly track if AUTH has been bundled. We currently track if AUTH has been bundled using the 'auth' pointer to the chunk. However, AUTH is disallowed after DATA is already in the packet, so we need to instead use the 'has_auth' field. Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index 1f9336177ee2..e47398c07185 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -220,7 +220,7 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, /* See if this is an auth chunk we are bundling or if * auth is already bundled. */ - if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->auth) + if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth) return retval; /* if the peer did not request this chunk to be authenticated, -- cgit v1.2.3 From 8da645e101a8c20c6073efda3c7cc74eec01b87f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:21:01 -0400 Subject: sctp: Get rid of an extra routing lookup when adding a transport. We used to perform 2 routing lookups for a new transport: one just for path mtu detection, and one to actually route to destination and path mtu update when sending a packet. There is no point in doing both of them, especially since the first one just for path mtu doesn't take into account source address and sometimes gives the wrong route, causing path mtu updates anyway. We now do just the one call to do both route to destination and get path mtu updates. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index caba989f4e76..8450960df24f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -680,13 +680,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, */ peer->param_flags = asoc->param_flags; + sctp_transport_route(peer, NULL, sp); + /* Initialize the pmtu of the transport. */ - if (peer->param_flags & SPP_PMTUD_ENABLE) - sctp_transport_pmtu(peer); - else if (asoc->pathmtu) - peer->pathmtu = asoc->pathmtu; - else - peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; + if (peer->param_flags & SPP_PMTUD_DISABLE) { + if (asoc->pathmtu) + peer->pathmtu = asoc->pathmtu; + else + peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; + } /* If this is the first transport addr on this association, * initialize the association PMTU to the peer's PMTU. -- cgit v1.2.3 From 723884339f90a9c420783135168cc1045750eb5d Mon Sep 17 00:00:00 2001 From: Bhaskar Dutta Date: Thu, 3 Sep 2009 17:25:47 +0530 Subject: sctp: Sysctl configuration for IPv4 Address Scoping This patch introduces a new sysctl option to make IPv4 Address Scoping configurable . In networking environments where DNAT rules in iptables prerouting chains convert destination IP's to link-local/private IP addresses, SCTP connections fail to establish as the INIT chunk is dropped by the kernel due to address scope match failure. For example to support overlapping IP addresses (same IP address with different vlan id) a Layer-5 application listens on link local IP's, and there is a DNAT rule that maps the destination IP to a link local IP. Such applications never get the SCTP INIT if the address-scoping draft is strictly followed. This sysctl configuration allows SCTP to function in such unconventional networking environments. Sysctl options: 0 - Disable IPv4 address scoping draft altogether 1 - Enable IPv4 address scoping (default, current behavior) 2 - Enable address scoping but allow IPv4 private addresses in init/init-ack 3 - Enable address scoping but allow IPv4 link local address in init/init-ack Signed-off-by: Bhaskar Dutta Signed-off-by: Vlad Yasevich --- net/sctp/bind_addr.c | 21 ++++++++++++++++++++- net/sctp/protocol.c | 11 ++++++----- net/sctp/sysctl.c | 12 ++++++++++++ 3 files changed, 38 insertions(+), 6 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 6d5944a745d4..13a6fba41077 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -510,9 +510,28 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) * of requested destination address, sender and receiver * SHOULD include all of its addresses with level greater * than or equal to L. + * + * Address scoping can be selectively controlled via sysctl + * option */ - if (addr_scope <= scope) + switch (sctp_scope_policy) { + case SCTP_SCOPE_POLICY_DISABLE: return 1; + case SCTP_SCOPE_POLICY_ENABLE: + if (addr_scope <= scope) + return 1; + break; + case SCTP_SCOPE_POLICY_PRIVATE: + if (addr_scope <= scope || SCTP_SCOPE_PRIVATE == addr_scope) + return 1; + break; + case SCTP_SCOPE_POLICY_LINK: + if (addr_scope <= scope || SCTP_SCOPE_LINK == addr_scope) + return 1; + break; + default: + break; + } return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a76da657244a..60093be8385d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -431,16 +431,14 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) * of requested destination address, sender and receiver * SHOULD include all of its addresses with level greater * than or equal to L. + * + * IPv4 scoping can be controlled through sysctl option + * net.sctp.addr_scope_policy */ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr) { sctp_scope_t retval; - /* Should IPv4 scoping be a sysctl configurable option - * so users can turn it off (default on) for certain - * unconventional networking environments? - */ - /* Check for unusable SCTP addresses. */ if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) { retval = SCTP_SCOPE_UNUSABLE; @@ -1259,6 +1257,9 @@ SCTP_STATIC __init int sctp_init(void) /* Disable AUTH by default. */ sctp_auth_enable = 0; + /* Set SCOPE policy to enabled */ + sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 63eabbc71298..ab7151da120f 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -51,6 +51,7 @@ static int timer_max = 86400000; /* ms in one day */ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; +static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -272,6 +273,17 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, .strategy = sysctl_intvec }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "addr_scope_policy", + .data = &sctp_scope_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &addr_scope_max, + }, { .ctl_name = 0 } }; -- cgit v1.2.3 From be2971438dec2e2d041af4701472a93a7dd03642 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 4 Sep 2009 14:34:06 +0800 Subject: sctp: remove dup code in net/sctp/output.c Use sctp_packet_reset() instead of dup code. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index e47398c07185..5cbda8f1ddfd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -69,6 +69,17 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len); +static void sctp_packet_reset(struct sctp_packet *packet) +{ + packet->size = packet->overhead; + packet->has_cookie_echo = 0; + packet->has_sack = 0; + packet->has_data = 0; + packet->has_auth = 0; + packet->ipfragok = 0; + packet->auth = NULL; +} + /* Config a packet. * This appears to be a followup set of initializations. */ @@ -80,13 +91,8 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); + sctp_packet_reset(packet); packet->vtag = vtag; - packet->has_cookie_echo = 0; - packet->has_sack = 0; - packet->has_auth = 0; - packet->has_data = 0; - packet->ipfragok = 0; - packet->auth = NULL; if (ecn_capable && sctp_packet_empty(packet)) { chunk = sctp_get_ecne_prepend(packet->transport->asoc); @@ -124,29 +130,12 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, } overhead += sizeof(struct sctphdr); packet->overhead = overhead; - packet->size = overhead; + sctp_packet_reset(packet); packet->vtag = 0; - packet->has_cookie_echo = 0; - packet->has_sack = 0; - packet->has_auth = 0; - packet->has_data = 0; - packet->ipfragok = 0; packet->malloced = 0; - packet->auth = NULL; return packet; } -static void sctp_packet_reset(struct sctp_packet *packet) -{ - packet->size = packet->overhead; - packet->has_cookie_echo = 0; - packet->has_sack = 0; - packet->has_data = 0; - packet->has_auth = 0; - packet->ipfragok = 0; - packet->auth = NULL; -} - /* Free a packet. */ void sctp_packet_free(struct sctp_packet *packet) { -- cgit v1.2.3 From f1751c57f7bb816c9b6b4cb5d79c703aaa7199da Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:21:03 -0400 Subject: sctp: Catch bogus stream sequence numbers Since our TSN map is capable of holding at most a 4K chunk gap, there is no way that during this gap, a stream sequence number (unsigned short) can wrap such that the new number is smaller then the next expected one. If such a case is encountered, this is a protocol violation. Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a7f18a352364..c8fae1983dd1 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2891,6 +2891,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; + case SCTP_IERROR_PROTO_VIOLATION: + return sctp_sf_abort_violation(ep, asoc, chunk, commands, + (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); } @@ -3001,6 +3004,9 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, break; case SCTP_IERROR_NO_DATA: goto consume; + case SCTP_IERROR_PROTO_VIOLATION: + return sctp_sf_abort_violation(ep, asoc, chunk, commands, + (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); } @@ -5877,6 +5883,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; + u16 ssn; + u16 sid; + u8 ordered = 0; data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); @@ -6016,8 +6025,10 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS); - else + else { SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS); + ordered = 1; + } /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number * @@ -6027,7 +6038,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, * with cause set to "Invalid Stream Identifier" (See Section 3.3.10) * and discard the DATA chunk. */ - if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { + sid = ntohs(data_hdr->stream); + if (sid >= asoc->c.sinit_max_instreams) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); @@ -6040,6 +6052,18 @@ static int sctp_eat_data(const struct sctp_association *asoc, return SCTP_IERROR_BAD_STREAM; } + /* Check to see if the SSN is possible for this TSN. + * The biggest gap we can record is 4K wide. Since SSNs wrap + * at an unsigned short, there is no way that an SSN can + * wrap and for a valid TSN. We can simply check if the current + * SSN is smaller then the next expected one. If it is, it wrapped + * and is invalid. + */ + ssn = ntohs(data_hdr->ssn); + if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->ssnmap->in, sid))) { + return SCTP_IERROR_PROTO_VIOLATION; + } + /* Send the data up to the user. Note: Schedule the * SCTP_CMD_CHUNK_ULP cmd before the SCTP_CMD_GEN_SACK, as the SACK * chunk needs the updated rwnd. -- cgit v1.2.3 From 32613090a96dba2ca2cc524c8d4749d3126fdde5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 14 Sep 2009 12:21:47 +0000 Subject: net: constify struct net_protocol Remove long removed "inet_protocol_base" declaration. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 60093be8385d..c557f1fb1c66 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -924,7 +924,7 @@ static struct inet_protosw sctp_stream_protosw = { }; /* Register with IP layer. */ -static struct net_protocol sctp_protocol = { +static const struct net_protocol sctp_protocol = { .handler = sctp_rcv, .err_handler = sctp_v4_err, .no_policy = 1, -- cgit v1.2.3 From 41135cc836a1abeb12ca1416bdb29e87ad021153 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 14 Sep 2009 12:22:28 +0000 Subject: net: constify struct inet6_protocol Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 6a4b19094143..bb280e60e00a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -949,7 +949,7 @@ static int sctp6_rcv(struct sk_buff *skb) return sctp_rcv(skb) ? -1 : 0; } -static struct inet6_protocol sctpv6_protocol = { +static const struct inet6_protocol sctpv6_protocol = { .handler = sctp6_rcv, .err_handler = sctp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, -- cgit v1.2.3