Wednesday, January 15, 2014

Large flow marking using hybrid OpenFlow

Top of rack switches are in a unique position at the edge of the network to implement traffic engineering controls. Marking large flows describes a use case for dynamically detecting and marking large flows as they enter the network:
Figure 1: Marking large flows
Physical switch hybrid OpenFlow example described how real-time sFlow analytics can be used to trigger OpenFlow controls to block denial of service attacks. This article will describe how the sFlow-RT, Floodlight OpenFlow controller, and Alcatel-Lucent OmniSwitch hybrid OpenFlow SDN controller setup can be programmed to dynamically detect and mark large (Elephant) flows as they enter the network.
Figure 2: Large flow marking controller results
In the experimental setup, a flood ping is used to generate a large flow:
ping -f 10.0.0.238 -s 1400
Figure 2 shows the results: the left half of the chart shows traffic when the controller is disabled and the right half shows traffic when the controller is enabled. The blue line trends the largest unmarked flow seen in the network and the gold line shows the largest marked flow. When the controller is disabled, none of the traffic is marked. When the controller is enabled, sFlow-RT detects the large flow within a second and makes a call to Floodlight's Static Flow Pusher API to create a rule that matches the IP source and destination addresses of the large flow with actions to set the IP Type of Service bits and forward the packet using the normal forwarding path. The Floodlight controller pushes an OpenFlow rule to the switch. The upstream switch is also sending sFlow data to sFlow-RT and so the marked traffic can be detected and reported by sFlow-RT, confirming that the control has in fact been implemented.

The controller logic is implemented by the following embedded script running within sFlow-RT:
include('extras/aluws.js');

// Flow definition: bytes per source/destination IP pair, ingress only.
var flowkeys = 'ipsource,ipdestination';
var value = 'bytes';
var filter = 'direction=ingress';

// Threshold that triggers marking and the level below which the mark is
// released (the article describes these as bytes/second).
var trigger = 100000;
var release = 100;

// Value used in the set-tos-bits action of the pushed rule.
var tos = '0x4';

var metricName = 'mark';
var id = 0;         // counter used to build unique rule names ('ctl' + id)
var controls = {};  // flowkey -> record describing the rule pushed for it
var enabled = true;

// OmniSwitch Web Services credentials and sFlow sampling/polling settings.
var user = 'admin';
var password = 'password';
var sampling = 128;
var polling = 30;

// sFlow collector address. 6343 is the default sFlow port and the port the
// other OmniSwitch scripts send to; the previous value (8343) did not match.
var collectorIP = "10.0.0.162";
var collectorPort = 6343;

// Floodlight OpenFlow Controller REST API
var floodlight = 'http://10.0.0.53:8080/';
var listswitches = floodlight+'wm/core/controller/switches/json';
var flowpusher = floodlight+'wm/staticflowentrypusher/json';
var clearflows = floodlight+'wm/staticflowentrypusher/clear/all/json';

// Request the Floodlight "clear all" static flow URL.
function clearOpenFlow() {
  var url = clearflows;
  http(url);
}

// POST a static flow entry, serialized as JSON, to the Floodlight flow pusher URL.
function setOpenFlow(spec) {
  var payload = JSON.stringify(spec);
  http(flowpusher, 'post', 'application/json', payload);
}

// Send a DELETE for the static flow entry identified by spec (serialized as JSON).
function deleteOpenFlow(spec) {
  var payload = JSON.stringify(spec);
  http(flowpusher, 'delete', 'application/json', payload);
}

// agent IP address -> {dpid, names, nameToNumber} (ifIndexToName is added later)
var agents = {};

/**
 * Fetch the switch list from Floodlight and record one entry in `agents`
 * per switch, keyed by the IP address extracted from its inetAddress.
 *
 * Each entry holds the OpenFlow datapath id, the list of port names and a
 * port-name -> OpenFlow port number map.
 *
 * Switches whose inetAddress does not contain "/<address>:" and ports whose
 * name does not start with "port " are skipped instead of raising a
 * TypeError that would abort discovery for every remaining switch.
 */
function discoverAgents() {
  var res = http(listswitches);
  var dps = JSON.parse(res);
  for(var i = 0; i < dps.length; i++) {
    var dp = dps[i];
    var addr = /\/(.*):/.exec(dp.inetAddress || '');
    if(!addr) continue;
    var portList = dp.ports || [];
    var nameToNumber = {};
    var names = [];
    // get ifName to OpenFlow port number mapping
    // and list of OpenFlow enabled ports
    for (var j = 0; j < portList.length; j++) {
      var port = portList[j];
      var m = /^port (.*)$/.exec(port.name || '');
      if(!m) continue;
      names.push(m[1]);
      nameToNumber[m[1]] = port.portNumber;
    }
    agents[addr[1]] = {dpid:dp.dpid,names:names,nameToNumber:nameToNumber};
  }
}

/**
 * Configure sFlow on one discovered switch and record its ifIndex -> ifName map.
 *
 * Logs in through an ALUServer object (from extras/aluws.js), sends the
 * sFlow agent/receiver/sampler/poller commands for the ports recorded in
 * agents[agent].names, reads ifName from ifXTable, then logs out.
 * The ALUServer object is kept on the record as `server`.
 *
 * The logout is placed in a finally block so the session is closed even when
 * a command or the MIB query throws; the error still propagates to the caller.
 *
 * @param agent  key into `agents` (the switch IP address)
 */
function initializeAgent(agent) {
  var rec = agents[agent];
  var server = new ALUServer(agent,user,password);
  rec.server = server;

  var ports = rec.names.join(' ');
  var ifIndexToName = {};

  server.login();
  try {
    // configure sFlow
    server.runCmds([
      'sflow agent ip ' + agent,
      'sflow receiver 1 name InMon address '+collectorIP+' udp-port '+collectorPort,
      'sflow sampler 1 port '+ports+' receiver 1 rate '+sampling,
      'sflow poller 1 port '+ports+' receiver 1 interval '+polling
    ]);

    // get ifIndex to ifName mapping
    var res = server.rest('get','mib','ifXTable',{mibObject0:'ifName'});
    var rows = res.result.data.rows;
    for(var ifIndex in rows) ifIndexToName[ifIndex] = rows[ifIndex].ifName;
  } finally {
    server.logout();
  }

  rec.ifIndexToName = ifIndexToName;
}

/**
 * Push a rule that sets the TOS bits for one flow and remember it in `controls`.
 *
 * Does nothing when the flow already has a control or the agent is unknown.
 *
 * @param agent       switch address reported in the event
 * @param dataSource  data source reported in the event
 * @param flowkey     'source,destination' IP pair identifying the flow
 */
function mark(agent,dataSource,flowkey) {
  var rec = agents[agent];
  if(controls[flowkey] || !rec) return;

  var ruleName = 'ctl' + id++;
  var addrs = flowkey.split(',');
  var spec = {
    name: ruleName,
    switch: rec.dpid,
    cookie: 0,
    priority: 500,
    active: true,
    'ether-type': '0x0800',
    'src-ip': addrs[0],
    'dst-ip': addrs[1],
    actions: 'set-tos-bits=' + tos + ',output=normal'
  };
  setOpenFlow(spec);

  controls[flowkey] = {
    name: ruleName,
    agent: agent,
    dataSource: dataSource,
    action: 'mark',
    time: (new Date()).getTime()
  };
}

// Delete the pushed rule for flowkey (by rule name) and forget the control.
// No-op when the flow has no control.
function unmark(flowkey) {
  var ctl = controls[flowkey];
  if(ctl) {
    deleteOpenFlow({name:ctl.name});
    delete controls[flowkey];
  }
}

// Threshold events for metricName: mark the reported flow unless the
// controller has been disabled through the HTTP handler.
setEventHandler(function onLargeFlow(evt) {
  if(enabled) {
    mark(evt.agent, evt.dataSource, evt.flowKey);
  }
}, [metricName]);


// Periodic check (interval argument 5): release every control whose flow
// value is missing or at/below the release threshold.
setIntervalHandler(function releaseIdleControls() {
  var expired = [];
  var key;
  for(key in controls) {
    var c = controls[key];
    var rate = flowvalue(c.agent, c.dataSource + '.' + metricName, key);
    if(!rate || rate <= release) {
      expired.push(key);
    }
  }
  // unmark only after the scan so `controls` is not modified while iterating
  while(expired.length > 0) {
    unmark(expired.shift());
  }
},5);


// HTTP control interface: ?action=enable|disable|clear.
// Always answers with the current controls and enabled flag as JSON; an
// exception raised while handling the action is reported in result.error.
setHttpHandler(function handleRequest(request) {
  var result = {};
  try {
    var action = '' + request.query.action;
    if(action === 'enable') {
      enabled = true;
    } else if(action === 'disable') {
      enabled = false;
    } else if(action === 'clear') {
      clearOpenFlow();
      controls = {};
    }
  }
  catch(e) { result.error = e.message; }
  result.controls = controls;
  result.enabled = enabled;
  return JSON.stringify(result);
});

// Startup: discover the switches known to Floodlight, configure sFlow on
// each one, then define the flow and the threshold that raises events.
discoverAgents();
var agent;
for(agent in agents) initializeAgent(agent);

setFlow(metricName, {
  keys: flowkeys,
  value: value,
  filter: filter
});
setThreshold(metricName, {
  metric: metricName,
  value: trigger,
  byFlow: true,
  timeout: 10
});
The following command line argument loads the script on startup:
-D script.file=omniofmark.js
Some notes on the script:
  1. A call to the Floodlight REST API is used to discover the set of switches, their IP addresses and OpenFlow datapath identifiers, ports, port names and OpenFlow port numbers.
  2. The initializeAgent() function uses the OmniSwitch Web Services API to configure sFlow on the switches and ports that are controllable using OpenFlow/Floodlight.
  3. A threshold is set to trigger an event when a flow exceeds 100,000 bytes/second
  4. The eventHandler() is triggered when large flows are detected and it calls the mark() function to push a control to Floodlight.
  5. The mark() function extracts source and destination IP address information from the flowkey and constructs a Static Flow Pusher message that matches the flow. The key to making this example work is a switch that is able to implement the actions set-tos-bits=0x4,output=normal  These actions instruct the switch to mark the traffic by setting the IP TOS bits and then use the normal hardware forwarding path.
  6. The intervalHandler() function runs every 5 seconds and checks the traffic levels of each of the large flows that are being controlled. If the flow is no longer detectable or is below the release threshold of 100 bytes/second then Floodlight is instructed to remove the rule, freeing up hardware resources for new large flows.
Large flow marking is only one use case for large flow control, others described on this blog include: DDoS mitigation, ECMP / LAG load balancing, blacklists, and packet capture. Scripts can be added to address these different use cases, as well as providing information on network health and server performance to operations teams (see Exporting events using syslog and Metric export to Graphite)

Tuesday, January 14, 2014

sFlow leads convergence of multi-vendor application, server, and network performance management

Over the last six months, leading Application Delivery Controller (ADC) vendors F5 and A10 have added support for the sFlow standard to their respective TMOS and ACOS operating systems, making multi-vendor, real-time application layer visibility available in approximately 50% of the commercial ADC market.
Figure 1: Best of Velocity 2012, The sFlow Standard
Equally important is the availability of sFlow support in leading open source web servers, load balancers, application servers, hypervisors and operating systems, including: Apache, NGINX, Tomcat, Java, HAproxy, Hyper-V, Xen, KVM, Linux, Windows, Solaris, FreeBSD and AIX. The combination of sFlow in ADCs and the application infrastructure behind them provides comprehensive end to end visibility in multi-tier, scale-out, application architectures.

Figure 1 shows the strategic role that ADCs (load balancers) play in controlling the flow of application requests, regulating admission, filtering, directing loads, and virtualizing services. RESTful control of ADCs combined with real-time visibility provides a powerful capability for flexing resources as demand changes, reducing costs and increasing performance as resources are closely matched to workloads.

What is unusual about the diagram is the inclusion of the network. Application architects often give little thought to the network since its complexity is conveniently hidden behind APIs. Unfortunately, it is in the nature of scale-out applications that their performance is tightly coupled to that of the network. In addition, the network is shared between application tiers, allowing performance problems to propagate.
Figure 2: sFlow drivers for growth
Application visibility and control in the ADC space along with near universal support for sFlow among switch vendors combines with Software Defined Networking (SDN) to transform application performance management by orchestrating all the elements of the data center to deliver a comprehensive performance management solution, what VMware calls the Software Defined Data Center (SDDC), Cisco terms the Application Centric Infrastructure (ACI), and Microsoft refers to as the Cloud OS.
Figure 3: Visibility and the software defined data center
Recent breakthroughs in real-time sFlow analysis incorporated in the sFlow-RT analytics engine deliver comprehensive, timely, and actionable metrics through a programmatic interface. Expect to see this technology incorporated in next generation self optimizing orchestration solutions in 2014.
Performance Aware SDN describes the theory behind analytics driven orchestration. The talk describes how fast controller response, programmatic configuration interfaces such as OpenFlow, and consistent instrumentation of all the elements being orchestrated are pre-requisites for feedback control.
The requirement for complete measurement coverage by next generation orchestration systems will create a strong demand for sFlow instrumented infrastructure since sFlow is the only widely supported multi-vendor standard that spans network, server and application resources and delivers the low latency and scaleability required for adaptive control.

Saturday, January 11, 2014

Physical switch hybrid OpenFlow example

Alcatel-Lucent OmniSwitch analytics driven control provided an example with a physical switch, using the Web Services API to send CLI controls to the switch as HTTP requests, the following screen shot shows the results:
Figure 1: Controller using HTTP / REST API
Integrated hybrid OpenFlow describes how the combination of normal forwarding combined with OpenFlow for control of large flows provides a scaleable and practical solution for traffic engineering. The article used the Mininet testbed to develop a DDoS mitigation controller consisting of the sFlow-RT real-time analytics engine to detect large flows and the Floodlight OpenFlow controller to push control rules to the software virtual switch in the testbed.
Figure 2: Performance aware software defined networking
The OmniSwitch supports hybrid mode OpenFlow and this article will evaluate the performance of a physical switch hybrid OpenFlow solution using the OmniSwitch. The following results were obtained when repeating the DDoS attack test using Floodlight and OpenFlow as the control mechanism:
Figure 3: OmniSwitch controller using hybrid OpenFlow
Figure 3 shows that implementing traffic controls using OpenFlow is considerably faster than those obtained using the HTTP API shown in Figure 1, cutting the time to implement controls from seconds to milliseconds.
Figure 4: Mininet controller using hybrid OpenFlow
Figure 4 shows that the physical switch results are consistent with those obtained using Mininet, demonstrating the value of network simulation as a way to develop controllers before moving them into production. In fact, the Open vSwitch virtual switch used by Mininet is integrated in the mainstream Linux kernel and is an integral part of many commercial and open source virtualization platforms, including: VMware/Nicira NSX, OpenStack, Xen Cloud Platform, XenServer, and KVM. In these environments virtual machine traffic can be controlled using Open vSwitch.

The following command arguments configure the OmniSwitch to connect to the Floodlight controller running on host 10.0.0.53:
openflow logical-switch ls1 mode api
openflow logical-switch ls1 controller 10.0.0.53:6633
openflow logical-switch ls1 version 1.0
The Floodlight web based user interface can be used to confirm that the switch is connected.
The following sFlow-RT script implements the controller:
include('extras/aluws.js');

// Flow definition: frames per (input ifIndex, source IP) for ingress
// ICMP type 8 packets.
var flowkeys = 'inputifindex,ipsource';
var value = 'frames';
var filter = 'direction=ingress&icmptype=8';
var threshold = 1000;

// Controller state.
var metricName = 'ddos';
var controls = {};      // source IP -> record of the blocking rule
var enabled = true;
var blockSeconds = 20;  // age after which a block is removed

// OmniSwitch Web Services credentials and sFlow settings.
var user = 'admin';
var password = 'password';
var sampling = 128;
var polling = 30;

// sFlow collector.
var collectorIP = '10.0.0.162';
var collectorPort = 6343;

// Floodlight OpenFlow Controller REST API
var floodlight = 'http://10.0.0.53:8080/';
var listswitches = floodlight + 'wm/core/controller/switches/json';
var flowpusher = floodlight + 'wm/staticflowentrypusher/json';
var clearflows = floodlight + 'wm/staticflowentrypusher/clear/all/json';

// Request the Floodlight "clear all" static flow URL.
function clearOpenFlow() {
  var target = clearflows;
  http(target);
}

// POST a static flow entry (JSON-encoded spec) to the Floodlight flow pusher URL.
function setOpenFlow(spec) {
  var body = JSON.stringify(spec);
  http(flowpusher, 'post', 'application/json', body);
}

// Send a DELETE carrying the JSON-encoded spec to the Floodlight flow pusher URL.
function deleteOpenFlow(spec) {
  var body = JSON.stringify(spec);
  http(flowpusher, 'delete', 'application/json', body);
}

// agent IP address -> {dpid, names, nameToNumber} (ifIndexToName is added later)
var agents = {};

/**
 * Read the switch list from Floodlight and fill `agents`, keyed by the IP
 * address extracted from each switch's inetAddress, with the datapath id,
 * the port names and a port-name -> OpenFlow port number map.
 *
 * A switch whose inetAddress lacks the "/<address>:" form, or a port whose
 * name does not start with "port ", is skipped; previously either case
 * raised a TypeError and stopped discovery of the remaining switches.
 */
function discoverAgents() {
  var res = http(listswitches);
  var dps = JSON.parse(res);
  for(var i = 0; i < dps.length; i++) {
    var dp = dps[i];
    var addr = /\/(.*):/.exec(dp.inetAddress || '');
    if(!addr) continue;
    var portList = dp.ports || [];
    var nameToNumber = {};
    var names = [];
    // get ifName to OpenFlow port number mapping
    // and list of OpenFlow enabled ports
    for (var j = 0; j < portList.length; j++) {
      var port = portList[j];
      var m = /^port (.*)$/.exec(port.name || '');
      if(!m) continue;
      names.push(m[1]);
      nameToNumber[m[1]] = port.portNumber;
    }
    agents[addr[1]] = {dpid:dp.dpid,names:names,nameToNumber:nameToNumber};
  }
}

/**
 * Configure sFlow on one discovered switch and store its ifIndex -> ifName map.
 *
 * Uses an ALUServer object (from extras/aluws.js) to log in, send the sFlow
 * agent/receiver/sampler/poller commands for the ports in agents[agent].names,
 * read ifName from ifXTable and log out. The ALUServer object is saved on the
 * record as `server`.
 *
 * logout() runs in a finally block so a failing command or MIB query does not
 * leave the session open; the exception is still raised to the caller.
 *
 * @param agent  key into `agents` (the switch IP address)
 */
function initializeAgent(agent) {
  var rec = agents[agent];
  var server = new ALUServer(agent,user,password);
  rec.server = server;

  var ports = rec.names.join(' ');
  var ifIndexToName = {};

  server.login();
  try {
    // configure sFlow
    server.runCmds([
      'sflow agent ip ' + agent,
      'sflow receiver 1 name InMon address '+collectorIP+' udp-port '+collectorPort,
      'sflow sampler 1 port '+ports+' receiver 1 rate '+sampling,
      'sflow poller 1 port '+ports+' receiver 1 interval '+polling
    ]);

    // get ifIndex to ifName mapping
    var res = server.rest('get','mib','ifXTable',{mibObject0:'ifName'});
    var rows = res.result.data.rows;
    for(var ifIndex in rows) ifIndexToName[ifIndex] = rows[ifIndex].ifName;
  } finally {
    server.logout();
  }

  rec.ifIndexToName = ifIndexToName;
}

/**
 * Push a rule matching traffic from `ip` with an empty action list and
 * record it in `controls`.
 *
 * Does nothing when `ip` already has a control or `agent` is unknown.
 *
 * @param agent  switch address, key into `agents`
 * @param ip     source IP address to match
 * @param port   accepted for callers; not used by this function
 */
function block(agent,ip,port) {
  var rec = agents[agent];
  if(controls[ip] || !rec) return;

  var ruleName = 'block-' + ip;
  var spec = {
    name: ruleName,
    switch: rec.dpid,
    cookie: 0,
    priority: 500,
    active: true,
    'ether-type': '0x0800',
    'src-ip': ip,
    actions: ''
  };
  setOpenFlow(spec);

  controls[ip] = {
    name: ruleName,
    agent: agent,
    action: 'block',
    time: (new Date()).getTime()
  };
}

// Remove the blocking rule recorded for ip (deleted by rule name) and drop
// its entry from `controls`. No-op when ip has no control.
function allow(ip) {
  var ctl = controls[ip];
  if(ctl) {
    deleteOpenFlow({name:ctl.name});
    delete controls[ip];
  }
}

// Threshold events for metricName: the flow key is 'inputifindex,ipsource'.
// Blocks the source address on the reporting switch when the controller is
// enabled and the switch is one of the discovered agents.
setEventHandler(function onFlood(evt) {
  if(!enabled) return;

  var fields = evt.flowKey.split(',');
  var rec = agents[evt.agent];
  if(rec) {
    block(evt.agent, fields[1], rec.ifIndexToName[fields[0]]);
  }
}, [metricName]);


// Periodic check (interval argument 10): lift every block that has been in
// place for more than blockSeconds.
setIntervalHandler(function expireControls() {
  var maxAgeMs = 1000 * blockSeconds;
  var now = (new Date()).getTime();
  var expired = [];
  var addr;
  for(addr in controls) {
    var ageMs = now - controls[addr].time;
    if(ageMs > maxAgeMs) expired.push(addr);
  }
  // allow() edits `controls`, so it runs only after the scan is complete
  while(expired.length > 0) {
    allow(expired.shift());
  }
},10);


// HTTP control interface.
//   action=block   (agent, address, port)  add a block manually
//   action=allow   (address)               remove a block manually
//   action=enable / action=disable         switch automatic blocking on/off
//   action=clearof                         clear all static flows in Floodlight
// The reply is always the current controls and enabled flag as JSON; an
// exception raised while handling the action (for example a missing query
// parameter) is reported in result.error.
setHttpHandler(function(request) {
  var result = {};
  try {
    var action = '' + request.query.action;
    switch(action) {
    case 'block':
      var agent = request.query.agent[0];
      var address = request.query.address[0];
      var port = request.query.port[0];
      if(agent&&address&&port) block(agent,address,port);
      break;
    case 'allow':
      var address = request.query.address[0];
      if(address) allow(address);
      break;
    case 'enable':
      enabled = true;
      break;
    case 'disable':
      enabled = false;
      break;
    case 'clearof':
      clearOpenFlow();
      // The rules no longer exist, so drop the bookkeeping as well;
      // otherwise block() keeps treating these addresses as already blocked
      // until their entries time out.
      controls = {};
      break;
    }
  }
  catch(e) { result.error = e.message }
  result.controls = controls;
  result.enabled = enabled;
  return JSON.stringify(result);
});

// Startup: discover the switches known to Floodlight, configure sFlow on
// each one, then define the flow and the threshold that raises events.
discoverAgents();
var agent;
for(agent in agents) initializeAgent(agent);

setFlow(metricName, {
  keys: flowkeys,
  value: value,
  filter: filter
});
setThreshold(metricName, {
  metric: metricName,
  value: threshold,
  byFlow: true,
  timeout: 10
});
The following command line argument loads the script on startup:
-D script.file=omniofddos.js
Some notes on the script:
  1. A call to the Floodlight REST API is used to discover the set of switches, their IP addresses and OpenFlow datapath identifiers, ports, port names and OpenFlow port numbers.
  2. The initializeAgent() function uses the OmniSwitch Web Services API to configure sFlow on the switches and ports that are controllable using OpenFlow/Floodlight.
  3. The script maintains mappings between port names, ifIndex numbers and OpenFlow port numbers so that ifIndex numbers used to identify ports in sFlow can be mapped to the port identifiers used in configuration commands and OpenFlow rules.
DDoS mitigation is only one use case for large flow control, others described on this blog include: ECMP / LAG load balancing, traffic marking, blacklists, and packet capture. Scripts can be added to address these different use cases, as well as providing information on network health and server performance to operations teams (see Exporting events using syslog and Metric export to Graphite)

Thursday, January 9, 2014

Alcatel-Lucent OmniSwitch analytics driven control

There are many articles on this blog that demonstrate real-time sFlow analytics driven control of switches using a Mininet testbed. This article is the first of a series that will shift the focus to physical switches and demonstrate different techniques for adapting network behavior to changing traffic.
Performance Aware SDN describes the theory behind analytics driven orchestration. The talk describes how fast controller response, programmatic configuration interfaces and consistent instrumentation of all the elements being orchestrated are pre-requisites for feedback control.
This article uses an Alcatel-Lucent OmniSwitch 6900 as an example. The switch has hardware sFlow support for line rate visibility on all ports, and support for OpenFlow and a RESTful configuration API to deploy control actions. In this example a basic DDoS mitigation filtering function will be triggered when large flood attacks are detected. The script is based on the version described in the article Integrated hybrid OpenFlow, but modified to use the OmniSwitch RESTful API.
RESTful control of switches describes how RESTFul configuration access to switches can be used to develop simple, controller-less SDN solutions. In this example the controller application is implemented using JavaScript that runs within the sFlow-RT analytics engine. The script has access to analytics data based on sFlow received from all the switches in the network and can directly access any switch using HTTP to make configuration changes. The script also provides a simple HTTP "Northbound API" that allows orchestration software to enable / disable the control function and manually add and remove controls.
include('extras/aluws.js');

// Flow definition: frames per (input ifIndex, source IP) for ingress
// ICMP type 8 packets.
var flowkeys = 'inputifindex,ipsource';
var value = 'frames';
var filter = 'direction=ingress&icmptype=8';
var threshold = 1000;

// Controller state.
var metricName = 'ddos';
var controls = {};      // source IP -> record of the policy rule applied
var enabled = true;
var blockSeconds = 20;  // age after which a block is removed
var ruleid = 0;         // counter used to build unique policy names

// sFlow collector.
var collectorIP = '10.0.0.162';
var collectorPort = 6343;

// Switches to manage, keyed by address, with per-switch credentials,
// monitored port range and sFlow sampling/polling settings.
var agents = {
  '10.0.0.234': {
    user: 'admin',
    password: 'password',
    ports: '1/1-20',
    sampling: 128,
    polling: 20
  }
};

/**
 * Configure sFlow on one switch from `agents` and store its
 * ifIndex -> ifName map.
 *
 * Creates an ALUServer object (from extras/aluws.js) with the credentials in
 * the agent record, keeps it on the record as `server`, sends the sFlow
 * agent/receiver/sampler/poller commands and reads ifName from ifXTable.
 *
 * logout() runs in a finally block so a failing command or MIB query does not
 * leave the session open; the exception is still raised to the caller.
 *
 * @param agent  key into `agents` (the switch IP address)
 */
function initializeAgent(agent) {
    var rec = agents[agent];
    var server = new ALUServer(agent,rec.user,rec.password);
    rec.server = server;

    var ifIndexToName = {};

    server.login();
    try {
        // configure sFlow
        server.runCmds([
          'sflow agent ip ' + agent,
          'sflow receiver 1 name InMon address '+collectorIP+' udp-port '+collectorPort,
          'sflow sampler 1 port '+rec.ports +' receiver 1 rate '+rec.sampling,
          'sflow poller 1 port '+rec.ports +' receiver 1 interval '+rec.polling
        ]);

        // get ifIndex to ifName mapping
        var res = server.rest('get','mib','ifXTable',{mibObject0:'ifName'});
        var rows = res.result.data.rows;
        for(var ifIndex in rows) ifIndexToName[ifIndex] = rows[ifIndex].ifName;
    } finally {
        server.logout();
    }

    rec.ifIndexToName = ifIndexToName;
}

/**
 * Install a drop policy for traffic from `ip` on the given switch and record
 * it in `controls`.
 *
 * Does nothing when `ip` already has a control or `agent` is not in `agents`.
 * logout() runs in a finally block so the session is closed even when the
 * policy commands throw; in that case no control is recorded and the error
 * is raised to the caller.
 *
 * @param agent  switch address, key into `agents`
 * @param ip     source IP address to drop
 * @param port   accepted for callers; not used by this function
 */
function block(agent,ip,port) {
    if(controls[ip]) return;

    var rec = agents[agent];
    if(!rec) return;

    var name = 'rt' + ruleid++;

    rec.server.login();
    try {
        rec.server.runCmds([
          'policy condition '+name+' source ip '+ip,
          'policy action '+name+' disposition drop',
          'policy rule '+name+' condition '+name+' action '+name,
          'qos apply'
        ]);
    } finally {
        rec.server.logout();
    }

    controls[ip] = {
        name: name,
        agent: agent,
        action: 'block',
        time: (new Date()).getTime()
    };
}

/**
 * Remove the drop policy recorded for `ip` from its switch and forget the
 * control. No-op when `ip` has no control.
 *
 * logout() runs in a finally block so the session is closed even when the
 * removal commands throw; in that case the control is kept (so a later call
 * can try again) and the error is raised to the caller.
 *
 * @param ip  source IP address whose block should be lifted
 */
function allow(ip) {
    if(!controls[ip]) return;

    var ctl = controls[ip];
    var agent = ctl.agent;
    var rec = agents[agent];

    rec.server.login();
    try {
        rec.server.runCmds([
          'no policy rule '+ctl.name,
          'no policy action '+ctl.name,
          'no policy condition '+ctl.name,
          'qos apply'
        ]);
    } finally {
        rec.server.logout();
    }

    delete controls[ip];
}

// Threshold events for metricName: the flow key is 'inputifindex,ipsource'.
// Blocks the source address on the reporting switch when the controller is
// enabled and the switch is listed in `agents`.
setEventHandler(function onFlood(evt) {
  if(!enabled) return;

  var fields = evt.flowKey.split(',');
  var rec = agents[evt.agent];
  if(rec) {
    block(evt.agent, fields[1], rec.ifIndexToName[fields[0]]);
  }
}, [metricName]);


// Periodic check (interval argument 10): lift every block that has been in
// place for more than blockSeconds.
setIntervalHandler(function expireControls() {
  var maxAgeMs = 1000 * blockSeconds;
  var now = (new Date()).getTime();
  var expired = [];
  var addr;
  for(addr in controls) {
    var ageMs = now - controls[addr].time;
    if(ageMs > maxAgeMs) expired.push(addr);
  }
  // allow() edits `controls`, so it runs only after the scan is complete
  while(expired.length > 0) {
    allow(expired.shift());
  }
},10);


// HTTP control interface.
//   action=block   (agent, address, port)  add a block manually
//   action=allow   (address)               remove a block manually
//   action=enable / action=disable         switch automatic blocking on/off
// The reply is always the current controls and enabled flag as JSON; an
// exception raised while handling the action is reported in result.error.
setHttpHandler(function handleRequest(request) {
  var result = {};
  try {
    var action = '' + request.query.action;
    if(action === 'block') {
      var agent = request.query.agent[0];
      var address = request.query.address[0];
      var port = request.query.port[0];
      if(agent && address && port) block(agent, address, port);
    } else if(action === 'allow') {
      var address = request.query.address[0];
      if(address) allow(address);
    } else if(action === 'enable') {
      enabled = true;
    } else if(action === 'disable') {
      enabled = false;
    }
  }
  catch(e) { result.error = e.message; }
  result.controls = controls;
  result.enabled = enabled;
  return JSON.stringify(result);
});

// Startup: define the flow and the threshold that raises events, then
// configure sFlow on every switch listed in `agents`.
setFlow(metricName, {
  keys: flowkeys,
  value: value,
  filter: filter
});
setThreshold(metricName, {
  metric: metricName,
  value: threshold,
  byFlow: true,
  timeout: 10
});

var agent;
for(agent in agents) initializeAgent(agent);
The following command line argument loads the script on startup:
-D script.file=omniddos.js
Some notes on the script:
  1. The included extras/aluws.js script defines the ALUServer() function which provides access to the OmniSwitch Web Services API
  2. The filter looks for flows of ingress ICMP echo request packets - this is useful for the demo, but in practice filters would be constructed to look for attacks from external sources, targeting internal servers - see Performance aware software defined networking
  3. The controls structure is used to keep track of state associated with deployed configuration changes so that they can be undone
  4. The intervalHandler() function is used to automatically release controls after 20 seconds - the timeout is short for the purposes of demonstration, in practical deployments the timeout would be much longer, measured in hours
  5. The ifIndexToName mapping allows the ifIndex numbers reported by sFlow to be mapped to interface names in CLI commands
  6. Additional switches and settings can be added to the agents structure - hundreds of switches can be monitored and controlled by a single sFlow-RT instance.
  7. The block() and allow() commands use filtering policy commands to implement controls that block traffic. The script can easily be modified to implement different policies (for example to rate limit or mark traffic), or in the case of large flood attacks, changing BGP settings to cause the upstream provider to drop traffic (e.g. Hurricane Electric Customer Blackhole Community)
To try out the script, use a web browser to view a trend of traffic and then perform the following steps:
  1. disable the controller (http://10.0.0.162:8008/script/omniddos.js/json?action=disable)
  2. perform a simulated DoS attack (using a flood ping)
  3. enable the controller (http://10.0.0.162:8008/script/omniddos.js/json?action=enable)
  4. simulate a second DoS attack

When the controller is disabled, the simulated attack traffic exceeds 3,000 packets per second and persists until the attacker stops sending. When the controller is enabled, traffic is blocked when it hits the 1,000 packet per second threshold in the application. The control is removed 20 seconds later and re-triggers if the attacker is still sending traffic.
DDoS mitigation is only one use case for large flow control, others described on this blog include: ECMP / LAG load balancing, traffic marking, blacklists, and packet capture. Scripts can be added to address these different use cases, as well as providing information on network health and server performance to operations teams (see Exporting events using syslog and Metric export to Graphite)

Wednesday, January 8, 2014

Configuring Alcatel-Lucent switches

The following configuration enables sFlow monitoring of all interfaces on an Alcatel-Lucent OmniSwitch switch (10.0.0.235), sampling packets at 1-in-512, polling counters every 30 seconds and sending the sFlow to an analyzer (10.0.0.1) on UDP port 6343 (the default sFlow port):
sflow agent ip 10.0.0.235
sflow receiver 1 name InMon address 10.0.0.1 udp-port 6343
sflow sampler 1 port 1/1-20 receiver 1 rate 512
sflow poller 1 port 1/1-20 receiver 1 interval 30
The switches also support the sFlow MIB for configuration.

See Trying out sFlow for suggestions on getting started with sFlow monitoring and reporting.

Monday, January 6, 2014

OpenDaylight

This article takes the DDoS example and repeats it using the OpenDaylight controller.

First install OpenDaylight in the Mininet testbed.
$ wget https://jenkins.opendaylight.org/controller/job/controller-merge/lastSuccessfulBuild/artifact/opendaylight/distribution/opendaylight/target/distribution.opendaylight-osgipackage.zip
unzip distribution.opendaylight-osgipackage.zip
Next start Mininet.
sudo mn --topo single,3 --controller=remote,ip=127.0.0.1
Enable sFlow on the switch:
sudo ovs-vsctl -- --id=@sflow create sflow agent=eth0  target=\"127.0.0.1:6343\" sampling=10 polling=20 -- -- set bridge s1 sflow=@sflow
Start OpenDaylight.
cd opendaylight
./run.sh
Confirm that the controller is running and has discovered the switch by connecting a browser to port 8080 on the testbed - the screen shot at the start of the article shows the OpenDaylight Devices tab with the switch 00:00:00:00:00:00:00:01 shown in the Nodes Learned list and in the map (the default credentials to log into the OpenDaylight interface are User:admin, Password:admin).

The following sFlow-RT script modifies the original to use the OpenDaylight Flow Programmer REST API to push OpenFlow rules to the switch.
include('extras/json2.js');

// Flow definition: frames per source IP for ingress traffic from the
// 'external' group that is not being discarded.
var flowkeys = 'ipsource';
var value = 'frames';
var filter = 'outputifindex!=discard&direction=ingress&sourcegroup=external';
var threshold = 1000;
var groups = {
  'external': ['0.0.0.0/0'],
  'internal': ['10.0.0.2/32']
};

// Controller state.
var metricName = 'ddos';
var controls = {};      // source IP -> record of the blocking rule
var enabled = true;
var blockSeconds = 20;  // age after which a block is removed
var ruleid = 0;         // counter used to build unique rule names

// OpenDaylight Flow Programmer REST API, credentials and target switch.
var flowprogrammer = 'http://127.0.0.1:8080/controller/nb/v2/flowprogrammer/default/node/OF/';
var user = 'admin';
var password = 'admin';
var bridge = '00:00:00:00:00:00:00:01';

// PUT a static flow (JSON-encoded spec) named `name` on switch `bridge`
// through the OpenDaylight flow programmer URL, using the configured
// user and password.
function setOpenFlow(bridge,name,spec) {
  var url = flowprogrammer + bridge + '/staticFlow/' + name;
  var body = JSON.stringify(spec);
  http(url, 'put', 'application/json', body, user, password);
}

// DELETE the static flow named `name` on switch `bridge` through the
// OpenDaylight flow programmer URL (no request body), using the configured
// user and password.
function deleteOpenFlow(bridge,name) {
  var url = flowprogrammer + bridge + '/staticFlow/' + name;
  http(url, 'delete', 'application/json', null, user, password);
}

// Push a DROP rule for traffic from `address` to the configured bridge and
// record it in `controls`. No-op when the address already has a control.
function block(address) {
  if(controls[address]) return;

  var ruleName = 'block' + ruleid++;
  var spec = {
    installInHw: true,
    name: ruleName,
    node: {id: bridge, type: 'OF'},
    priority: '11',
    etherType: '0x0800',
    nwSrc: address,
    actions: ['DROP']
  };
  setOpenFlow(bridge, ruleName, spec);

  controls[address] = {
    name: ruleName,
    action: 'block',
    time: (new Date()).getTime()
  };
}

// Remove the control recorded for an address, if there is one.
//   address - key into controls (the value previously passed to block)
//
// Only entries stored directly on controls are treated as active controls.
// A plain property lookup would also find members inherited from
// Object.prototype (for example 'constructor' or 'toString'), so an
// address taken from an HTTP request could otherwise trigger a delete for
// a rule that this script never installed.
function allow(address) {
  var isRecorded = Object.prototype.hasOwnProperty.call(controls, address);
  var control = isRecorded ? controls[address] : null;
  if(!control) return;

  // The rule is deleted before the control is forgotten, so if
  // deleteOpenFlow throws the entry stays in controls.
  deleteOpenFlow(bridge,control.name);
  delete controls[address];
}

// Handler registered for events on metricName: while the controller is
// enabled, block the address carried in the event's flow key.
setEventHandler(function(evt) {
  if(enabled) block(evt.flowKey);
},[metricName]);

// Periodic clean-up, registered with an interval argument of 10
// (NOTE(review): presumably seconds - confirm against the sFlow-RT API).
// Removes every control that has been in place longer than blockSeconds.
setIntervalHandler(function() {
  var now = (new Date()).getTime();
  var threshMs = 1000 * blockSeconds;

  // Collect first, remove afterwards: allow() deletes entries from
  // controls, so the object is not modified while it is being enumerated.
  var stale = [];
  for(var addr in controls) {
    if((now - controls[addr].time) > threshMs) stale.push(addr);
  }

  // Attempt every removal even if one of them throws. Stopping at the
  // first failure would mean that a control whose removal keeps failing is
  // hit first on every run, so the controls listed after it would never
  // expire. The first failure is rethrown once all removals have been
  // tried, so the caller still sees the error.
  var failed = false;
  var firstError;
  for(var i = 0; i < stale.length; i++) {
    try {
      allow(stale[i]);
    } catch(e) {
      if(!failed) {
        failed = true;
        firstError = e;
      }
    }
  }
  if(failed) throw firstError;
},10);

// HTTP handler for manual control of the script. The 'action' query
// parameter selects the operation:
//   block / allow    - apply to the first value of the 'address' parameter
//   enable / disable - set the enabled flag
// Any other (or missing) action changes nothing. The response is always a
// JSON string holding the current controls and enabled flag, plus an
// 'error' message if an exception was caught while handling the request.
setHttpHandler(function(request) {
  var response = {};
  try {
    var verb = '' + request.query.action;
    if(verb === 'block' || verb === 'allow') {
      // Throws (and is reported through response.error) when the request
      // has no 'address' parameter.
      var target = request.query.address[0];
      if(target) {
        if(verb === 'block') block(target);
        else allow(target);
      }
    } else if(verb === 'enable') {
      enabled = true;
    } else if(verb === 'disable') {
      enabled = false;
    }
  } catch(e) {
    response.error = e.message;
  }
  response.controls = controls;
  response.enabled = enabled;
  return JSON.stringify(response);
});

// Register the address groups first, so that the sourcegroup term in the
// flow filter has something to refer to.
setGroups(groups);

// Define the flow metric.
setFlow(metricName, {
  keys: flowkeys,
  value: value,
  filter: filter
});

// Define a threshold of the same name on that metric, evaluated per flow.
setThreshold(metricName, {
  metric: metricName,
  value: threshold,
  byFlow: true,
  timeout: 5
});
The following command line argument loads the script on startup:
-D file.script=odl.js
Repeating the simulated denial of service attack without the controller active and with the controller active shows the same results demonstrated in the previous article:
When the controller is disabled, the attack traffic exceeds 6,000 packets per second and persists until the attacker stops sending. When the controller is enabled, traffic is stopped the instant it hits the 1,000 packets per second threshold in the application. The control is removed 20 seconds later and re-triggers if the attacker is still sending traffic.

DDoS mitigation is only one use case for large flow control, others described on this blog include: ECMP / LAG load balancing, traffic marking and packet capture. This script can be modified to address these different use cases. The Mininet test bed provides a useful way to test OpenFlow control schemes before moving them into production using physical switches.

Thursday, January 2, 2014

Drivers for growth

This article examines the factors that are continuing to accelerate adoption of the sFlow measurement standard as the universal source of analytics in the data center, including: rising popularity of merchant silicon based switches, open switch operating systems and platforms, virtual switching, network virtualization, and integration of real-time sFlow analytics in orchestration stacks to create automated self-optimizing data centers.
Two years ago the article Merchant silicon described the broad adoption of the Broadcom Trident ASIC by switch vendors. This trend is picking up pace with the rapid adoption of the new Trident II ASIC (announced last year, but only available in volume this Fall). Vendors don't typically disclose when they use merchant silicon, however, based on news reports, similarities in specifications and rumors, the following switches appear to use Broadcom Trident II chipsets: Extreme Summit X770, HP 5930, Dell S6000, Cumulus HCL partners (Agema, Edge-Core, Penguin Computing and Quanta), Arista 7250X and 7500E series, Cisco Nexus 3100 and 9000 series, Juniper QFX 3500 series and Nuage 7850 VSG.
Note: While most of the Broadcom based switches listed already support sFlow, a few vendors have yet to enable the feature in their firmware. If you have, or are considering, Broadcom based switches in your data center, ask your vendor when they plan to enable sFlow. A list of switches with sFlow support is maintained on sFlow.org.
Merchant silicon lowers the barriers to entering the networking market in much the same way as standardizing on x86 compute platforms commoditized hardware and made it possible for a large number of PC manufacturers to emerge. The second component driving this trend is the availability of switch operating systems (Broadcom FASTPATH, Cumulus Linux, Big Switch's Switch Light Linux, Pluribus OpenNetvisor, Pica8 PicOS, etc.) that further reduce the barrier to entry. Another project to watch is the Open Compute Project's efforts to define an open switch hardware platform - if successful, it will create a high volume standard hardware platform and competition between hardware vendors that will drive down hardware costs and increase the market for switch operating systems and the ecosystem of software running on those platforms - analogous to Windows and Linux running on x86 and their respective application ecosystems.
The slide from Bruce Davie's keynote address at Open Server Summit 2013, Network Virtualization: What it is, Why it Matters, shows the rapid transition from a physical edge in which physical servers are attached to physical switch ports, to a virtual edge in which virtual machines are attached to virtual switches. Second generation virtual switches are starting to enter the market, delivering increased performance and integrating support for overlays and network virtualization. In SDN market predictions for New Year: NFV, OpenFlow, Open vSwitch boom, Eric Hanselman, chief analyst at 451 Research states, "The improved scalability of Open vSwitch 2.0 will affect the numerous SDN vendors who use it as an OpenFlow agent on switches or as an endpoint in overlay technologies. These vendors include high-profile players such as VMware Inc. and startups such as Midokura and Pica8."

Accelerating adoption of virtual switching is helping to drive sFlow growth since support for the standard is integrated in virtual switches:
The article Visibility and the software defined data center describes how the sFlow standard has been extended to include not just the network, but server and application resources as well. For example, growing support for sFlow in web servers (Apache, NGINX, Tomcat) and load balancers (F5 BIG IP, HAproxy) extends visibility to include application response time, URLs, response codes etc. Best of Velocity 2012: The sFlow Standard describes how sFlow analytics integrate into the DevOps tool stack to provide scaleable, real-time monitoring of application resources.
So far this article has described the widespread support for the sFlow measurement standard within the data center infrastructure. The remainder of the article explores the rise in automation and the vital role that real-time analytics is poised to play in orchestration stacks.

As SDN solutions move from pilot to large scale deployments, attention is shifting from using SDN merely to configure networking, to optimizing performance and increasing efficiency. There is also a clear move to an integrated view of orchestration that includes networking, servers, storage and applications, going beyond SDN to create what VMware calls the Software Defined Data Center (SDDC), Cisco terms the Application Centric Infrastructure (ACI), and Microsoft refers to as the Cloud OS.

The following articles demonstrate the growing awareness among industry leaders about the importance of analytics as they develop their cloud orchestration controllers:
  1. Of Mice and Elephants by Martin Casado and Justin Pettit with input from Bruce Davie, Teemu Koponen, Brad Hedlund, Scott Lowe, and T. Sridhar - VMware
  2. Software Defined Networking on VMWare with Scott Lowe on RunAs Radio - VMware
  3. How Software-defined Networking is rewriting the rules of application delivery by Senior Vice President and General Manager, HP Networking - Hewlett-Packard
  4. Networking Without Limits: SDN by Brad Anderson, Corporate Vice President, Windows Server & System Center - Microsoft
  5. Where the puck is going: analytics by Mike Bushong - Plexxi
  6. Wandl and Cariden: Is There a Real Value? by Tom Nolle - CIMI Corp.
The article Workload placement describes this author's take on the strategic value of analytics and orchestration as a way to transform the economics of cloud computing by more densely packing workloads in the data center.
Recent breakthroughs in real-time sFlow analysis incorporated in the sFlow-RT analytics engine deliver timely, comprehensive, and actionable metrics through a programmatic interface. Expect to see this technology incorporated in next generation self optimizing orchestration solutions in 2014.
Performance Aware SDN describes the theory behind analytics driven orchestration. The talk describes how fast controller response, programmatic configuration interfaces such as OpenFlow, and consistent instrumentation of all the elements being orchestrated are pre-requisites for feedback control.
The requirement for complete measurement coverage by next generation orchestration systems will create a strong demand for sFlow instrumented infrastructure since sFlow is the only widely supported multi-vendor standard that spans network, server and application resources and delivers the low latency and scaleability required for adaptive control.