This article develops a script for automatically detecting large flows that can form the basis of a load balancing performance aware software defined networking (SDN) application. The script is based on the node.js DDoS mitigation example presented in Controlling large flows with OpenFlow. Here the script is adapted to poll sFlow-RT every second for link loads as well as receiving large flow events.
var fs = require('fs');
var http = require('http');

// Host and port of the sFlow-RT REST API; merged into every request's options.
var rt = { hostname: 'localhost', port: 8008 };

// Flow keys used by the 'detail' flow definition.
var flowkeys = 'ipsource,ipdestination,udpsourceport,udpdestinationport';

var threshold = 125000; // 1 Mbit/s = 10% of link bandwidth

// Milliseconds to wait before restarting the event long-poll after a failure.
var retryDelay = 1000;

// Link records keyed by agent + ">" + ifindex, see getLinkRecord().
var links = {};

// mininet mapping between sFlow ifIndex numbers and switch/port names
var ifindexToPort = {};
var path = '/sys/devices/virtual/net/';
var devs = fs.readdirSync(path);
for (var i = 0; i < devs.length; i++) {
  var dev = devs[i];
  // Only devices named <switch>-<port> (e.g. s1-eth1) are mapped.
  var parts = dev.match(/(.*)-(.*)/);
  if (!parts) continue;
  // Read as text and parse with an explicit radix so the key is always the
  // decimal ifIndex string.
  var ifindex = fs.readFileSync(path + dev + '/ifindex', 'utf8');
  ifindex = parseInt(ifindex, 10).toString();
  var port = {'switch':parts[1],'port':dev};
  ifindexToPort[ifindex] = port;
}

/**
 * Copies the own enumerable properties of source onto destination.
 *
 * @param {Object} destination object that is modified in place
 * @param {Object} source object whose own properties are copied
 * @returns {Object} destination
 */
function extend(destination, source) {
  for (var property in source) {
    if (source.hasOwnProperty(property)) {
      destination[property] = source[property];
    }
  }
  return destination;
}

/**
 * Reports a failed REST request: hands the error to onError when one was
 * supplied, otherwise writes a one-line message to stderr.
 *
 * @param {Object} options http.request options of the failed request
 * @param {Error} err the failure
 * @param {function(Error)=} onError optional error handler
 */
function reportFailure(options, err, onError) {
  if (typeof onError === 'function') {
    onError(err);
  } else {
    console.error(options.method + ' ' + options.path + ' failed: ' + err.message);
  }
}

/**
 * Sends one HTTP request and collects the response body as a UTF-8 string.
 * Exactly one of callback / the failure path is invoked per request.
 *
 * @param {Object} options http.request options
 * @param {?string} body request body, or null when there is none
 * @param {function(string)} callback receives the complete response body
 * @param {function(Error)=} onError optional error handler
 */
function sendRequest(options, body, callback, onError) {
  var finished = false;

  function fail(err) {
    if (finished) return;
    finished = true;
    reportFailure(options, err, onError);
  }

  var req = http.request(options, function(resp) {
    var chunks = [];
    // Decode in the stream so a multi-byte character split across two chunks
    // is not corrupted when the chunks are joined.
    resp.setEncoding('utf8');
    resp.on('data', function(chunk) { chunks.push(chunk); });
    resp.on('error', fail);
    resp.on('end', function() {
      if (finished) return;
      if (resp.statusCode >= 400) {
        fail(new Error('HTTP status ' + resp.statusCode));
        return;
      }
      finished = true;
      callback(chunks.join(''));
    });
  });
  // Without this handler a refused or dropped connection raises an unhandled
  // 'error' event and terminates the process.
  req.on('error', fail);
  if (body !== null) req.write(body);
  req.end();
}

/**
 * Issues a GET request and passes the JSON-decoded response to callback.
 *
 * @param {Object} target host settings, e.g. rt
 * @param {string} path request path including any query string
 * @param {function(*)} callback receives the parsed JSON value
 * @param {function(Error)=} onError optional handler for network, HTTP status
 *     and JSON parse failures; failures are logged when omitted
 */
function jsonGet(target,path,callback,onError) {
  var options = extend({method:'GET',path:path},target);
  sendRequest(options, null, function(text) {
    var parsed;
    try {
      parsed = JSON.parse(text);
    } catch (err) {
      reportFailure(options, err, onError);
      return;
    }
    // Called outside the try block so an exception thrown by the callback is
    // not mistaken for a parse failure.
    callback(parsed);
  }, onError);
}

/**
 * Issues a PUT request with a JSON-encoded body and passes the raw response
 * text to callback.
 *
 * @param {Object} target host settings, e.g. rt
 * @param {string} path request path
 * @param {*} value value serialized with JSON.stringify as the request body
 * @param {function(string)} callback receives the response body text
 * @param {function(Error)=} onError optional handler for network and HTTP
 *     status failures; failures are logged when omitted
 */
function jsonPut(target,path,value,callback,onError) {
  var options = extend({method:'PUT',headers:{'content-type':'application/json'},path:path},target);
  sendRequest(options, JSON.stringify(value), callback, onError);
}

/**
 * Returns the record for a link, creating and caching it in links on first use.
 *
 * @param {string} agent agent identifier as reported by sFlow-RT
 * @param {string|number} ifindex interface index on that agent
 * @returns {Object} record with agent, ifindex and port (undefined when the
 *     ifindex has no entry in ifindexToPort)
 */
function getLinkRecord(agent,ifindex) {
  var linkkey = agent + ">" + ifindex;
  var rec = links[linkkey];
  if (!rec) {
    rec = {agent:agent, ifindex:ifindex, port:ifindexToPort[ifindex]};
    links[linkkey] = rec;
  }
  return rec;
}

/**
 * Stores the latest value of each metric as the total of the matching link
 * record.
 *
 * @param {Array<Object>} metrics entries carrying agent, dsIndex and metricValue
 */
function updateLinkLoads(metrics) {
  for (var i = 0; i < metrics.length; i++) {
    var metric = metrics[i];
    var rec = getLinkRecord(metric.agent,metric.dsIndex);
    rec.total = metric.metricValue;
  }
}

/**
 * Prints a detected large flow. This is the hook where flow steering logic
 * could be added.
 *
 * @param {Object} link link record from getLinkRecord()
 * @param {string} flowKey flow key reported with the event
 * @param {number} now local time in milliseconds when the event was handled
 * @param {number} dt difference in milliseconds between now and the event timestamp
 */
function largeFlow(link,flowKey,now,dt) {
  // Events can arrive for interfaces that are not in ifindexToPort; fall back
  // to the agent>ifindex form instead of throwing on link.port.port.
  var portName = link.port ? link.port.port : link.agent + ">" + link.ifindex;
  console.log(now + " " + " " + dt + " " + portName + " " + flowKey);
}

/**
 * Long-polls sFlow-RT for events newer than id, reports recent 'detail'
 * threshold events through largeFlow(), then polls again. A failed poll is
 * retried with the same id after retryDelay milliseconds.
 *
 * @param {number} id last event ID seen, or -1 for the first poll
 */
function getEvents(id) {
  jsonGet(rt,'/events/json?maxEvents=10&timeout=60&eventID='+ id,
    function(events) {
      var nextID = id;
      if (events.length > 0) {
        // The first entry supplies the ID for the next poll; the batch is then
        // reversed before being processed.
        nextID = events[0].eventID;
        events.reverse();
        var now = (new Date()).getTime();
        for (var i = 0; i < events.length; i++) {
          var evt = events[i];
          var dt = now - evt.timestamp;
          // Skip other thresholds and events more than 5 seconds from now.
          if ('detail' === evt.thresholdID && Math.abs(dt) < 5000) {
            var flowKey = evt.flowKey;
            var rec = getLinkRecord(evt.agent,evt.dataSource);
            largeFlow(rec,flowKey,now,dt);
          }
        }
      }
      getEvents(nextID);
    },
    function(err) {
      console.error('event poll failed: ' + err.message);
      // Back off briefly so an unreachable server does not cause a busy loop.
      setTimeout(function() { getEvents(id); }, retryDelay);
    }
  );
}

/**
 * Starts the event long-poll and refreshes the link loads every second.
 */
function startMonitor() {
  getEvents(-1);
  setInterval(function() {
    jsonGet(rt,'/dump/ALL/total/json', updateLinkLoads);
  }, 1000);
}

/**
 * Defines the 'total' flow (bytes per link), then defines the detailed flow.
 */
function setTotalFlows() {
  jsonPut(rt,'/flow/total/json',
    {value:'bytes',filter:'outputifindex!=discard&direction=ingress', t:2},
    function() { setDetailedFlows(); }
  );
}

/**
 * Defines the 'detail' flow keyed by flowkeys, then sets the threshold.
 */
function setDetailedFlows() {
  jsonPut(rt,'/flow/detail/json',
    {
      keys:flowkeys,
      value:'bytes',filter:'outputifindex!=discard&direction=ingress',
      n:10,
      t:2
    },
    function() { setThreshold(); }
  );
}

/**
 * Sets the per-flow threshold on the 'detail' metric, then starts monitoring.
 */
function setThreshold() {
  jsonPut(rt,'/threshold/detail/json',
    {
      metric:'detail',
      value: threshold,
      byFlow: true
    },
    function() { startMonitor(); }
  );
}

/**
 * Entry point: configures sFlow-RT step by step and then starts monitoring.
 */
function initialize() {
  setTotalFlows();
}

initialize();
// Some notes on the script:
- The script tracks total bytes per second on each link using packet sampling since this gives a low latency measurement of link utilization consistent with the flow measurements, see Measurement delay, counters vs. packet counters. The link load values are stored in the links hashmap so they are available when deciding if and how to re-route large flows.
- The flow keys used in this example are ipsource,ipdestination,udpsourceport,udpdestinationport in order to detect the flows generated by iperf. However, any keys can be used and multiple flow definitions could be monitored concurrently.
- The threshold is set so that any flow that consumes 10% or more of link bandwidth is reported.
- The function largeFlow() simply prints out the large flows. However, given a network topology and the link utilization data, this function could be used to implement flow steering controls.
# bash sweep2.bash 1372176102617 1372176132691 1372176162765 1372176192831 1372176222896 1372176252949 1372176283003 1372176313065 1372176343138 1372176373194
The results from the flow detection script are as follows:
$ nodejs lf.js 1372176107535 2 s3-eth1 10.0.0.1,10.0.0.3,57576,5001 1372176108583 5 s2-eth3 10.0.0.1,10.0.0.3,57576,5001 1372176109488 4 s1-eth1 10.0.0.1,10.0.0.3,57576,5001 1372176134246 4 s3-eth1 10.0.0.1,10.0.0.3,48214,5001 1372176134852 4 s2-eth3 10.0.0.1,10.0.0.3,48214,5001 1372176134974 4 s1-eth1 10.0.0.1,10.0.0.3,48214,5001 1372176163792 4 s2-eth3 10.0.0.1,10.0.0.3,39956,5001 1372176164018 5 s1-eth1 10.0.0.1,10.0.0.3,39956,5001 1372176164369 4 s3-eth1 10.0.0.1,10.0.0.3,39956,5001 1372176193584 2 s3-eth1 10.0.0.1,10.0.0.3,38970,5001 1372176193631 6 s2-eth3 10.0.0.1,10.0.0.3,38970,5001 1372176193980 5 s1-eth1 10.0.0.1,10.0.0.3,38970,5001 1372176223492 4 s3-eth1 10.0.0.1,10.0.0.3,44499,5001 1372176223517 3 s2-eth3 10.0.0.1,10.0.0.3,44499,5001 1372176223595 5 s1-eth1 10.0.0.1,10.0.0.3,44499,5001 1372176253274 4 s2-eth3 10.0.0.1,10.0.0.3,39900,5001 1372176253437 3 s3-eth1 10.0.0.1,10.0.0.3,39900,5001 1372176253557 5 s1-eth1 10.0.0.1,10.0.0.3,39900,5001 1372176283485 4 s2-eth3 10.0.0.1,10.0.0.3,55620,5001 1372176283496 9 s1-eth1 10.0.0.1,10.0.0.3,55620,5001 1372176283515 2 s3-eth1 10.0.0.1,10.0.0.3,55620,5001 1372176313469 3 s3-eth1 10.0.0.1,10.0.0.3,58151,5001 1372176313509 2 s1-eth1 10.0.0.1,10.0.0.3,58151,5001 1372176313556 3 s2-eth3 10.0.0.1,10.0.0.3,58151,5001 1372176343398 6 s1-eth1 10.0.0.1,10.0.0.3,41406,5001 1372176343490 4 s2-eth3 10.0.0.1,10.0.0.3,41406,5001 1372176343589 3 s3-eth1 10.0.0.1,10.0.0.3,41406,5001 1372176373525 5 s2-eth3 10.0.0.1,10.0.0.3,44200,5001 1372176373542 5 s3-eth1 10.0.0.1,10.0.0.3,44200,5001 1372176373650 6 s1-eth1 10.0.0.1,10.0.0.3,44200,5001
Note that each flow is detected on ingress by all of the links it traverses as it crosses the network (the blue path shown in the figure below).
Detecting each flow on all the links it traversed gives the controller end to end visibility and allows it to choose globally optimal paths. In this experiment, having multiple independent agents report on the flows provides useful data on the spread of detection times for each flow.
The following table summarizes Detection Time vs. Flow Size from this experiment:
Flow Size (% of link bandwidth) | Detection Time |
---|---|
10% | 4.918 - 6.871 seconds |
20% | 1.555 - 2.283 seconds |
30% | 1.027 - 1.604 seconds |
40% | 0.753 - 1.149 seconds |
50% | 0.596 - 0.699 seconds |
60% | 0.325 - 0.608 seconds |
70% | 0.482 - 0.512 seconds |
80% | 0.404 - 0.491 seconds |
90% | 0.260 - 0.451 seconds |
100% | 0.331 - 0.456 seconds |
The 10% flow is relatively slow to detect because the threshold was set at 10%, so these flows are on the margin. If a lower threshold had been set, they would have been detected more quickly. For flow sizes larger than 10%, the detection times are between 1 and 2 seconds for flows in the range of 20% - 40% of bandwidth, and detection times for larger flows are consistently sub-second.
The detection times shown in the table are achievable with the following sampling rates, see Large flow detection:
Link Speed | Large Flow | Sampling Rate | Polling Interval |
---|---|---|---|
10 Mbit/s | >= 1 Mbit/s | 1-in-10 | 20 seconds |
100 Mbit/s | >= 10 Mbit/s | 1-in-100 | 20 seconds |
1 Gbit/s | >= 100 Mbit/s | 1-in-1,000 | 20 seconds |
10 Gbit/s | >= 1 Gbit/s | 1-in-10,000 | 20 seconds |
40 Gbit/s | >= 4 Gbit/s | 1-in-40,000 | 20 seconds |
100 Gbit/s | >= 10 Gbit/s | 1-in-100,000 | 20 seconds |
These sampling rates allow a central controller to monitor very large scale switch fabrics. In addition, multiple control functions can be applied in parallel based on the sFlow data feed, see Software defined analytics. For example, load balancing, denial of service mitigation and capture of suspicious traffic can all be implemented as SDN applications.
Hi, how do you calculate/generate the detection time for each flow size? Thanks
ReplyDeleteThe load generation script prints out a timestamp each time a flow is initiated and the flow detection script prints out a timestamp when a large flow threshold event is generated for each link across the network (3 events). The range of results is the spread of times for the first and last link to detect the large flow (obtained by subtracting the load generation timestamp from the event timestamps).
DeleteThis comment has been removed by the author.
ReplyDeleteThis comment has been removed by the author.
Deletewhen I run this script, error is " require is not defined",, How can I fix it?
ReplyDeleteThe script in this article is run using node.js to access the sFlow-RT REST API. The error you described would be generated if you were trying to run the script using sFlow-RT's internal JavaScript engine.
DeleteThank you.
Deletecan i get any tutorial to learn this code..
ReplyDeletethnk you.
For node.js there are a number of suggestions on StackOverflow How do I get started with Node.js.
DeleteFor sFlow-RT, see Writing Applications
Hi, i am trying to know if there is a possibility to implement these script on mininet stimulinv host? how to do it? Tank!
ReplyDelete