#!/usr/bin/env python3
#
# Copyright (c) 2021 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Script information:
# -------------------
# upcall_cost.py uses various user space and kernel space probes to determine
# the costs (in time) for handling the first packet in user space. It
# calculates the following costs:
#
# - Time it takes from the kernel sending the upcall till it's received by the
# ovs-vswitchd process.
# - Time it takes from ovs-vswitchd sending the execute actions command till
# the kernel receives it.
# - The total time it takes from the kernel sending the upcall until it
# receives the packet execute command.
# - The total time of the above, minus the time it takes for the actual lookup.
#
# In addition, it reports the number of packets batched, as OVS will first
# try to read UPCALL_MAX_BATCH (64) packets from kernel space and then do
# the flow lookups and execution. So the smaller the batch size, the more
# realistic the cost estimates are.
#
# The script does not need any options to attach to a running instance of
# ovs-vswitchd. However, it's recommended to always run the script with the
# --write-events option. This way, if something does go wrong, the collected
# data is saved. Use the --help option to see all the available options.
#
# Note: In addition to the bcc tools for your specific setup, you need the
# following Python packages:
# pip install alive-progress halo psutil scapy strenum text_histogram3
#
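# Example usage (the capture file name below is only an illustration):
#
#   Capture events from the running ovs-vswitchd and keep a copy on disk:
#     $ sudo ./upcall_cost.py --write-events upcall.events
#
#   Post-process a previously saved capture without loading any probes:
#     $ ./upcall_cost.py --read-events upcall.events
#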
try:
from bcc import BPF, USDT, USDTException
except ModuleNotFoundError:
print("WARNING: Can't find the BPF Compiler Collection (BCC) tools!")
print(" This is NOT problem if you analyzing previously collected"
" data.\n")
from alive_progress import alive_bar
from collections import namedtuple
from halo import Halo
from scapy.all import TCP, UDP
from scapy.layers.l2 import Ether
from strenum import StrEnum
from text_histogram3 import histogram
from time import process_time
import argparse
import ast
import psutil
import re
import struct
import subprocess
import sys
import time
#
# Global definitions
#
DP_TUNNEL_PORT = -1
#
# Actual eBPF source code
#
ebpf_source = """
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <uapi/linux/bpf.h>
#define MAX_PACKET <MAX_PACKET_VAL>
#define MAX_KEY <MAX_KEY_VAL>
enum {
EVENT_RECV_UPCALL = 0,
EVENT_DP_UPCALL,
EVENT_OP_FLOW_PUT,
EVENT_OP_FLOW_EXECUTE,
EVENT_OVS_PKT_EXEC,
_EVENT_MAX_EVENT
};
#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
struct event_t {
u32 event;
u32 cpu;
u32 pid;
u32 upcall_type;
u64 ts;
u32 pkt_frag_size;
u32 pkt_size;
u64 key_size;
char comm[TASK_COMM_LEN];
char dpif_name[32];
char dev_name[16];
unsigned char pkt[MAX_PACKET];
unsigned char key[MAX_KEY];
};
BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>);
BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, _EVENT_MAX_EVENT);
static struct event_t *init_event(u32 type)
{
struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
if (!event) {
uint64_t *value = dropcnt.lookup(&type);
if (value)
__sync_fetch_and_add(value, 1);
return NULL;
}
event->event = type;
event->ts = bpf_ktime_get_ns();
event->cpu = bpf_get_smp_processor_id();
event->pid = bpf_get_current_pid_tgid();
bpf_get_current_comm(&event->comm, sizeof(event->comm));
return event;
}
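/*
 * Handler for the recv_upcall USDT probe. The arguments are read as:
 *   arg1 = dpif name (string pointer), arg2 = upcall type,
 *   arg3 = packet data pointer,        arg4 = packet size,
 *   arg5 = flow key pointer,           arg6 = flow key size.
 * Only miss upcalls (upcall type 0, DPIF_UC_MISS) are recorded.
 */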
int trace__recv_upcall(struct pt_regs *ctx) {
uint32_t upcall_type;
uint64_t addr;
uint64_t size;
bpf_usdt_readarg(2, ctx, &upcall_type);
if (upcall_type != 0)
return 0;
struct event_t *event = init_event(EVENT_RECV_UPCALL);
if (!event)
return 1;
bpf_usdt_readarg(1, ctx, &addr);
bpf_probe_read_str(&event->dpif_name, sizeof(event->dpif_name),
(void *)addr);
event->upcall_type = upcall_type;
bpf_usdt_readarg(4, ctx, &event->pkt_size);
bpf_usdt_readarg(6, ctx, &event->key_size);
if (event->pkt_size > MAX_PACKET)
size = MAX_PACKET;
else
size = event->pkt_size;
bpf_usdt_readarg(3, ctx, &addr);
bpf_probe_read(&event->pkt, size, (void *)addr);
if (event->key_size > MAX_KEY)
size = MAX_KEY;
else
size = event->key_size;
bpf_usdt_readarg(5, ctx, &addr);
bpf_probe_read(&event->key, size, (void *)addr);
events.ringbuf_submit(event, 0);
return 0;
};
int trace__op_flow_put(struct pt_regs *ctx) {
uint64_t addr;
uint64_t size;
struct event_t *event = init_event(EVENT_OP_FLOW_PUT);
if (!event) {
return 1;
}
events.ringbuf_submit(event, 0);
return 0;
};
int trace__op_flow_execute(struct pt_regs *ctx) {
uint64_t addr;
uint64_t size;
struct event_t *event = init_event(EVENT_OP_FLOW_EXECUTE);
if (!event) {
return 1;
}
bpf_usdt_readarg(4, ctx, &event->pkt_size);
if (event->pkt_size > MAX_PACKET)
size = MAX_PACKET;
else
size = event->pkt_size;
bpf_usdt_readarg(3, ctx, &addr);
bpf_probe_read(&event->pkt, size, (void *)addr);
events.ringbuf_submit(event, 0);
return 0;
};
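/*
 * Handler for the kernel openvswitch:ovs_dp_upcall tracepoint. Only miss
 * upcalls (upcall_cmd == 1, i.e. OVS_PACKET_CMD_MISS) are recorded, and
 * for fragmented skbs only the linear part of the packet is copied.
 */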
TRACEPOINT_PROBE(openvswitch, ovs_dp_upcall) {
uint64_t size;
struct sk_buff *skb = args->skbaddr;
if (args->upcall_cmd != 1 || skb == NULL || skb->data == NULL)
return 0;
struct event_t *event = init_event(EVENT_DP_UPCALL);
if (!event) {
return 1;
}
event->upcall_type = args->upcall_cmd;
event->pkt_size = args->len;
TP_DATA_LOC_READ_CONST(&event->dpif_name, dp_name,
sizeof(event->dpif_name));
TP_DATA_LOC_READ_CONST(&event->dev_name, dev_name,
sizeof(event->dev_name));
if (skb->data_len != 0) {
event->pkt_frag_size = (skb->len - skb->data_len) & 0xfffffff;
size = event->pkt_frag_size;
} else {
event->pkt_frag_size = 0;
size = event->pkt_size;
}
/* Prevent clang from using register mirroring (or any optimization) on
* the 'size' variable. */
barrier_var(size);
if (size > MAX_PACKET)
size = MAX_PACKET;
bpf_probe_read_kernel(event->pkt, size, skb->data);
events.ringbuf_submit(event, 0);
return 0;
}
int kprobe__ovs_packet_cmd_execute(struct pt_regs *ctx, struct sk_buff *skb)
{
uint64_t size;
if (skb == NULL || skb->data == NULL)
return 0;
struct event_t *event = init_event(EVENT_OVS_PKT_EXEC);
if (!event) {
return 1;
}
events.ringbuf_submit(event, 0);
return 0;
}
"""
#
# Event types
#
class EventType(StrEnum):
RECV_UPCALL = 'dpif_recv__recv_upcall'
DP_UPCALL = 'openvswitch__dp_upcall'
OP_FLOW_PUT = 'dpif_netlink_operate__op_flow_put'
OP_FLOW_EXECUTE = 'dpif_netlink_operate__op_flow_execute'
OVS_PKT_EXEC = 'ktrace__ovs_packet_cmd_execute'
def short_name(name, length=22):
if len(name) < length:
return name
return '..' + name[-(length - 2):]
def from_trace(trace_event):
if trace_event == 0:
return EventType.RECV_UPCALL
elif trace_event == 1:
return EventType.DP_UPCALL
elif trace_event == 2:
return EventType.OP_FLOW_PUT
elif trace_event == 3:
return EventType.OP_FLOW_EXECUTE
elif trace_event == 4:
return EventType.OVS_PKT_EXEC
raise ValueError
#
# Simple event class
#
class Event(object):
def __init__(self, ts, pid, comm, cpu, event_type):
self.ts = ts
self.pid = pid
self.comm = comm
self.cpu = cpu
self.event_type = event_type
def __str__(self):
return "[{:<22}] {:<16} {:8} [{:03}] {:18.9f}".format(
EventType.short_name(self.event_type),
self.comm,
self.pid,
self.cpu,
self.ts / 1000000000)
def __repr__(self):
more = ""
if self.__class__.__name__ != "Event":
more = ", ..."
return "{}({}, {}, {}, {}, {}{})".format(self.__class__.__name__,
self.ts, self.pid,
self.comm, self.cpu,
self.event_type, more)
def handle_event(event):
event = Event(event.ts, event.pid, event.comm.decode("utf-8"),
event.cpu, EventType.from_trace(event.event))
if not options.quiet:
print(event)
return event
def get_event_header_str():
return "{:<24} {:<16} {:>8} {:<3} {:<18} {}".format(
"EVENT", "COMM", "PID", "CPU", "TIME",
"EVENT DATA[dpif_name/dp_port/pkt_len/pkt_frag_len]")
#
# dp_upcall event class
#
class DpUpcall(Event):
def __init__(self, ts, pid, comm, cpu, dpif_name, port, pkt, pkt_len,
pkt_frag_len):
super(DpUpcall, self).__init__(ts, pid, comm, cpu, EventType.DP_UPCALL)
self.dpif_name = dpif_name
self.dp_port = get_dp_mapping(dpif_name, port)
if self.dp_port is None:
#
# As we only identify interfaces at startup, new interfaces could
# have been added, causing the lookup to fail. Just something to
# keep in mind when running this in a dynamic environment.
#
raise LookupError("Can't find datapath port mapping!")
self.pkt = pkt
self.pkt_len = pkt_len
self.pkt_frag_len = pkt_frag_len
def __str__(self):
return "[{:<22}] {:<16} {:8} [{:03}] {:18.9f}: " \
"{:<17} {:4} {:4} {:4}".format(self.event_type,
self.comm,
self.pid,
self.cpu,
self.ts / 1000000000,
self.dpif_name,
self.dp_port,
self.pkt_len,
self.pkt_frag_len)
def handle_event(event):
if event.pkt_size < options.packet_size:
pkt_len = event.pkt_size
else:
pkt_len = options.packet_size
pkt_data = bytes(event.pkt)[:pkt_len]
if len(pkt_data) <= 0 or event.pkt_size == 0:
return
try:
event = DpUpcall(event.ts, event.pid, event.comm.decode("utf-8"),
event.cpu, event.dpif_name.decode("utf-8"),
event.dev_name.decode("utf-8"),
pkt_data,
event.pkt_size,
event.pkt_frag_size)
except LookupError:
#
# If we can't do the port lookup, ignore this event.
#
return None
if not options.quiet:
print(event)
return event
#
# recv_upcall event class
#
class RecvUpcall(Event):
def __init__(self, ts, pid, comm, cpu, dpif_name, key, pkt, pkt_len):
super(RecvUpcall, self).__init__(ts, pid, comm, cpu,
EventType.RECV_UPCALL)
if dpif_name.startswith("system@"):
dpif_name = dpif_name[len("system@"):]
self.dpif_name = dpif_name
nla = RecvUpcall.decode_nlm(key, dump=False)
if "OVS_KEY_ATTR_IN_PORT" in nla:
self.dp_port = struct.unpack('=L', nla["OVS_KEY_ATTR_IN_PORT"])[0]
elif "OVS_KEY_ATTR_TUNNEL" in nla:
self.dp_port = DP_TUNNEL_PORT
else:
self.dp_port = RecvUpcall.get_system_dp_port(self.dpif_name)
if self.dp_port is None:
raise LookupError("Can't find RecvUpcall dp port mapping!")
self.pkt = pkt
self.pkt_len = pkt_len
def __str__(self):
return "[{:<22}] {:<16} {:8} [{:03}] {:18.9f}: {:<17} {:4} {:4}". \
format(
self.event_type,
self.comm,
self.pid,
self.cpu,
self.ts / 1000000000,
self.dpif_name,
self.dp_port,
self.pkt_len)
def get_system_dp_port(dpif_name):
dp_map = get_dp_mapping(dpif_name, "ovs-system", return_map=True)
if dpif_name not in dp_map:
return None
try:
return dp_map[dpif_name]["ovs-system"]
except KeyError:
return None
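#
# decode_nlm() below walks a stream of netlink attributes. Each attribute
# starts with a 4-byte header (u16 nla_len, u16 nla_type) followed by
# (nla_len - 4) bytes of data, padded to a 4-byte boundary. For example,
# the flow key used in the unit tests, b'\x08\x00\x03\x00\x01\x00\x00\x00',
# decodes (on a little-endian host) to nla_len=8, nla_type=3, i.e.
# OVS_KEY_ATTR_IN_PORT, with a 4-byte in-port value of 1.
#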
def decode_nlm(msg, indent=4, dump=True):
bytes_left = len(msg)
result = {}
while bytes_left:
if bytes_left < 4:
if dump:
print("{}WARN: decode truncated; can't read header".format(
' ' * indent))
break
nla_len, nla_type = struct.unpack("=HH", msg[:4])
if nla_len < 4:
if dump:
print("{}WARN: decode truncated; nla_len < 4".format(
' ' * indent))
break
nla_data = msg[4:nla_len]
trunc = ""
if nla_len > bytes_left:
trunc = "..."
nla_data = nla_data[:(bytes_left - 4)]
if RecvUpcall.get_ovs_key_attr_str(nla_type) == \
"OVS_KEY_ATTR_TUNNEL":
#
# If we have truncated tunnel information, we still would
# like to know. This is due to the special tunnel handling
# needed for port matching.
#
result[RecvUpcall.get_ovs_key_attr_str(nla_type)] = bytes()
else:
result[RecvUpcall.get_ovs_key_attr_str(nla_type)] = nla_data
if dump:
print("{}nla_len {}, nla_type {}[{}], data: {}{}".format(
' ' * indent, nla_len,
RecvUpcall.get_ovs_key_attr_str(nla_type),
nla_type,
"".join("{:02x} ".format(b) for b in nla_data), trunc))
if trunc != "":
if dump:
print("{}WARN: decode truncated; nla_len > msg_len[{}] ".
format(' ' * indent, bytes_left))
break
# Update next offset, but make sure it's aligned correctly.
next_offset = (nla_len + 3) & ~(3)
msg = msg[next_offset:]
bytes_left -= next_offset
return result
def get_ovs_key_attr_str(attr):
ovs_key_attr = ["OVS_KEY_ATTR_UNSPEC",
"OVS_KEY_ATTR_ENCAP",
"OVS_KEY_ATTR_PRIORITY",
"OVS_KEY_ATTR_IN_PORT",
"OVS_KEY_ATTR_ETHERNET",
"OVS_KEY_ATTR_VLAN",
"OVS_KEY_ATTR_ETHERTYPE",
"OVS_KEY_ATTR_IPV4",
"OVS_KEY_ATTR_IPV6",
"OVS_KEY_ATTR_TCP",
"OVS_KEY_ATTR_UDP",
"OVS_KEY_ATTR_ICMP",
"OVS_KEY_ATTR_ICMPV6",
"OVS_KEY_ATTR_ARP",
"OVS_KEY_ATTR_ND",
"OVS_KEY_ATTR_SKB_MARK",
"OVS_KEY_ATTR_TUNNEL",
"OVS_KEY_ATTR_SCTP",
"OVS_KEY_ATTR_TCP_FLAGS",
"OVS_KEY_ATTR_DP_HASH",
"OVS_KEY_ATTR_RECIRC_ID",
"OVS_KEY_ATTR_MPLS",
"OVS_KEY_ATTR_CT_STATE",
"OVS_KEY_ATTR_CT_ZONE",
"OVS_KEY_ATTR_CT_MARK",
"OVS_KEY_ATTR_CT_LABELS",
"OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4",
"OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6",
"OVS_KEY_ATTR_NSH"]
if attr < 0 or attr >= len(ovs_key_attr):
return "<UNKNOWN>"
return ovs_key_attr[attr]
def handle_event(event):
#
# For us, only upcalls with a packet, flow_key, and upcall_type
# DPIF_UC_MISS are of interest.
#
if event.pkt_size <= 0 or event.key_size <= 0 or \
event.upcall_type != 0:
return
if event.key_size < options.flow_key_size:
key_len = event.key_size
else:
key_len = options.flow_key_size
if event.pkt_size < options.packet_size:
pkt_len = event.pkt_size
else:
pkt_len = options.packet_size
try:
event = RecvUpcall(event.ts, event.pid, event.comm.decode("utf-8"),
event.cpu, event.dpif_name.decode("utf-8"),
bytes(event.key)[:key_len],
bytes(event.pkt)[:pkt_len],
event.pkt_size)
except LookupError:
return None
if not options.quiet:
print(event)
return event
#
# op_flow_execute event class
#
class OpFlowExecute(Event):
def __init__(self, ts, pid, comm, cpu, pkt, pkt_len):
super(OpFlowExecute, self).__init__(ts, pid, comm, cpu,
EventType.OP_FLOW_EXECUTE)
self.pkt = pkt
self.pkt_len = pkt_len
def __str__(self):
return "[{:<22}] {:<16} {:8} [{:03}] {:18.9f}: " \
"{:<17} {:4} {:4}".format(EventType.short_name(self.event_type),
self.comm,
self.pid,
self.cpu,
self.ts / 1000000000,
"",
"",
self.pkt_len)
def handle_event(event):
if event.pkt_size < options.packet_size:
pkt_len = event.pkt_size
else:
pkt_len = options.packet_size
pkt_data = bytes(event.pkt)[:pkt_len]
if len(pkt_data) <= 0 or event.pkt_size == 0:
return
event = OpFlowExecute(event.ts, event.pid, event.comm.decode("utf-8"),
event.cpu, pkt_data, event.pkt_size)
if not options.quiet:
print(event)
return event
#
# get_dp_mapping()
#
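# get_dp_mapping() maps a datapath/interface-name pair to its datapath port
# number by parsing 'ovs-appctl dpctl/show' output of roughly this shape
# (interface names are just an example):
#
#   system@ovs-system:
#     ...
#     port 1: eth0
#     port 2: eth1
#
# which would be cached as {'ovs-system': {'eth0': 1, 'eth1': 2}}.
#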
def get_dp_mapping(dp, port, return_map=False, dp_map=None):
if options.unit_test:
return port
if dp_map is not None:
get_dp_mapping.dp_port_map_cache = dp_map
#
# Build a cache, so we do not have to execute the ovs command each time.
#
if not hasattr(get_dp_mapping, "dp_port_map_cache"):
try:
output = subprocess.check_output(['ovs-appctl', 'dpctl/show'],
encoding='utf8').split("\n")
except subprocess.CalledProcessError:
output = ""
pass
current_dp = None
get_dp_mapping.dp_port_map_cache = {}
for line in output:
match = re.match("^system@(.*):$", line)
if match is not None:
current_dp = match.group(1)
match = re.match("^ port ([0-9]+): ([^ /]*)", line)
if match is not None and current_dp is not None:
try:
get_dp_mapping.dp_port_map_cache[
current_dp][match.group(2)] = int(match.group(1))
except KeyError:
get_dp_mapping.dp_port_map_cache[current_dp] = \
{match.group(2): int(match.group(1))}
if return_map:
return get_dp_mapping.dp_port_map_cache
if dp not in get_dp_mapping.dp_port_map_cache or \
port not in get_dp_mapping.dp_port_map_cache[dp]:
return None
return get_dp_mapping.dp_port_map_cache[dp][port]
#
# event_to_dict()
#
def event_to_dict(event):
event_dict = {}
for field, _ in event._fields_:
if isinstance(getattr(event, field), (int, bytes)):
event_dict[field] = getattr(event, field)
else:
if (field == "key" and event.key_size == 0) or \
(field == "pkt" and event.pkt_size == 0):
data = bytes()
else:
data = bytes(getattr(event, field))
event_dict[field] = data
return event_dict
#
# receive_event_bcc()
#
def receive_event_bcc(ctx, data, size):
global events_received
events_received += 1
event = b['events'].event(data)
if export_file is not None:
export_file.write("event = {}\n".format(event_to_dict(event)))
receive_event(event)
#
# receive_event()
#
def receive_event(event):
global event_count
if event.event == 0:
trace_event = RecvUpcall.handle_event(event)
elif event.event == 1:
trace_event = DpUpcall.handle_event(event)
elif event.event == 2:
trace_event = Event.handle_event(event)
elif event.event == 3:
trace_event = OpFlowExecute.handle_event(event)
elif event.event == 4:
trace_event = Event.handle_event(event)
try:
event_count['total'][EventType.from_trace(event.event)] += 1
except KeyError:
event_count['total'][EventType.from_trace(event.event)] = 1
event_count['valid'][EventType.from_trace(event.event)] = 0
if trace_event is not None:
event_count['valid'][EventType.from_trace(event.event)] += 1
trace_data.append(trace_event)
#
# collect_event_sets()
#
def collect_event_sets(events, collect_stats=False, profile=False,
spinner=False):
t1_time = 0
def t1_start():
nonlocal t1_time
t1_time = process_time()
def t1_stop(description):
print("* PROFILING: {:<50}: {:.06f} seconds".format(
description, process_time() - t1_time))
warn_parcial_match = False
warn_frag = False
if profile:
t1_start()
#
# First let's create a dict of per handler thread events.
#
threads = {}
threads_result = {}
for idx, event in enumerate(events):
if event.event_type == EventType.DP_UPCALL:
continue
if event.pid not in threads:
threads[event.pid] = []
threads[event.pid].append([idx, event])
if profile:
t1_stop("Creating per thread dictionary")
t1_start()
#
# Now split them into per-upcall sets, but remember that
# RecvUpcall events can be batched.
#
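# Each batch entry below has the shape [event_index, RecvUpcall,
# [trailing events]], where the trailing list collects the OP_FLOW_PUT,
# OP_FLOW_EXECUTE and, later, the OVS_PKT_EXEC events that belong to that
# upcall.
#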
batch_stats = []
for thread, items in threads.items():
thread_set = []
batch = []
ovs_pkt_exec_set = []
batching = True
collecting = 0
has_flow_put = False
has_flow_exec = False
def next_batch():
nonlocal batching, batch, collecting, has_flow_put, has_flow_exec
nonlocal ovs_pkt_exec_set, thread_set
if len(batch) > 0:
#
# If we are done with the batch, see if we need to match up
# any batched OVS_PKT_EXEC events.
#
for event in batch:
if len(ovs_pkt_exec_set) <= 0:
break
if any(isinstance(item,
OpFlowExecute) for item in event[2]):
event[2].append(ovs_pkt_exec_set.pop(0))
#
# Append the batch to the thread-specific set.
#
thread_set = thread_set + batch
if collect_stats:
batch_stats.append(len(batch))
batching = True
batch = []
ovs_pkt_exec_set = []
has_flow_put = False
has_flow_exec = False
collecting = 0
def next_batch_set():
nonlocal has_flow_put, has_flow_exec, collecting
has_flow_put = False
has_flow_exec = False
collecting += 1
for item in items:
idx, event = item
if batching:
if event.event_type == EventType.RECV_UPCALL:
batch.append(item + [[]])
elif len(batch) > 0:
batching = False
collecting = 0
else:
continue
if not batching:
if event.event_type == EventType.RECV_UPCALL:
next_batch()
batch.append(item + [[]])
else:
if event.event_type == EventType.OP_FLOW_PUT:
if has_flow_put:
next_batch_set()
if collecting >= len(batch):
next_batch()
continue
batch[collecting][2].append(item[1])
has_flow_put = True
elif event.event_type == EventType.OP_FLOW_EXECUTE:
if has_flow_exec:
next_batch_set()
if collecting >= len(batch):
next_batch()
continue
if (event.pkt_len == batch[collecting][1].pkt_len
and event.pkt == batch[collecting][1].pkt):
batch[collecting][2].append(item[1])
has_flow_put = True
has_flow_exec = True
else:
#
# If we end up here it could be that an upcall in a
# batch did not generate an EXECUTE and we are out
# of sync. Try to match it to the next batch entry.
#
next_idx = collecting + 1
while True:
if next_idx >= len(batch):
next_batch()
break
if (event.pkt_len == batch[next_idx][1].pkt_len
and event.pkt == batch[next_idx][1].pkt):
batch[next_idx][2] = batch[collecting][2]
batch[collecting][2] = []
collecting = next_idx
batch[collecting][2].append(item[1])
has_flow_put = True
has_flow_exec = True
break
next_idx += 1
elif event.event_type == EventType.OVS_PKT_EXEC:
#
# The OVS_PKT_EXEC might also be batched, so we keep
# them in a separate list and assign them to the
# correct set when completing the set.
#
ovs_pkt_exec_set.append(item[1])
continue
if collecting >= len(batch):
next_batch()
next_batch()
threads_result[thread] = thread_set
if profile:
t1_stop("Creating upcall sets")
t1_start()
#
# Move thread results from list to dictionary
#
thread_stats = {}
for thread, sets in threads_result.items():
if len(sets) > 0:
thread_stats[sets[0][1].comm] = len(sets)
threads_result[thread] = {}
for upcall in sets:
threads_result[thread][upcall[0]] = [upcall[1]] + upcall[2]
if profile:
t1_stop("Moving upcall list to dictionary")
t1_start()
if options.debug & 0x4000000 != 0:
print()
for thread, sets in threads_result.items():
for idx, idx_set in sets.items():
print("DBG: {}".format(idx_set))
#
# Create two lists, one with DP_UPCALLs and one with RECV_UPCALLs
#
dp_upcall_list = []
recv_upcall_list = []
for idx, event in enumerate(events):
if event.event_type == EventType.DP_UPCALL:
dp_upcall_list.append([idx, event])
elif event.event_type == EventType.RECV_UPCALL:
recv_upcall_list.append([idx, event])
if profile:
t1_stop("Creating DP_UPCALL and RECV_UPCALL lists")
t1_start()
if options.debug & 0x4000000 != 0:
print()
for dp_upcall in dp_upcall_list:
print("DBG: {}".format(dp_upcall))
print()
for recv_upcall in recv_upcall_list:
print("DBG: {}".format(recv_upcall))
#
# Now find the matching DP_UPCALL and RECV_UPCALL events
#
event_sets = []
if spinner:
print()
with alive_bar(len(dp_upcall_list),
title="- Matching DP_UPCALLs to RECV_UPCALLs",
spinner=None, disable=not spinner) as bar:
for (idx, event) in dp_upcall_list:
remove_indexes = []
this_set = None
#
# TODO: This part needs some optimization, as it's slow in the
# PVP test scenario. This is because a lot of DP_UPCALLS
# will not have a matching RECV_UPCALL, leading to walking
# the entire recv_upcall_list.
#
# A dictionary would probably help, but in the PVP scenario
# packets come from a limited set of ports and all have the
# same length. The key would need to be recv.dp_port +
# len(recv.pkt) + recv.pkt; however, the recv.pkt comparison
# needs to happen on min(len(event.pkt), len(recv.pkt)).
#
for idx_in_list, (idx_recv, recv) in enumerate(recv_upcall_list):
match = False
if idx_recv < idx:
remove_indexes.append(idx_in_list)
continue
#
# If the RecvUpcall is a tunnel port, we can not map it to
# the correct tunnel. For now, we assume the first matching
# packet is the correct one. For more details see the OVS
# ukey_to_flow_netdev() function.
#
if (event.dp_port == recv.dp_port or
recv.dp_port == DP_TUNNEL_PORT) \
and event.pkt_len == recv.pkt_len:
compare_len = min(len(event.pkt), len(recv.pkt))
if len(event.pkt) != len(recv.pkt) \
and event.pkt_frag_len == 0:
warn_parcial_match = True
elif event.pkt_frag_len != 0:
warn_frag = True
compare_len = min(compare_len, event.pkt_frag_len)
if event.pkt[:compare_len] == recv.pkt[:compare_len]:
match = True
else:
#
# There are still some corner cases due to the fact that
# the kernel dp_upcall tracepoint is hit before the packet
# is prepared/modified for being passed on in the upcall.
# Example cases are packet checksum updates, VLAN
# insertion, etc. For now, we try to handle the checksum
# part, but we might need to add more exceptions in the
# future.
#
diff_bytes = sum(i != j for i, j in zip(
event.pkt[:compare_len], recv.pkt[:compare_len]))
if diff_bytes <= 2 and compare_len > 56:
# This could be a TCP or UDP checksum
event_pkt = Ether(bytes(event.pkt)[:compare_len])
recv_pkt = Ether(bytes(recv.pkt)[:compare_len])
if (event_pkt.haslayer(TCP) and
recv_pkt.haslayer(TCP)) or (
event_pkt.haslayer(UDP) and
recv_pkt.haslayer(UDP)):
if event_pkt.haslayer(TCP):
event_chksum = event_pkt[TCP].chksum
recv_chksum = recv_pkt[TCP].chksum
else:
event_chksum = event_pkt[UDP].chksum
recv_chksum = recv_pkt[UDP].chksum
if event_chksum & 0xff != recv_chksum & 0xff:
diff_bytes -= 1
if event_chksum & 0xff00 != \
recv_chksum & 0xff00:
diff_bytes -= 1
if diff_bytes == 0:
match = True
if match:
this_set = {event.event_type: event}
for sevent in threads_result[recv.pid][idx_recv]:
this_set[sevent.event_type] = sevent
event_sets.append(this_set)
remove_indexes.append(idx_in_list)
if options.debug & 0x4000000 != 0:
print("DBG: Matched DpUpcall({:6}) => "
"RecvUpcall({:6})".format(idx, idx_recv))
break
elif options.debug & 0x8000000 != 0:
print("DBG: COMPARE DpUpcall({:6}) != "
"RecvUpcall({:6})".format(idx, idx_recv))
event_pkt = Ether(bytes(event.pkt)[:compare_len])
recv_pkt = Ether(bytes(recv.pkt)[:compare_len])
print(re.sub('^', 'DBG:' + ' ' * 4,
event_pkt.show(dump=True),
flags=re.MULTILINE))
print(re.sub('^', 'DBG:' + ' ' * 4,
recv_pkt.show(dump=True),
flags=re.MULTILINE))
elif options.debug & 0x8000000 != 0:
print("DBG: COMPATE DpUpcall({:6}) != "
"RecvUpcall({:6}) -> port {}, {} -> "
"len = {}, {}".format(idx, idx_recv,
event.dp_port,
recv.dp_port,
event.pkt_len,
recv.pkt_len))
bar()
for remove_idx in sorted(remove_indexes, reverse=True):
del recv_upcall_list[remove_idx]
if profile:
t1_stop("Matching DP_UPCALLs to a set")
if warn_parcial_match:
print("WARNING: Packets not fully captured for matching!\n "
"Increase the packet buffer with the '--packet-size' option.")
if warn_frag:
print("WARNING: SKB from kernel had fragments, we could only copy/"
"compare the first part!")
if collect_stats:
return event_sets, batch_stats, thread_stats
return event_sets
#
# unit_test()
#
def unit_test():
pkt1 = b'\x01\x02\x03\x04\x05'
pkt2 = b'\x01\x02\x03\x04\x06'
pkt3 = b'\x01\x02\x03\x04\x07'
key = b'\x08\x00\x03\x00\x01\x00\x00\x00' # Port 1
#
# Basic test with all events in line
#
t1_events = [DpUpcall(1, 100, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
RecvUpcall(2, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
Event(3, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(4, 1, "hndl", 1, pkt1, len(pkt1)),
Event(5, 1, "hndl", 1, EventType.OVS_PKT_EXEC)]
t1_result = [{EventType.DP_UPCALL: t1_events[0],
EventType.RECV_UPCALL: t1_events[1],
EventType.OP_FLOW_PUT: t1_events[2],
EventType.OP_FLOW_EXECUTE: t1_events[3],
EventType.OVS_PKT_EXEC: t1_events[4]}]
#
# Basic test with missing flow put
#
t2_events = [DpUpcall(1, 100, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
RecvUpcall(2, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
OpFlowExecute(4, 1, "hndl", 1, pkt1, len(pkt1)),
Event(5, 1, "hndl", 1, EventType.OVS_PKT_EXEC)]
t2_result = [{EventType.DP_UPCALL: t2_events[0],
EventType.RECV_UPCALL: t2_events[1],
EventType.OP_FLOW_EXECUTE: t2_events[2],
EventType.OVS_PKT_EXEC: t2_events[3]}]
#
# Test with RecvUpcall's being batched
#
t3_events = [DpUpcall(1, 101, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
DpUpcall(2, 102, "ping", 2, "system", 1, pkt2, len(pkt2), 0),
DpUpcall(3, 101, "ping", 3, "system", 1, pkt3, len(pkt3), 0),
RecvUpcall(4, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
RecvUpcall(5, 1, "hndl", 1, "systen", key, pkt3, len(pkt3)),
RecvUpcall(6, 1, "hndl", 1, "systen", key, pkt2, len(pkt2)),
Event(7, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(8, 1, "hndl", 1, pkt1, len(pkt1)),
Event(9, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(10, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(11, 1, "hndl", 1, pkt3, len(pkt3)),
Event(12, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(13, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(14, 1, "hndl", 1, pkt2, len(pkt2)),
Event(15, 1, "hndl", 1, EventType.OVS_PKT_EXEC)]
t3_result = [{EventType.DP_UPCALL: t3_events[0],
EventType.RECV_UPCALL: t3_events[3],
EventType.OP_FLOW_PUT: t3_events[6],
EventType.OP_FLOW_EXECUTE: t3_events[7],
EventType.OVS_PKT_EXEC: t3_events[8]},
{EventType.DP_UPCALL: t3_events[1],
EventType.RECV_UPCALL: t3_events[5],
EventType.OP_FLOW_PUT: t3_events[12],
EventType.OP_FLOW_EXECUTE: t3_events[13],
EventType.OVS_PKT_EXEC: t3_events[14]},
{EventType.DP_UPCALL: t3_events[2],
EventType.RECV_UPCALL: t3_events[4],
EventType.OP_FLOW_PUT: t3_events[9],
EventType.OP_FLOW_EXECUTE: t3_events[10],
EventType.OVS_PKT_EXEC: t3_events[11]}]
#
# Test with RecvUpcall's single + batch
#
t4_events = [DpUpcall(1, 100, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
RecvUpcall(2, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
Event(3, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(4, 1, "hndl", 1, pkt1, len(pkt1)),
Event(5, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
DpUpcall(6, 101, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
DpUpcall(7, 102, "ping", 2, "system", 1, pkt2, len(pkt2), 0),
DpUpcall(8, 101, "ping", 3, "system", 1, pkt3, len(pkt3), 0),
RecvUpcall(9, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
RecvUpcall(10, 1, "hndl", 1, "systen", key, pkt3, len(pkt3)),
RecvUpcall(11, 1, "hndl", 1, "systen", key, pkt2, len(pkt2)),
Event(12, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(13, 1, "hndl", 1, pkt1, len(pkt1)),
Event(14, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(15, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(16, 1, "hndl", 1, pkt3, len(pkt3)),
Event(17, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(18, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(14, 1, "hndl", 1, pkt2, len(pkt2)),
Event(19, 1, "hndl", 1, EventType.OVS_PKT_EXEC)]
t4_result = [{EventType.DP_UPCALL: t4_events[0],
EventType.RECV_UPCALL: t4_events[1],
EventType.OP_FLOW_PUT: t4_events[2],
EventType.OP_FLOW_EXECUTE: t4_events[3],
EventType.OVS_PKT_EXEC: t4_events[4]},
{EventType.DP_UPCALL: t4_events[5],
EventType.RECV_UPCALL: t4_events[8],
EventType.OP_FLOW_PUT: t4_events[11],
EventType.OP_FLOW_EXECUTE: t4_events[12],
EventType.OVS_PKT_EXEC: t4_events[13]},
{EventType.DP_UPCALL: t4_events[6],
EventType.RECV_UPCALL: t4_events[10],
EventType.OP_FLOW_PUT: t4_events[17],
EventType.OP_FLOW_EXECUTE: t4_events[18],
EventType.OVS_PKT_EXEC: t4_events[19]},
{EventType.DP_UPCALL: t4_events[7],
EventType.RECV_UPCALL: t4_events[9],
EventType.OP_FLOW_PUT: t4_events[14],
EventType.OP_FLOW_EXECUTE: t4_events[15],
EventType.OVS_PKT_EXEC: t4_events[16]}]
#
# Test with two threads interleaved
#
t5_events = [DpUpcall(1, 100, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
DpUpcall(2, 100, "ping", 1, "system", 1, pkt2, len(pkt2), 0),
RecvUpcall(3, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
RecvUpcall(4, 2, "hndl", 2, "systen", key, pkt2, len(pkt2)),
Event(5, 2, "hndl", 2, EventType.OP_FLOW_PUT),
Event(6, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(7, 2, "hndl", 1, pkt2, len(pkt2)),
OpFlowExecute(8, 1, "hndl", 1, pkt1, len(pkt1)),
Event(9, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(10, 2, "hndl", 1, EventType.OVS_PKT_EXEC)]
t5_result = [{EventType.DP_UPCALL: t5_events[0],
EventType.RECV_UPCALL: t5_events[2],
EventType.OP_FLOW_PUT: t5_events[5],
EventType.OP_FLOW_EXECUTE: t5_events[7],
EventType.OVS_PKT_EXEC: t5_events[8]},
{EventType.DP_UPCALL: t5_events[1],
EventType.RECV_UPCALL: t5_events[3],
EventType.OP_FLOW_PUT: t5_events[4],
EventType.OP_FLOW_EXECUTE: t5_events[6],
EventType.OVS_PKT_EXEC: t5_events[9]}]
#
# Test batch with missing events
#
t6_events = [DpUpcall(1, 101, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
DpUpcall(2, 102, "ping", 2, "system", 1, pkt2, len(pkt2), 0),
RecvUpcall(3, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
RecvUpcall(4, 1, "hndl", 1, "systen", key, pkt2, len(pkt2)),
Event(5, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(6, 1, "hndl", 1, pkt2, len(pkt2)),
Event(7, 1, "hndl", 1, EventType.OVS_PKT_EXEC)]
t6_result = [{EventType.DP_UPCALL: t6_events[0],
EventType.RECV_UPCALL: t6_events[2]},
{EventType.DP_UPCALL: t6_events[1],
EventType.RECV_UPCALL: t6_events[3],
EventType.OP_FLOW_PUT: t6_events[4],
EventType.OP_FLOW_EXECUTE: t6_events[5],
EventType.OVS_PKT_EXEC: t6_events[6]}]
#
# Test with RecvUpcall's and OVS_PKT_EXEC being batched
#
t7_events = [DpUpcall(1, 101, "ping", 1, "system", 1, pkt1, len(pkt1), 0),
DpUpcall(2, 102, "ping", 2, "system", 1, pkt2, len(pkt2), 0),
DpUpcall(3, 101, "ping", 3, "system", 1, pkt3, len(pkt3), 0),
RecvUpcall(4, 1, "hndl", 1, "systen", key, pkt1, len(pkt1)),
RecvUpcall(5, 1, "hndl", 1, "systen", key, pkt2, len(pkt2)),
RecvUpcall(6, 1, "hndl", 1, "systen", key, pkt3, len(pkt3)),
Event(7, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(8, 1, "hndl", 1, pkt1, len(pkt1)),
Event(9, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(10, 1, "hndl", 1, pkt2, len(pkt2)),
Event(11, 1, "hndl", 1, EventType.OP_FLOW_PUT),
OpFlowExecute(12, 1, "hndl", 1, pkt3, len(pkt3)),
Event(13, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(14, 1, "hndl", 1, EventType.OVS_PKT_EXEC),
Event(15, 1, "hndl", 1, EventType.OVS_PKT_EXEC)]
t7_result = [{EventType.DP_UPCALL: t7_events[0],
EventType.RECV_UPCALL: t7_events[3],
EventType.OP_FLOW_PUT: t7_events[6],
EventType.OP_FLOW_EXECUTE: t7_events[7],
EventType.OVS_PKT_EXEC: t7_events[12]},
{EventType.DP_UPCALL: t7_events[1],
EventType.RECV_UPCALL: t7_events[4],
EventType.OP_FLOW_PUT: t7_events[8],
EventType.OP_FLOW_EXECUTE: t7_events[9],
EventType.OVS_PKT_EXEC: t7_events[13]},
{EventType.DP_UPCALL: t7_events[2],
EventType.RECV_UPCALL: t7_events[5],
EventType.OP_FLOW_PUT: t7_events[10],
EventType.OP_FLOW_EXECUTE: t7_events[11],
EventType.OVS_PKT_EXEC: t7_events[14]}]
#
# Actual test sets
#
test_set = [["Simple single event", t1_events, t1_result],
["Single event, missing flow_put", t2_events, t2_result],
["Batched events", t3_events, t3_result],
["Single + batched events", t4_events, t4_result],
["Two sets, different threads", t5_events, t5_result],
["Batch with missing exec", t6_events, t6_result],
["Batched events including exec", t7_events, t7_result]]
print("Running some simple unit tests:")
for test in test_set:
print("- {:<32} ".format(test[0]), end="")
result = collect_event_sets(test[1][:])
if result == test[2]:
print("PASS")
else:
print("FAIL")
print(" OUTPUT :")
for event_set in result:
hdr = " - "
for event_type, event in event_set.items():
print("{} {:<16}: {}".format(hdr, event_type.name, event))
hdr = " "
print(" EXPECTED:")
for event_set in test[2]:
hdr = " - "
for event_type, event in event_set.items():
print("{} {:<16}: {}".format(hdr, event_type.name, event))
hdr = " "
#
# show_key_value()
#
def show_key_value(data_set, description=None):
if description is not None:
print("\n=> {}:".format(description))
for k, v in data_set.items():
print(" {:36}: {:>10}".format(str(k), str(v)))
#
# show_batch_histogram()
#
def show_batch_histogram(data_set, description=None):
nr_of_buckets = 64
if description is not None:
print("\n=> {}:".format(description))
if len(data_set) == 0:
print("# NumSamples = 0")
return
min_val = nr_of_buckets
max_val = 0
entries = 0
high_buckets = 0
buckets = [0] * nr_of_buckets
for entry in data_set:
min_val = min(min_val, entry)
max_val = max(max_val, entry)
if entry == 0:
continue
elif entry > nr_of_buckets:
high_buckets += 1
else:
buckets[entry - 1] += 1
entries += 1
if max(buckets + [high_buckets]) > 4:
scale = int(max(buckets + [high_buckets]) / 4)
else:
scale = 1
print("# NumSamples = {}; Min = {}; Max = {}".format(entries, min_val,
max_val))
print("# each ∎ represents a count of {}".format(scale))
for idx in range(int(nr_of_buckets / 2)):
idx_2nd = idx + int(nr_of_buckets / 2)
print("{:5} [{:8}]: {:22} {:5} [{:8}]: {:22}".format(
idx + 1, buckets[idx], "∎" * int(buckets[idx] / scale),
idx_2nd + 1, buckets[idx_2nd],
"∎" * int(buckets[idx_2nd] / scale)))
if high_buckets > 0:
print("{:>5} [{:8}]: {:22}".format(">" + str(nr_of_buckets),
high_buckets,
"∎" * int(high_buckets / scale)))
#
# show_histogram()
#
def show_histogram(data_set, description=None, options=None,
minimum=None, maximum=None, buckets=None, custbuckets=None):
if description is not None:
print("\n=> {}:".format(description))
if options is not None:
if buckets is None:
buckets = options.histogram_buckets
if options is not None and options.sets:
print(data_set)
if len(data_set) == 0:
print("# NumSamples = 0")
elif len(data_set) == 1:
print("# NumSamples = 1; Min = {0:.4f}; Max = {0:.4f}".
format(data_set[0]))
elif len(set(data_set)) == 1 and maximum is None and minimum is None and \
custbuckets is None:
histogram(data_set, buckets=buckets, minimum=list(set(data_set))[0],
maximum=list(set(data_set))[0] + 1)
else:
histogram(data_set, buckets=buckets,
minimum=minimum, maximum=maximum, custbuckets=custbuckets)
#
# buffer_size_type()
#
def buffer_size_type(astr, min=64, max=2048):
value = int(astr)
if min <= value <= max:
return value
else:
raise argparse.ArgumentTypeError(
'value not in range {}-{}'.format(min, max))
#
# next_power_of_two()
#
def next_power_of_two(val):
np = 1
while np < val:
np *= 2
return np
#
# main()
#
def main():
#
# Don't like these globals, but ctx passing does not seem to work with the
# existing open_ring_buffer() API :(
#
global b
global options
global trace_data
global events_received
global event_count
global export_file
#
# Argument parsing
#
parser = argparse.ArgumentParser()
parser.add_argument("-b", "--histogram-buckets",
help="Number of buckets per histogram, default 20",
type=int, default=20, metavar="BUCKETS")
parser.add_argument("--buffer-page-count",
help="Number of BPF ring buffer pages, default 1024",
type=int, default=1024, metavar="NUMBER")
parser.add_argument("-D", "--debug",
help="Enable eBPF debugging",
type=lambda x: int(x, 0), const=0x3f, default=0,
nargs='?')
parser.add_argument("-f", "--flow-key-size",
help="Set maximum flow key size to capture, "
"default 64", type=buffer_size_type, default=64,
metavar="[64-2048]")
parser.add_argument("--handler-filter",
help="Post processing handler thread filter",
type=str, default=None, metavar="HANDLERS")
parser.add_argument("-P", "--packet-size",
help="Set maximum packet size to capture, "
"default 256", type=buffer_size_type, default=256,
metavar="[64-2048]")
parser.add_argument("-p", "--pid", metavar="VSWITCHD_PID",
help="ovs-vswitch's PID",
type=int, default=None)
parser.add_argument("-q", "--quiet", action="store_true",
help="Do not show individual events")
parser.add_argument("-r", "--read-events",
help="Read events from FILE instead of installing "
"tracepoints", type=str, default=None, metavar="FILE")
parser.add_argument("--sets", action="store_true",
help="Dump content of data sets")
parser.add_argument("-s", "--stop",
help="Stop after receiving EVENTS number of trace "
"events",
type=int, default=0, metavar="EVENTS")
parser.add_argument("--unit-test", action="store_true",
help=argparse.SUPPRESS)
parser.add_argument("-w", "--write-events",
help="Write events to FILE",
type=str, default=None, metavar="FILE")
options = parser.parse_args()
if options.unit_test:
unit_test()
sys.exit(0)
#
# Find the PID of the ovs-vswitchd daemon if not specified.
#
if options.pid is None and options.read_events is None:
for proc in psutil.process_iter():
if 'ovs-vswitchd' in proc.name():
if options.pid is not None:
print("ERROR: Multiple ovs-vswitchd daemons running, "
"use the -p option!")
sys.exit(-1)
options.pid = proc.pid
#
# Error checking on input parameters.
#
if options.pid is None and options.read_events is None:
print("ERROR: Failed to find ovs-vswitchd's PID!")
sys.exit(-1)
if options.read_events is not None and options.write_events is not None:
print("ERROR: Either supply the read or write events option, "
"not both!")
sys.exit(-1)
if options.handler_filter is not None and options.read_events is None:
print("ERROR: The --handler-filter option is only valid with the "
"--read-events option!")
sys.exit(-1)
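#
# The BPF ring buffer needs a power-of-two number of pages, so round the
# requested page count up accordingly.
#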
options.buffer_page_count = next_power_of_two(options.buffer_page_count)
#
# Open write handle if needed.
#
if options.write_events is not None:
try:
export_file = open(options.write_events, "w")
except (FileNotFoundError, IOError, PermissionError) as e:
print("ERROR: Can't create export file \"{}\": {}".format(
options.write_events, e.strerror))
sys.exit(-1)
else:
export_file = None
trace_data = []
event_count = {'total': {}, 'valid': {}, 'miss': {}}
if options.read_events is None:
#
# Call get_dp_mapping() to prepare the cache
#
dp_port_map = get_dp_mapping("ovs-system", "eth0", return_map=True)
if export_file is not None:
export_file.write("dp_port_map = {}\n".format(dp_port_map))
#
# Attach the usdt probe
#
u = USDT(pid=int(options.pid))
try:
u.enable_probe(probe="recv_upcall", fn_name="trace__recv_upcall")
u.enable_probe(probe="op_flow_put", fn_name="trace__op_flow_put")
u.enable_probe(probe="op_flow_execute",
fn_name="trace__op_flow_execute")
except USDTException as e:
print("ERROR: {}"
"ovs-vswitchd!".format(
(re.sub('^', ' ' * 7, str(e),
flags=re.MULTILINE)).strip().
replace("--with-dtrace or --enable-dtrace",
"--enable-usdt-probes")))
sys.exit(-1)
#
# Uncomment to see how arguments are decoded.
# print(u.get_text())
#
print("- Compiling eBPF programs...")
#
# Attach probes to the running process
#
source = ebpf_source.replace("<MAX_PACKET_VAL>",
str(options.packet_size))
source = source.replace("<MAX_KEY_VAL>", str(options.flow_key_size))
source = source.replace("<BUFFER_PAGE_CNT>",
str(options.buffer_page_count))
b = BPF(text=source, usdt_contexts=[u], debug=options.debug & 0xffffff)
#
# Dump out all events
#
print("- Capturing events [Press ^C to stop]...")
events_received = 0
if not options.quiet:
print("\n" + Event.get_event_header_str())
b['events'].open_ring_buffer(receive_event_bcc)
while 1:
try:
b.ring_buffer_poll()
if options.stop != 0 and events_received >= options.stop:
break
time.sleep(0.5)
except KeyboardInterrupt:
break
dropcnt = b.get_table("dropcnt")
export_misses = {}
for k in dropcnt.keys():
event = EventType.from_trace(k.value)
count = dropcnt.sum(k).value
if count > 0:
if event not in event_count['total']:
event_count['total'][event] = 0
event_count['valid'][event] = 0
event_count['miss'][event] = count
export_misses[k.value] = count
if options.write_events is not None:
if sum(event_count['miss'].values()) > 0:
export_file.write("event_miss = {}\n".format(export_misses))
export_file.close()
print()
else:
#
# Here we are requested to read events from an event export file.
#
thread_filter = None
if options.handler_filter is not None:
thread_filter = options.handler_filter.split(',')
try:
dp_port_mapping_valid = False
with open(options.read_events, 'r') as fd:
events_received = 0
if options.quiet:
spinner = Halo(spinner="dots", color="cyan",
text="Reading events from \"{}\"...".format(
options.read_events))
spinner.start()
else:
print("- Reading events from \"{}\"...".format(
options.read_events))
if not options.quiet:
print("\n" + Event.get_event_header_str())
for entry in fd:
if options.stop != 0 and events_received >= options.stop:
break
entry = entry.rstrip()
if entry.startswith('dp_port_map = {'):
if not dp_port_mapping_valid:
dp_port_mapping_valid = True
get_dp_mapping("", "",
dp_map=ast.literal_eval(entry[14:]))
elif (entry.startswith('event = {') and
dp_port_mapping_valid):
event = ast.literal_eval(entry[8:])
event = namedtuple("EventObject",
event.keys())(*event.values())
if thread_filter is not None \
and EventType.from_trace(event.event) != \
EventType.DP_UPCALL \
and event.comm.decode("utf-8") not in thread_filter:
# Skip threads that are not in the filter.
continue
if len(event.pkt) > 0:
options.packet_size = len(event.pkt)
if len(event.key) > 0:
options.flow_key_size = len(event.key)
receive_event(event)
events_received += 1
elif entry.startswith('event_miss = {'):
misses = ast.literal_eval(entry[13:])
for e, count in misses.items():
event = EventType.from_trace(e)
if count > 0:
if event not in event_count['total']:
event_count['total'][event] = 0
event_count['valid'][event] = 0
event_count['miss'][event] = count
if options.quiet:
spinner.stop()
print("- Reading events from \"{}\"...".format(
options.read_events))
except (FileNotFoundError, PermissionError):
print("ERROR: Can't open file \"{}\" for reading!".format(
options.read_events))
sys.exit(-1)
#
# Start analyzing the data
#
print("- Analyzing results ({} events)...".format(len(trace_data)))
if events_received > 0:
if sum(event_count['miss'].values()) > 0:
print("\nWARNING: Not all events were captured!\n "
"Increase the BPF ring buffer size with the "
"--buffer-page-count option.")
print("\n=> Events received per type (usable/total) [missed events]:")
for event, total in sorted(event_count['total'].items()):
miss = event_count['miss'][event] if event in event_count['miss'] \
else 0
print(" {:36}: {:10}/{:10} [{:10}]".format(
event, event_count['valid'][event], total, miss))
collection, batch_stats, thread_stats = collect_event_sets(
trace_data, collect_stats=True, spinner=True)
if len(collection) <= 0:
print("No upcall data sets where found!!")
sys.exit(0)
print("\n- Analyzing {} event sets...".format(len(collection)))
if options.debug & 0x1000000 != 0:
for upcall in collection:
print("DBG: {}{}{}{}{}".format(
"U" if EventType.DP_UPCALL in upcall else "-",
"u" if EventType.RECV_UPCALL in upcall else "-",
"p" if EventType.OP_FLOW_PUT in upcall else "-",
"e" if EventType.OP_FLOW_EXECUTE in upcall else "-",
"E" if EventType.OVS_PKT_EXEC in upcall else "-"))
if options.debug & 0x2000000 != 0:
try:
print("DBG: - {}".format(upcall[EventType.DP_UPCALL]))
print("DBG: - {}".format(upcall[EventType.RECV_UPCALL]))
print("DBG: - {}".format(upcall[EventType.OP_FLOW_PUT]))
print("DBG: - {}".format(
upcall[EventType.OP_FLOW_EXECUTE]))
print("DBG: - {}".format(upcall[EventType.OVS_PKT_EXEC]))
except LookupError:
continue
show_key_value(thread_stats, description="Upcalls handled per thread")
show_batch_histogram(batch_stats,
description="Histogram of upcalls per batch")
kernel_to_vswitchd = []
kernel_to_kernel_exec = []
vswitchd_to_kernel = []
time_minus_lookup = []
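#
# For each upcall set the following deltas are collected (in microseconds):
#   kernel_to_vswitchd    = RECV_UPCALL - DP_UPCALL
#   vswitchd_to_kernel    = OVS_PKT_EXEC - OP_FLOW_EXECUTE
#   kernel_to_kernel_exec = OVS_PKT_EXEC - DP_UPCALL
#   time_minus_lookup     = kernel_to_kernel_exec - (OP_FLOW_PUT -
#                                                    RECV_UPCALL)
#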
for upcall in collection:
kernel_to_vswitchd.append((upcall[EventType.RECV_UPCALL].ts -
upcall[EventType.DP_UPCALL].ts) /
1000)
if EventType.OP_FLOW_PUT in upcall and \
EventType.OVS_PKT_EXEC in upcall:
time_minus_lookup.append(
((upcall[EventType.OVS_PKT_EXEC].ts -
upcall[EventType.DP_UPCALL].ts) -
(upcall[EventType.OP_FLOW_PUT].ts -
upcall[EventType.RECV_UPCALL].ts)) / 1000)
if EventType.OP_FLOW_EXECUTE in upcall and \
EventType.OVS_PKT_EXEC in upcall:
vswitchd_to_kernel.append((upcall[EventType.OVS_PKT_EXEC].ts
- upcall[EventType.OP_FLOW_EXECUTE].ts)
/ 1000)
if EventType.OVS_PKT_EXEC in upcall:
kernel_to_kernel_exec.append((upcall[EventType.OVS_PKT_EXEC].ts -
upcall[EventType.DP_UPCALL].ts) /
1000)
show_histogram(kernel_to_vswitchd,
description="Kernel upcall action to vswitchd receive "
"(microseconds)",
options=options)
show_histogram(vswitchd_to_kernel,
description="vswitchd execute to kernel receive "
"(microseconds)",
options=options)
show_histogram(time_minus_lookup,
description="Upcall overhead (total time minus lookup) "
"(microseconds)",
options=options)
show_histogram(kernel_to_kernel_exec,
description="Kernel upcall to kernel packet execute "
"(microseconds)",
options=options)
#
# Start main() as the default entry point...
#
if __name__ == '__main__':
main()