Skip to content

Commit

Permalink
Kernel: Plumb packet receive timestamp from NetworkAdapter to Socket:…
Browse files Browse the repository at this point in the history
…:recvfrom

Since the receiving socket isn't yet known at packet receive time,
keep timestamps for all packets.

This is useful for keeping statistics about in-kernel queue latencies
in the future, and it can be used to implement SO_TIMESTAMP.
  • Loading branch information
nico authored and awesomekling committed Sep 17, 2020
1 parent b36a2d6 commit 416d470
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 37 deletions.
12 changes: 7 additions & 5 deletions Kernel/Net/IPv4Socket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ KResultOr<size_t> IPv4Socket::receive_byte_buffered(FileDescription& description
return nreceived;
}

KResultOr<size_t> IPv4Socket::receive_packet_buffered(FileDescription& description, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*> addr, Userspace<socklen_t*> addr_length)
KResultOr<size_t> IPv4Socket::receive_packet_buffered(FileDescription& description, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*> addr, Userspace<socklen_t*> addr_length, timeval& packet_timestamp)
{
Locker locker(lock());
ReceivedPacket packet;
Expand Down Expand Up @@ -330,6 +330,8 @@ KResultOr<size_t> IPv4Socket::receive_packet_buffered(FileDescription& descripti
ASSERT(packet.data.has_value());
auto& ipv4_packet = *(const IPv4Packet*)(packet.data.value().data());

packet_timestamp = packet.timestamp;

if (addr) {
#ifdef IPV4_SOCKET_DEBUG
dbg() << "Incoming packet is from: " << packet.peer_address << ":" << packet.peer_port;
Expand Down Expand Up @@ -359,7 +361,7 @@ KResultOr<size_t> IPv4Socket::receive_packet_buffered(FileDescription& descripti
return protocol_receive(packet.data.value(), buffer, buffer_length, flags);
}

KResultOr<size_t> IPv4Socket::recvfrom(FileDescription& description, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*> user_addr, Userspace<socklen_t*> user_addr_length)
KResultOr<size_t> IPv4Socket::recvfrom(FileDescription& description, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*> user_addr, Userspace<socklen_t*> user_addr_length, timeval& packet_timestamp)
{
if (user_addr_length) {
socklen_t addr_length;
Expand All @@ -377,14 +379,14 @@ KResultOr<size_t> IPv4Socket::recvfrom(FileDescription& description, UserOrKerne
if (buffer_mode() == BufferMode::Bytes)
nreceived = receive_byte_buffered(description, buffer, buffer_length, flags, user_addr, user_addr_length);
else
nreceived = receive_packet_buffered(description, buffer, buffer_length, flags, user_addr, user_addr_length);
nreceived = receive_packet_buffered(description, buffer, buffer_length, flags, user_addr, user_addr_length, packet_timestamp);

if (!nreceived.is_error())
Thread::current()->did_ipv4_socket_read(nreceived.value());
return nreceived;
}

bool IPv4Socket::did_receive(const IPv4Address& source_address, u16 source_port, KBuffer&& packet)
bool IPv4Socket::did_receive(const IPv4Address& source_address, u16 source_port, KBuffer&& packet, const timeval& packet_timestamp)
{
LOCKER(lock());

Expand Down Expand Up @@ -413,7 +415,7 @@ bool IPv4Socket::did_receive(const IPv4Address& source_address, u16 source_port,
dbg() << "IPv4Socket(" << this << "): did_receive refusing packet since queue is full.";
return false;
}
m_receive_queue.append({ source_address, source_port, move(packet) });
m_receive_queue.append({ source_address, source_port, packet_timestamp, move(packet) });
m_can_read = true;
}
m_bytes_received += packet_size;
Expand Down
7 changes: 4 additions & 3 deletions Kernel/Net/IPv4Socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ class IPv4Socket : public Socket {
virtual bool can_read(const FileDescription&, size_t) const override;
virtual bool can_write(const FileDescription&, size_t) const override;
virtual KResultOr<size_t> sendto(FileDescription&, const UserOrKernelBuffer&, size_t, int, Userspace<const sockaddr*>, socklen_t) override;
virtual KResultOr<size_t> recvfrom(FileDescription&, UserOrKernelBuffer&, size_t, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>) override;
virtual KResultOr<size_t> recvfrom(FileDescription&, UserOrKernelBuffer&, size_t, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>, timeval&) override;
virtual KResult setsockopt(int level, int option, Userspace<const void*>, socklen_t) override;
virtual KResult getsockopt(FileDescription&, int level, int option, Userspace<void*>, Userspace<socklen_t*>) override;

virtual int ioctl(FileDescription&, unsigned request, FlatPtr arg) override;

bool did_receive(const IPv4Address& peer_address, u16 peer_port, KBuffer&&);
bool did_receive(const IPv4Address& peer_address, u16 peer_port, KBuffer&&, const timeval&);

const IPv4Address& local_address() const { return m_local_address; }
u16 local_port() const { return m_local_port; }
Expand Down Expand Up @@ -111,14 +111,15 @@ class IPv4Socket : public Socket {
virtual bool is_ipv4() const override { return true; }

KResultOr<size_t> receive_byte_buffered(FileDescription&, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>);
KResultOr<size_t> receive_packet_buffered(FileDescription&, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>);
KResultOr<size_t> receive_packet_buffered(FileDescription&, UserOrKernelBuffer& buffer, size_t buffer_length, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>, timeval&);

IPv4Address m_local_address;
IPv4Address m_peer_address;

// One entry of the socket's receive queue. did_receive() appends entries with a
// braced initializer, so the member order below must stay in sync with that call.
struct ReceivedPacket {
// Source address that was passed to did_receive() for this packet.
IPv4Address peer_address;
// Source port that was passed to did_receive() (the ICMP handler passes 0).
u16 peer_port;
// Receive timestamp handed to did_receive(); receive_packet_buffered() copies it
// into its timeval out-parameter.
timeval timestamp;
// Packet bytes; receive_packet_buffered() asserts this has a value before using it.
Optional<KBuffer> data;
};

Expand Down
2 changes: 1 addition & 1 deletion Kernel/Net/LocalSocket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ DoubleBuffer& LocalSocket::send_buffer_for(FileDescription& description)
ASSERT_NOT_REACHED();
}

KResultOr<size_t> LocalSocket::recvfrom(FileDescription& description, UserOrKernelBuffer& buffer, size_t buffer_size, int, Userspace<sockaddr*>, Userspace<socklen_t*>)
KResultOr<size_t> LocalSocket::recvfrom(FileDescription& description, UserOrKernelBuffer& buffer, size_t buffer_size, int, Userspace<sockaddr*>, Userspace<socklen_t*>, timeval&)
{
auto& buffer_for_me = receive_buffer_for(description);
if (!description.is_blocking()) {
Expand Down
2 changes: 1 addition & 1 deletion Kernel/Net/LocalSocket.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class LocalSocket final : public Socket
virtual bool can_read(const FileDescription&, size_t) const override;
virtual bool can_write(const FileDescription&, size_t) const override;
virtual KResultOr<size_t> sendto(FileDescription&, const UserOrKernelBuffer&, size_t, int, Userspace<const sockaddr*>, socklen_t) override;
virtual KResultOr<size_t> recvfrom(FileDescription&, UserOrKernelBuffer&, size_t, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>) override;
virtual KResultOr<size_t> recvfrom(FileDescription&, UserOrKernelBuffer&, size_t, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>, timeval&) override;
virtual KResult getsockopt(FileDescription&, int level, int option, Userspace<void*>, Userspace<socklen_t*>) override;
virtual KResult chown(FileDescription&, uid_t, gid_t) override;
virtual KResult chmod(FileDescription&, mode_t) override;
Expand Down
9 changes: 6 additions & 3 deletions Kernel/Net/NetworkAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <Kernel/Net/EthernetFrameHeader.h>
#include <Kernel/Net/LoopbackAdapter.h>
#include <Kernel/Net/NetworkAdapter.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/StdLib.h>

Expand Down Expand Up @@ -192,18 +193,20 @@ void NetworkAdapter::did_receive(ReadonlyBytes payload)
}
}

m_packet_queue.append(buffer.value());
m_packet_queue.append({ buffer.value(), kgettimeofday() });

if (on_receive)
on_receive();
}

size_t NetworkAdapter::dequeue_packet(u8* buffer, size_t buffer_size)
size_t NetworkAdapter::dequeue_packet(u8* buffer, size_t buffer_size, timeval& packet_timestamp)
{
InterruptDisabler disabler;
if (m_packet_queue.is_empty())
return 0;
auto packet = m_packet_queue.take_first();
auto packet_with_timestamp = m_packet_queue.take_first();
packet_timestamp = packet_with_timestamp.timestamp;
auto packet = move(packet_with_timestamp.packet);
size_t packet_size = packet.size();
ASSERT(packet_size <= buffer_size);
memcpy(buffer, packet.data(), packet_size);
Expand Down
10 changes: 8 additions & 2 deletions Kernel/Net/NetworkAdapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class NetworkAdapter : public RefCounted<NetworkAdapter> {
int send_ipv4(const MACAddress&, const IPv4Address&, IPv4Protocol, const UserOrKernelBuffer& payload, size_t payload_size, u8 ttl);
int send_ipv4_fragmented(const MACAddress&, const IPv4Address&, IPv4Protocol, const UserOrKernelBuffer& payload, size_t payload_size, u8 ttl);

size_t dequeue_packet(u8* buffer, size_t buffer_size);
size_t dequeue_packet(u8* buffer, size_t buffer_size, timeval& packet_timestamp);

bool has_queued_packets() const { return !m_packet_queue.is_empty(); }

Expand All @@ -93,7 +93,13 @@ class NetworkAdapter : public RefCounted<NetworkAdapter> {
IPv4Address m_ipv4_address;
IPv4Address m_ipv4_netmask;
IPv4Address m_ipv4_gateway;
SinglyLinkedList<KBuffer> m_packet_queue;

// A queued packet buffer paired with the time it was enqueued. did_receive()
// builds these with a braced initializer ({ buffer, kgettimeofday() }), so the
// member order below must stay in sync with that call.
struct PacketWithTimestamp {
// The queued packet data; dequeue_packet() moves it out and copies its bytes
// into the caller's buffer.
KBuffer packet;
// Value of kgettimeofday() taken when the packet was appended to m_packet_queue.
timeval timestamp;
};

SinglyLinkedList<PacketWithTimestamp> m_packet_queue;
SinglyLinkedList<KBuffer> m_unused_packet_buffers;
size_t m_unused_packet_buffers_count { 0 };
String m_name;
Expand Down
39 changes: 20 additions & 19 deletions Kernel/Net/NetworkTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@
namespace Kernel {

static void handle_arp(const EthernetFrameHeader&, size_t frame_size);
static void handle_ipv4(const EthernetFrameHeader&, size_t frame_size);
static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&);
static void handle_udp(const IPv4Packet&);
static void handle_tcp(const IPv4Packet&);
static void handle_ipv4(const EthernetFrameHeader&, size_t frame_size, const timeval& packet_timestamp);
static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&, const timeval& packet_timestamp);
static void handle_udp(const IPv4Packet&, const timeval& packet_timestamp);
static void handle_tcp(const IPv4Packet&, const timeval& packet_timestamp);

[[noreturn]] static void NetworkTask_main();

Expand Down Expand Up @@ -89,14 +89,14 @@ void NetworkTask_main()
};
});

auto dequeue_packet = [&pending_packets](u8* buffer, size_t buffer_size) -> size_t {
auto dequeue_packet = [&pending_packets](u8* buffer, size_t buffer_size, timeval& packet_timestamp) -> size_t {
if (pending_packets == 0)
return 0;
size_t packet_size = 0;
NetworkAdapter::for_each([&](auto& adapter) {
if (packet_size || !adapter.has_queued_packets())
return;
packet_size = adapter.dequeue_packet(buffer, buffer_size);
packet_size = adapter.dequeue_packet(buffer, buffer_size, packet_timestamp);
pending_packets--;
#ifdef NETWORK_TASK_DEBUG
klog() << "NetworkTask: Dequeued packet from " << adapter.name().characters() << " (" << packet_size << " bytes)";
Expand All @@ -108,10 +108,11 @@ void NetworkTask_main()
size_t buffer_size = 64 * KiB;
auto buffer_region = MM.allocate_kernel_region(buffer_size, "Kernel Packet Buffer", Region::Access::Read | Region::Access::Write, false, true);
auto buffer = (u8*)buffer_region->vaddr().get();
timeval packet_timestamp;

klog() << "NetworkTask: Enter main loop.";
for (;;) {
size_t packet_size = dequeue_packet(buffer, buffer_size);
size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
if (!packet_size) {
Thread::current()->wait_on(packet_wait_queue, "NetworkTask");
continue;
Expand Down Expand Up @@ -150,7 +151,7 @@ void NetworkTask_main()
handle_arp(eth, packet_size);
break;
case EtherType::IPv4:
handle_ipv4(eth, packet_size);
handle_ipv4(eth, packet_size, packet_timestamp);
break;
case EtherType::IPv6:
// ignore
Expand Down Expand Up @@ -213,7 +214,7 @@ void handle_arp(const EthernetFrameHeader& eth, size_t frame_size)
}
}

void handle_ipv4(const EthernetFrameHeader& eth, size_t frame_size)
void handle_ipv4(const EthernetFrameHeader& eth, size_t frame_size, const timeval& packet_timestamp)
{
constexpr size_t minimum_ipv4_frame_size = sizeof(EthernetFrameHeader) + sizeof(IPv4Packet);
if (frame_size < minimum_ipv4_frame_size) {
Expand All @@ -239,18 +240,18 @@ void handle_ipv4(const EthernetFrameHeader& eth, size_t frame_size)

switch ((IPv4Protocol)packet.protocol()) {
case IPv4Protocol::ICMP:
return handle_icmp(eth, packet);
return handle_icmp(eth, packet, packet_timestamp);
case IPv4Protocol::UDP:
return handle_udp(packet);
return handle_udp(packet, packet_timestamp);
case IPv4Protocol::TCP:
return handle_tcp(packet);
return handle_tcp(packet, packet_timestamp);
default:
klog() << "handle_ipv4: Unhandled protocol " << packet.protocol();
break;
}
}

void handle_icmp(const EthernetFrameHeader& eth, const IPv4Packet& ipv4_packet)
void handle_icmp(const EthernetFrameHeader& eth, const IPv4Packet& ipv4_packet, const timeval& packet_timestamp)
{
auto& icmp_header = *static_cast<const ICMPHeader*>(ipv4_packet.payload());
#ifdef ICMP_DEBUG
Expand All @@ -263,7 +264,7 @@ void handle_icmp(const EthernetFrameHeader& eth, const IPv4Packet& ipv4_packet)
LOCKER(socket->lock());
if (socket->protocol() != (unsigned)IPv4Protocol::ICMP)
continue;
socket->did_receive(ipv4_packet.source(), 0, KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()));
socket->did_receive(ipv4_packet.source(), 0, KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()), packet_timestamp);
}
}

Expand All @@ -290,7 +291,7 @@ void handle_icmp(const EthernetFrameHeader& eth, const IPv4Packet& ipv4_packet)
}
}

void handle_udp(const IPv4Packet& ipv4_packet)
void handle_udp(const IPv4Packet& ipv4_packet, const timeval& packet_timestamp)
{
if (ipv4_packet.payload_size() < sizeof(UDPPacket)) {
klog() << "handle_udp: Packet too small (" << ipv4_packet.payload_size() << ", need " << sizeof(UDPPacket) << ")";
Expand All @@ -316,10 +317,10 @@ void handle_udp(const IPv4Packet& ipv4_packet)

ASSERT(socket->type() == SOCK_DGRAM);
ASSERT(socket->local_port() == udp_packet.destination_port());
socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()));
socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()), packet_timestamp);
}

void handle_tcp(const IPv4Packet& ipv4_packet)
void handle_tcp(const IPv4Packet& ipv4_packet, const timeval& packet_timestamp)
{
if (ipv4_packet.payload_size() < sizeof(TCPPacket)) {
klog() << "handle_tcp: IPv4 payload is too small to be a TCP packet (" << ipv4_packet.payload_size() << ", need " << sizeof(TCPPacket) << ")";
Expand Down Expand Up @@ -549,7 +550,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet)
case TCPSocket::State::Established:
if (tcp_packet.has_fin()) {
if (payload_size != 0)
socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()));
socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()), packet_timestamp);

socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
(void)socket->send_tcp_packet(TCPFlags::ACK);
Expand All @@ -565,7 +566,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet)
#endif

if (payload_size) {
if (socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size())))
if (socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), KBuffer::copy(&ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size()), packet_timestamp))
(void)socket->send_tcp_packet(TCPFlags::ACK);
}
}
Expand Down
3 changes: 2 additions & 1 deletion Kernel/Net/Socket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ KResultOr<size_t> Socket::read(FileDescription& description, size_t, UserOrKerne
{
if (is_shut_down_for_reading())
return 0;
return recvfrom(description, buffer, size, 0, {}, 0);
timeval tv;
return recvfrom(description, buffer, size, 0, {}, 0, tv);
}

KResultOr<size_t> Socket::write(FileDescription& description, size_t, const UserOrKernelBuffer& data, size_t size)
Expand Down
2 changes: 1 addition & 1 deletion Kernel/Net/Socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class Socket : public File {
virtual void attach(FileDescription&) = 0;
virtual void detach(FileDescription&) = 0;
virtual KResultOr<size_t> sendto(FileDescription&, const UserOrKernelBuffer&, size_t, int flags, Userspace<const sockaddr*>, socklen_t) = 0;
virtual KResultOr<size_t> recvfrom(FileDescription&, UserOrKernelBuffer&, size_t, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>) = 0;
virtual KResultOr<size_t> recvfrom(FileDescription&, UserOrKernelBuffer&, size_t, int flags, Userspace<sockaddr*>, Userspace<socklen_t*>, timeval&) = 0;

virtual KResult setsockopt(int level, int option, Userspace<const void*>, socklen_t);
virtual KResult getsockopt(FileDescription&, int level, int option, Userspace<void*>, Userspace<socklen_t*>);
Expand Down
3 changes: 2 additions & 1 deletion Kernel/Syscalls/socket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,8 @@ ssize_t Process::sys$recvmsg(int sockfd, Userspace<struct msghdr*> user_msg, int
auto data_buffer = UserOrKernelBuffer::for_user_buffer((u8*)iovs[0].iov_base, iovs[0].iov_len);
if (!data_buffer.has_value())
return -EFAULT;
auto result = socket.recvfrom(*description, data_buffer.value(), iovs[0].iov_len, flags, user_addr, user_addr_length);
timeval timestamp = { 0, 0 };
auto result = socket.recvfrom(*description, data_buffer.value(), iovs[0].iov_len, flags, user_addr, user_addr_length, timestamp);
if (flags & MSG_DONTWAIT)
description->set_blocking(original_blocking);

Expand Down

0 comments on commit 416d470

Please sign in to comment.