[IO multiplexing] How EPOLL implements ET (edge trigger)

LT and ET

  • edge-triggered
    • What is edge triggering
      • How to implement edge triggering
        • Summary & Notes
      • How to use recv & send for edge triggering
        • recv
          • return value of recv
        • send
  • level-triggered
  • horizontal trigger code

When we do epoll network programming, we can choose LT (level-triggered, often translated literally as "horizontal trigger") or ET (edge-triggered).
By default, epoll is level-triggered, and level triggering does not require any special settings. So here we mainly study how to do edge triggering.

Edge-triggered

What is edge triggering

There are many explanations of the definition on the Internet.
My understanding is that when an event occurs (for example, the fd becomes readable), epoll_wait reports it only once. Even if there is still data in the buffer after reading once, the event will not be reported again until new data arrives.

question:

  1. How to implement edge triggering
  2. How to use recv & send for edge triggering

How to implement edge triggering

The initialization function is no different from the LT mode.

//Description: Ubuntu 16.04.6 LTS
//Release: 16.04
#include <stdio.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <netinet/in.h>
#include <errno.h>
#include <arpa/inet.h>
#include <string.h>
#include <fcntl.h> // fcntl()
/* Size in bytes of each connection's receive buffer. */
#define BUFFER_LENGTH 128
/* Number of slots in the fd-indexed connection table and in the epoll event array. */
#define ARRAY_LENGTH 1024

/* Per-connection state; the connection table is indexed directly by fd. */
typedef struct connections
{
    int fd;                      /* descriptor that owns this slot */
    char rbuffer[BUFFER_LENGTH]; /* bytes received from the peer */
    int rbuff_index;             /* offset into rbuffer; reset to 0 when the slot is initialised */
    //char wbuffer[128];
} connections_t;

/*
 * Create the listening TCP socket (bound to INADDR_ANY, port 2048) and the
 * epoll instance used by the event loop.
 *
 * listenfd: out, receives the listening socket descriptor.
 * epfd:     out, receives the epoll descriptor.
 *
 * Returns 0 on success. On failure prints the failing call with perror(),
 * closes the listening socket if it was already created (resetting *listenfd
 * to -1) and returns -1.
 */
int InitServer(int* listenfd, int* epfd)
{
    *listenfd = socket(AF_INET, SOCK_STREAM, 0);
    if (-1 == *listenfd) {
        perror("socket");
        return -1;
    }

    struct sockaddr_in svraddr;
    /* Zero the whole structure so padding (sin_zero) is not left indeterminate. */
    memset(&svraddr, 0, sizeof(svraddr));
    svraddr.sin_family = AF_INET;
    svraddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svraddr.sin_port = htons(2048);

    if (-1 == bind(*listenfd, (struct sockaddr *)&svraddr, sizeof(svraddr))) {
        perror("bind");
        goto fail;
    }
    if (-1 == listen(*listenfd, 10)) {
        perror("listen");
        goto fail;
    }
    *epfd = epoll_create(1);
    if (-1 == *epfd) {
        perror("epoll_create");
        goto fail;
    }
    return 0;

fail:
    /* Do not leak the socket when a later setup step fails. */
    close(*listenfd);
    *listenfd = -1;
    return -1;
}
/*
 * Reset the connection-table slot that belongs to fd.
 *
 * fd:       descriptor used as the index into connlist.
 * connlist: table with ARRAY_LENGTH entries.
 *
 * Returns 0 on success, or -1 (after printing a message) when fd cannot be
 * used as an index because it is negative or >= ARRAY_LENGTH.
 */
int InitConnListItem(int fd, connections_t *connlist)
{
    if (fd < 0 || fd >= ARRAY_LENGTH) {
        printf("error:fd out of range.");
        return -1;
    }

    connlist[fd].fd = fd;
    connlist[fd].rbuff_index = 0;
    /* Clear exactly the buffer: its size is BUFFER_LENGTH, not ARRAY_LENGTH. */
    memset(connlist[fd].rbuffer, 0x00, sizeof(connlist[fd].rbuffer));
    return 0;
}

Use fcntl to put the fd into non-blocking mode

/*
 * Put fd into non-blocking mode by adding O_NONBLOCK to its file status flags.
 *
 * Returns the previous flags (as reported by F_GETFL) on success, or -1 if
 * either fcntl() call fails.
 */
int SetNonBlockFD(int fd)
{
    int oldflag = fcntl(fd, F_GETFL);
    if (oldflag == -1)
        return -1; /* never feed an error value back into F_SETFL */

    if (fcntl(fd, F_SETFL, oldflag | O_NONBLOCK) == -1)
        return -1;

    return oldflag;
}

main function
Only recv is written here. Use the number of recv calls to see whether ET is implemented.

int main()
{
    int listenfd, epfd;
    if(InitServer( & amp;listenfd, & amp;epfd) < 0)
        return -1;

    connections_t connlist[ARRAY_LENGTH] = { 0x00 };
    struct epoll_event events[ARRAY_LENGTH] = { 0x00 };

    struct epoll_event ev;
    ev.data.fd = listenfd;
    ev.events = EPOLLIN;
    ev.events |= EPOLLET;
    epoll_ctl(epfd, EPOLL_CTL_ADD, listenfd, & amp;ev);

    int oldflag = SetNonBlockFD(listenfd);
    if(oldflag < 0)
        return -1;

    if(InitConnListItem(listenfd,connlist) < 0)
        return -1;
    
    while(1) {
       int nready = epoll_wait(epfd,events,ARRAY_LENGTH,-1);
       if(nready > 0)
       {
        int i = 0;
         for(i = 0; i < nready; i + + ) {
            int eventfd = events[i].data.fd;
            if(events[i].events & amp; EPOLLIN) {
                if(eventfd == listenfd) {
                    struct sockaddr acceptaddr;
                    socklen_t len = sizeof(acceptaddr);
                    int acceptfd = accept(listenfd, & amp;acceptaddr, & amp;len);
                    if(accept < 0) {
                        perror("accept.");
                        continue;
                    }
                    printf("accept fd:%d\\
",acceptfd);
                    int oldflag = SetNonBlockFD(acceptfd);
                    if (oldflag < 0)
                        return -1;

                    InitConnListItem(acceptfd,connlist);
                    
                     ev.data.fd = acceptfd;
                     ev.events = EPOLLIN;
                     ev.events |= EPOLLET;
                     epoll_ctl(epfd, EPOLL_CTL_ADD, acceptfd, & amp;ev);
                }
                else {
                    #define READ_LENGTH 1
                    int count = recv(eventfd, connlist[eventfd].rbuffer, READ_LENGTH, 0);
                    printf("recv, fd:%d,count:%d,msg:%s\\
",eventfd, count, connlist[eventfd].rbuffer);
                }
            }
            if(events[i].events & amp; EPOLLOUT) {
                printf("EPOLLOUT\\
");
            }
         }
       }
    }

    return 0;
}

test:
Use Net Assistant to simulate the client and send a 36-byte string to the server.
According to the restrictions at recv in mainloop, recv will only read 1 byte at a time.
If ET is implemented, recv will only be called once within a certain period of time to read one byte.
If LT is still implemented, recv will be called 36 times continuously until the buffer is read.

As you can see, recv was only executed once and a “0” was read, so ET was implemented.

Summary & Notes

Summary when implementing ET code:

  1. Comment out listenfd’s ev.events |= EPOLLET; and int oldflag = SetNonBlockFD(listenfd);,
    It is still possible to achieve edge triggering of recv, so the setting of listenfd is not a necessary condition for the edge triggering of acceptfd.
  2. Add the following code to mainloop:
 for(i = 0; i < nready; i++) {
            int eventfd = events[i].data.fd;
            if(events[i].events & EPOLLET) //New
                printf("fd:%d is EPOLLET\n",eventfd); //New
            else //New
                printf("fd:%d is LT\n",eventfd); //New

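Although the ev.events of listenfd and acceptfd are both set with |= EPOLLET, the output of the newly added code above is always “is LT”.
Therefore, if(events[i].events & EPOLLET) cannot be used to determine whether an fd is in ET mode.

↑ Step 1 had already been applied at this point, so it is understandable that fd 3 (listenfd) is reported as LT, but I could not understand why fd 5 (acceptfd) is also reported as LT.
I did not find the reason at the time, so I am recording it here to investigate later. (The likely explanation: EPOLLET is an input flag that tells epoll_ctl how to register the fd. The events field filled in by epoll_wait only carries the events that actually occurred, such as EPOLLIN or EPOLLOUT, so the EPOLLET bit is never set there.)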
3. In the if(events[i].events & EPOLLIN) { } part, the execution order of InitConnListItem(acceptfd, connlist);, epoll_ctl(epfd, EPOLL_CTL_ADD, acceptfd, &ev); and int oldflag = SetNonBlockFD(acceptfd); has no impact on the implementation of ET. Previously, I tried to convert an LT program to ET, but it was unsuccessful, and I thought the order of these three calls was the problem. After rewriting the code, I successfully implemented ET; I then changed the order of the three calls and found that it had no impact on ET.
4. Comment out acceptfd’s ev.events |= EPOLLET;, and recv will return to level (horizontal) triggering (the fd is still non-blocking at this time).

How to use recv & send for edge triggering

recv

When using edge triggering, the fd must also be non-blocking, and all data in the buffer needs to be read each time the event fires.
The data needs to be read in a non-blocking recv loop:

 /*
  * Drain the socket: under edge triggering the readable event is reported
  * only once, so keep calling recv() until it reports that nothing is left
  * (EAGAIN), the peer closes (0), or a real error occurs.
  */
 while (1)
 {
     int count = recv(eventfd, connlist[eventfd].rbuffer, READ_LENGTH, 0);
     if (count > 0) {
         /* NOTE: __TIME__ expands to the compile time, not the time of the call. */
         printf("recv, fd:%d,count:%d,msg:%s,time:%s\n", eventfd, count, connlist[eventfd].rbuffer, __TIME__);
         continue;
     }

     if (count == 0) {
         /* Orderly shutdown by the peer; errno is not meaningful here. */
         printf("et recv return 0: peer closed the connection.\n");
         epoll_ctl(epfd, EPOLL_CTL_DEL, eventfd, NULL);
         close(eventfd);
         break;
     }

     /* count == -1: capture errno before any library call can overwrite it. */
     int recv_errno = errno;
     if (recv_errno == EINTR) {
         /* Interrupted by a signal before any data was read: retry. */
         continue;
     }

     perror("et recv return -1:");
     if (recv_errno == EAGAIN || recv_errno == EWOULDBLOCK) {
         printf("errno is EAGAIN,read end.\n");
     }
     else {
         printf("errno is not EAGAIN,read error.\n");
         epoll_ctl(epfd, EPOLL_CTL_DEL, eventfd, NULL);
         close(eventfd);
     }
     break;
 }
recv return value
  • In non-blocking mode:
    recv returns -1 and errno == EAGAIN (EWOULDBLOCK): the buffer has been drained, so reading is complete for now. (perror: Resource temporarily unavailable)
    recv returns -1 and errno == EINTR: the call was interrupted by a signal before any data was read; this is not an error, simply call recv again.
    recv returns -1 and errno is anything other than EINTR or EAGAIN (EWOULDBLOCK): an error occurred.
    recv returns 0: the client has disconnected.
  • In blocking mode:
    Except that EAGAIN is not normally returned (recv waits for data instead), everything else is the same.
    Reference articles for return values: Introduction to the return values of recv and send in Linux socket programming and their meanings.
send

In the above code, an EPOLLOUT response code is left:

 if(events[i].events & EPOLLOUT) {
                printf("EPOLLOUT\n");
            }

According to recv’s experience, if it is LT mode, EPOLLOUT will always be triggered when it is writable, and EPOLLOUT will only be triggered once when ET.
Set accepted events to EPOLLOUT:

ev.data.fd = acceptfd;
ev.events = EPOLLOUT;

Test the execution of EPOLLOUT in LT mode:
Keep triggering EPOLLOUT.

Test the execution of EPOLLOUT in ET mode:
EPOLLOUT is only triggered once.

Level-triggered

epoll is LT by default.
When the trigger conditions are met, it will always trigger.
For example, if there is data in the read buffer and recv has not finished reading it at one time, then if there is still data left in the buffer, EPOLLIN will be triggered again, and recv can continue.

Write a piece of code to test it. The code is in [Horizontal Trigger Complete Code]. I will only talk about the results here.
Using Net Assistant as the client, the client sends a 36-character message to the server.
“0123456789abcdefghijklmnopqrstuvwxyz”
“epoll_wait” will be printed before each epoll_wait().
After each recv, the received length and the complete fd rbuffer length will be printed. Limit reading to 10 characters at a time. After recv, epoll_event will be rewritten into EPOLLOUT.
After each send, the length of the send will be printed. After send is completed, epoll_event will be rewritten to EPOLLIN.
The result is as follows:

In fact, I only sent a message once in net assistant.
You can see that recv() was automatically triggered 4 times and all the characters sent were read.
And in recv, I inserted the EPOLLOUT event. When recv and send switch back and forth, the characters in the buffer can still be read correctly.

LT must go through epoll_wait to trigger recv again.

Horizontal trigger code

The error handling here is not good, and we need to specifically summarize the processing of return values later.
It’s okay not to read this code.

//Description: Ubuntu 16.04.6 LTS
//Release: 16.04
#include <stdio.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <netinet/in.h>
#include <errno.h>
#include <arpa/inet.h>
#include <string.h>

/* Size in bytes of each connection's receive buffer. */
#define BUFFER_LENGTH 128

/* Per-connection state; the connection table is indexed directly by fd. */
typedef struct connections
{
    int fd;                      /* descriptor that owns this slot */
    char rbuffer[BUFFER_LENGTH]; /* bytes received so far; echoed back by send() */
    int rbuff_index;             /* offset in rbuffer where the next recv() stores data */
    //char wbuffer[128];
} connections_t;

int main()
{
    printf("epoll_event size=%ld\\
",sizeof(struct epoll_event));
    int listenfd = socket(AF_INET,SOCK_STREAM,0);
    if (-1 == listenfd) {
        perror("socket");
        return -1;
    }
    struct sockaddr_in svraddr;
    svraddr.sin_family = AF_INET;
    svraddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svraddr.sin_port = htons(2048);
    socklen_t len = sizeof(svraddr);
    if(-1 == bind(listenfd,(struct sockaddr*) & amp;svraddr,len)) {
        perror("bind");
        return -1;
    }
    if(-1 == listen(listenfd,10)) {
        perror("listen");
        return -1;
    }
    int epfd = epoll_create(1);
    if(-1 == epfd) {
        perror("epoll_create");
        return -1;
    }
    connections_t connlist[1024] = { 0x00 };

    struct epoll_event events[1024] = { 0x00 };
    struct epoll_event ev;
    ev.events = EPOLLIN;
    ev.data.fd = listenfd;
    epoll_ctl(epfd, EPOLL_CTL_ADD, listenfd, & amp;ev);
    connlist[listenfd].fd = listenfd;
    short iserror = 0;
    while(1) {
        printf("epoll_wait\\
");
        int nready = epoll_wait(epfd, events, 1024, -1);
        for(int i = 0; i < nready; + + i) {
            int eventfd = events[i].data.fd;
            if (events[i].events & amp; EPOLLIN)
            {
                
                if (eventfd == listenfd) {
                    struct sockaddr cliaddr;
                    socklen_t len = sizeof(cliaddr);
                    int acceptfd = accept(eventfd,(struct sockaddr*) & amp;cliaddr, & amp;len);
                    printf("accept,fd:%d\\
",acceptfd);
                    
                    ev.events = EPOLLIN;
                    ev.data.fd = acceptfd;
                    epoll_ctl(epfd,EPOLL_CTL_ADD,acceptfd, & amp;ev);

                    connlist[acceptfd].fd = acceptfd;
                    connlist[acceptfd].rbuff_index = 0;
                    memset(connlist[acceptfd].rbuffer,0x00,BUFFER_LENGTH);
                   
                }
                else {
                    char *buff = connlist[eventfd].rbuffer;
                    int *index = & amp;connlist[eventfd].rbuff_index;
                    int recv_length = 10;
                    int msg_count = recv(eventfd, buff + (*index), recv_length, 0);
                    if(msg_count == 0) {
                        perror("recv");
                        printf("disconnect %d\\
",eventfd);
                        epoll_ctl(epfd, EPOLL_CTL_DEL, eventfd, NULL);
                        close(eventfd);
                        continue;
                    }
                    if(msg_count == -1) {
                        perror("recv");
                         printf("recv error fd:%d\\
",eventfd);
                        iserror = 1;
                        break;
                    }
                    (*index) + = msg_count;
                    printf("recv,count:%d,rbuffer:%s\\
", msg_count, buff);

                    ev.events = EPOLLOUT;
                    ev.data.fd = eventfd;
                    epoll_ctl(epfd,EPOLL_CTL_MOD,eventfd, & amp;ev);
                }
            }
            if(events[i].events & amp; EPOLLOUT) {
                char *send_msg = connlist[eventfd].rbuffer;
                int send_msg_length = connlist[eventfd].rbuff_index;
                usleep(100000);
                int msg_count = send(eventfd,send_msg, send_msg_length, 0);
                printf("send,count:%d,msg=%s\\
", msg_count, send_msg);

                ev.events = EPOLLIN;
                ev.data.fd = eventfd;
                epoll_ctl(epfd, EPOLL_CTL_MOD, eventfd, & amp;ev);
            }
        }
        if(iserror == 1)
             break;
    }
    close(epfd);
    close(listenfd);
    return 0;
}