我面临着一个非常奇怪的问题,我有一个运行在Linux上的简单的Unix流套接字服务器/客户端代码.客户端偶尔会向服务器发送消息(我也只测试了发送一次),但在收到第一条消息后,即使客户端没有发送任何消息,服务器也会继续打印相同的消息.

整个消息是重新创建的,没有静态数据等,这可能会导致相同的消息在多个调用中持续存在.

客户端代码:

/* Client-side socket descriptor. A negative value means "not connected";
 * send_event() calls init_fd() whenever it sees g_fd < 0. */
int g_fd = -1;
/* Filesystem path of the Unix-domain socket the client connects to and the
 * server binds. */
#define SERVER_SOCK   "/tmp/server_sock"

/*
 * Create the client socket, connect it to SERVER_SOCK and switch it to
 * non-blocking mode.
 *
 * Returns 0 on success, with the connected descriptor stored in g_fd.
 * Returns -1 on any failure; in that case the descriptor is closed and g_fd
 * is left at -1, so the next send_event() call retries the connection instead
 * of writing to a socket that was never connected.
 */
int init_fd(void) {
    g_fd = -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) {
        log_info("alloc", "socket() failed with error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    struct sockaddr_un sa;
    memset(&sa, 0, sizeof(sa));
    sa.sun_family = AF_UNIX;
    // Explicit "%s" so the path is never interpreted as a format string.
    snprintf(sa.sun_path, sizeof(sa.sun_path), "%s", SERVER_SOCK);

    // Pass the whole (zeroed) structure so the terminating NUL of sun_path is
    // covered by the address length, matching what the server hands to bind().
    if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
        log_info("alloc", "connect() failed with error (%d:%s)", errno, strerror(errno));
        close(fd);
        return -1;
    }

    int flags = fcntl(fd, F_GETFL, 0);
    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
        log_info("alloc", "fcntl() failed with error (%d:%s)", errno, strerror(errno));
        close(fd);
        return -1;
    }

    g_fd = fd;
    return 0;
}

/*
 * Build the JSON event {"msgType": 650, "data": {"type": "MESSAGE_CHANGE"}}
 * and send it to the server as one length-prefixed message (the terminating
 * NUL of the serialised string is part of the payload).
 *
 * Connects lazily through init_fd() when g_fd is negative. If the write
 * fails, the descriptor is closed and g_fd reset to -1 so that the next call
 * reconnects. The "da_send" line is logged only when the message was actually
 * handed to the socket.
 */
void send_event(void) {
    if (g_fd < 0 && init_fd() < 0) {
        log_info("alloc", "failed to connect to server");
        return;
    }

    json_t *jtc = json_object();
    json_object_set_new(jtc, "msgType", json_integer(650));
    json_t *jtype = json_object();
    json_object_set_new(jtype, "type", json_string("MESSAGE_CHANGE"));
    json_object_set_new(jtc, "data", jtype);   // jtc takes over the reference to jtype

    char *j_dump_string = json_dumps(jtc, JSON_PRESERVE_ORDER);
    if (j_dump_string == NULL) {
        // Serialisation failed (e.g. out of memory); strlen(NULL) would crash.
        log_info("alloc", "failed to serialise message");
        json_decref(jtc);
        return;
    }

    if (write_a_msg(g_fd, (uint8_t *) j_dump_string, strlen(j_dump_string) + 1) == -1) {
        close(g_fd);
        g_fd = -1;
        log_info("alloc", "failed to send message to server");
    } else {
        log_info("alloc", "GNA: da_send: %s", j_dump_string);
    }

    free(j_dump_string);
    json_decref(jtc);
}

/*
 * Send one framed message on fd: a 4-byte big-endian length followed by
 * nbytes of payload taken from ptr. Header and payload are copied into one
 * buffer and written through write_loop().
 *
 * Returns 0 on success, -1 on failure with errno describing the cause
 * (EINVAL when nbytes does not fit the 32-bit length prefix).
 */
int write_a_msg(int fd, const uint8_t *ptr, size_t nbytes) {
    // The length field is 32 bits wide; a larger payload would be sent with a
    // silently truncated length. Also guard the size addition against wrap.
    if (nbytes > 0xFFFFFFFFu || nbytes > (size_t) -1 - MSG_LEN_SIZE) {
        errno = EINVAL;
        return -1;
    }

    size_t total = nbytes + MSG_LEN_SIZE;
    uint8_t *write_buf = malloc(total);
    if (!write_buf) {
        return -1;
    }

    write_buf[0] = (uint8_t) (nbytes >> 24);
    write_buf[1] = (uint8_t) (nbytes >> 16);
    write_buf[2] = (uint8_t) (nbytes >> 8);
    write_buf[3] = (uint8_t) nbytes;
    if (nbytes > 0) {
        // memcpy with a NULL source is undefined even for a zero length.
        memcpy(write_buf + MSG_LEN_SIZE, ptr, nbytes);
    }

    int rc = (write_loop(fd, write_buf, total) < 0) ? -1 : 0;

    // free() may overwrite errno; keep the value write_loop() reported.
    int save_err = errno;
    free(write_buf);
    errno = save_err;
    return rc;
}

/*
 * Write exactly nbytes from ptr to fd, retrying after short writes.
 *
 * EINTR (interrupted by a signal) and EAGAIN/EWOULDBLOCK (non-blocking
 * descriptor not ready) are retried; any other error ends the loop.
 * NOTE(review): the EAGAIN retry spins without sleeping; waiting for
 * writability with poll()/select() would avoid burning CPU.
 *
 * Returns 0 once everything was written, -1 on error with errno set.
 */
static int write_loop(int fd, const uint8_t *ptr, size_t nbytes) {
    size_t nleft = nbytes;

    while (nleft > 0) {
        ssize_t nwritten = write(fd, ptr, nleft);
        if (nwritten < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
                continue;
            }
            return -1;
        }
        if (nwritten == 0) {
            // No progress and no error code: write() leaves errno untouched
            // here, so do not consult it (a stale EAGAIN would loop forever).
            errno = EIO;
            return -1;
        }
        nleft -= (size_t) nwritten;
        ptr += nwritten;
    }
    return 0;
}

服务器端代码(服务器使用libev处理套接字,底层使用select):

/*
 * Create the listening Unix-domain stream socket at SERVER_SOCK and register
 * it with the event loop, with connection_callback as its read handler.
 *
 * Any stale socket file is unlinked before bind(). Returns RET_OK on
 * success; on failure the descriptor (if any) is closed and -1 is returned.
 */
static int server_init(void) {
    int listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_fd < 0) {
        DebugLog(ERROR, "Could not create socket - error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    // Remove a leftover socket file so bind() does not fail on it.
    unlink(SERVER_SOCK);

    struct sockaddr_un addr;
    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    strcpy(addr.sun_path, SERVER_SOCK);

    if (bind(listen_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        DebugLog(ERROR, "Bind failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    if (listen(listen_fd, 5) != 0) {
        DebugLog(ERROR, "Listen failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    DebugLog(INFO, "GNA: Create server ready to accept\n");
    ev_add_fd(listen_fd, EV_READ, connection_callback, NULL);
    return RET_OK;

fail:
    close(listen_fd);
    return -1;
}

/*
 * Read handler for the listening socket: accept one pending connection and
 * register the new descriptor with request_callback as its read handler.
 * The flags and data arguments are not used.
 */
static void connection_callback(int fd, int flags, void *data) {
    struct sockaddr_un peer;
    socklen_t peer_len = sizeof(peer);
    int client_fd = accept(fd, (struct sockaddr *) &peer, &peer_len);

    if (client_fd >= 0) {
        DebugLog(INFO, "GNA: received new connection\n");
        ev_add_fd(client_fd, EV_READ, request_callback, NULL);
        return;
    }

    DebugLog(ERROR, "%s:Accept failed", __func__);
}

/*
 * Read handler for an accepted connection: read one length-prefixed message,
 * NUL-terminate it and pass it to handle_msg().
 *
 * If the read fails (including the peer closing the connection) the
 * descriptor is removed from the event loop and closed. The message buffer is
 * always released before returning. The flags and data arguments are not used.
 */
static void request_callback(int fd, int flags, void *data) {
    char *msg = NULL;
    size_t msglen = 0;

    // A "successful" read that left msg NULL (allocation failure on an empty
    // payload) is treated as a failure too, so msg is never dereferenced NULL.
    if (read_a_msg(fd, (uint8_t **) &msg, &msglen) < 0 || msg == NULL) {
        DebugLog(ERROR, "%s:read failed (pid:%d). error %d:%s", __func__, getpid(), errno,
                 strerror(errno));
        ev_del_fd(fd);
        close(fd);              // close the FD we'll reopen a new one next time
        free(msg);              // free(NULL) is a no-op
        return;
    }

    // read_a_msg() allocates msglen + 1 bytes, so this index is in bounds.
    msg[msglen] = '\0';
    handle_msg(msg, msglen, fd);
    free(msg);
}

/*
 * Handle one received message. Currently this only logs the message text;
 * msglen and fd are accepted but not used.
 */
static void handle_msg(char *msg, int msglen, int fd) {
    (void) msglen;
    (void) fd;

    DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
}

/*
 * Read one framed message from fd: a 4-byte big-endian length followed by
 * that many payload bytes.
 *
 * On success returns 0, stores a malloc'ed buffer in *ptr and the payload
 * length in *nbytes. The buffer holds one byte more than the payload so a
 * caller reading strings can append a NUL; the caller owns and frees it.
 * On failure returns -1 with *ptr set to NULL and *nbytes set to 0 (nothing
 * for the caller to free).
 *
 * NOTE(review): the length comes from the peer and is not capped; consider
 * enforcing a maximum message size before allocating.
 */
int read_a_msg(int fd, uint8_t **ptr, size_t *nbytes) {
    uint8_t hd[4];

    *ptr = NULL;
    *nbytes = 0;

    if (read_loop(fd, hd, sizeof hd) < 0) {
        return -1;
    }

    // Widen each byte before shifting: hd[0] << 24 on a promoted int is
    // undefined for values >= 0x80 and would sign-extend into size_t.
    uint32_t wire_len = ((uint32_t) hd[0] << 24) | ((uint32_t) hd[1] << 16)
                      | ((uint32_t) hd[2] << 8) | (uint32_t) hd[3];
    size_t payload_len = wire_len;

    if (payload_len == (size_t) -1) {
        // payload_len + 1 would wrap to 0 where size_t is 32 bits wide.
        errno = EMSGSIZE;
        return -1;
    }

    uint8_t *buf = malloc(payload_len + 1);
    if (buf == NULL) {
        return -1;
    }

    if (read_loop(fd, buf, payload_len) < 0) {
        // free() may overwrite errno; keep the value read_loop() reported.
        int save_err = errno;
        free(buf);
        errno = save_err;
        return -1;
    }

    *ptr = buf;
    *nbytes = payload_len;
    return 0;
}

/*
 * Read exactly nbytes from fd into ptr, retrying after short reads.
 *
 * EINTR (interrupted by a signal) and EAGAIN/EWOULDBLOCK (non-blocking
 * descriptor with no data yet) are retried; any other error ends the loop.
 *
 * Returns 0 once nbytes were read. Returns -1 on a read error (errno set by
 * read()) or when end-of-file arrives before nbytes were read (errno is left
 * untouched in that case).
 */
static int read_loop(int fd, uint8_t *ptr, size_t nbytes) {
    size_t nleft = nbytes;

    while (nleft > 0) {
        ssize_t nread = read(fd, ptr, nleft);
        if (nread < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
                continue;
            }
            return -1;
        }
        if (nread == 0) {
            // Peer closed the connection before the full amount arrived.
            return -1;
        }
        nleft -= (size_t) nread;
        ptr += nread;
    }
    return 0;
}

尽管没有人发送任何内容,但此日志(log)每隔几秒钟就会打印一次.

DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);

在客户端,只有一个发送函数,其他人都不会调用该函数,并且该日志(log)只出现一次.

log_info("alloc", "GNA: da_send: %s", j_dump_string);

有没有人能告诉我为什么会发生这种情况.如何避免呢?

Update 1: 按照建议对客户端和服务器运行strace后,我发现服务器在select中设置了该FD,并定期收到读取事件:

08:19:26.031031 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=16, tv_usec=1049544}) = 1 (in [11], left {tv_sec=17, tv_usec=44972})
08:19:26.035737 read(11, "\0\0\0005", 4) = 4
08:19:26.035814 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:38.982049 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=3, tv_usec=1000564}) = 1 (in [11], left {tv_sec=3, tv_usec=946001})
08:19:39.036745 read(11, "\0\0\0005", 4) = 4
08:19:39.036816 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:50.117012 _newselect(18, [4 5 6 7 8 9 10 11 15 16 17], [], NULL, {tv_sec=4, tv_usec=1276752}) = 1 (in [17], left {tv_sec=5, tv_usec=276586})
08:19:50.117251 read(17, "\0\0\0005", 4) = 4
08:19:50.117308 read(17, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:51.910855 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=2, tv_usec=1070595}) = 1 (in [11], left {tv_sec=2, tv_usec=943831})
08:19:52.037758 read(11, "\0\0\0005", 4) = 4
08:19:52.037841 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:20:05.031834 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=0, tv_usec=1000335}) = 1 (in [11], left {tv_sec=0, tv_usec=993545})
08:20:05.038758 read(11, "\0\0\0005", 4) = 4
08:20:05.038838 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53

我杀掉了客户端,并从一开始就用strace跟踪它,以便获得套接字的FD.客户端只在FD 14上写入过一次,此外没有写入任何内容:

08:10:02.362615 socket(AF_UNIX, SOCK_STREAM, 0) = 14
08:10:02.363091 connect(14, {sa_family=AF_UNIX, sun_path="/tmp/server_sock"}, 21) = 0
08:10:02.363251 fcntl64(14, F_GETFL)    = 0x2 (flags O_RDWR)
08:10:02.363297 fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
08:10:02.363837 write(14, "\0\0\0005{\"msgType\": 650, \"data\": {\"t"..., 57) = 57

这是对FD14的唯一一次写入.

Update 2: 上面的send_event函数被编译为libabc.so库的一部分.在我的消息中加入getpid()和gettimeofday()后,我发现另一个守护进程也在使用这个库,并调用一个包装函数,该包装函数又调用了send_event函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息.我现在创建一个.pid文件来存储客户端的PID,然后在send_event中比较当前进程的PID和客户端的PID.

推荐答案

上面的send_event函数被编译为libabc.so库的一部分.在我的消息中加入getpid()和gettimeofday()后,我发现另一个守护进程也在使用这个库,并调用一个包装函数,该包装函数又调用了send_event函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息.我现在创建一个.pid文件来存储客户端的PID,然后在send_event中比较当前进程的PID和客户端的PID.

感谢@PTS的建议.

C++相关问答推荐

%p与char*等组合缺少GCC -Wformat警告

Tiva TM4C123GXL的I2C通信

为什么在Linux(特别是Ubuntu 20.04LTS)上,POSIX共享内存对象在重启后仍然存在,然后突然变成了根用户?

#If指令中未定义宏?

我怎么才能用GCC编译一个c库,让它包含另一个库呢?

整型文字后缀在左移中的用途

如何在C语言中正确打印图形

对重叠字符串使用MemMove

将C语言宏定义为自身的目的是什么?(在glibc头文件中看到)

在libwget中启用Cookie会导致分段故障

在编写代码时,Clion比vscode有更多的问题指示器

Tic-tac-toe:从文件加载存储

递归打印二维数组(C编程)

OSDev--双缓冲重启系统

C中的回文数字

为什么会出现此错误?二进制表达式的操作数无效

在文件描述符上设置FD_CLOEXEC与将其传递给POSIX_SPOWN_FILE_ACTIONS_ADCLOSE有区别吗?

模仿 memmove 的行为

无法理解 fgets 输出

使用 GCC 将一个函数中初始化的 struct 体实例通过指针传递到 C 中的另一个函数会产生不同的结果