我面临着一个非常奇怪的问题,我有一个运行在Linux上的简单的Unix流套接字服务器/客户端代码.客户端偶尔会向服务器发送消息(我也只测试了发送一次),但在收到第一条消息后,即使客户端没有发送任何消息,服务器也会继续打印相同的消息.
每条消息都是重新创建的,不存在可能导致同一条消息在多次调用之间残留的静态数据等.
客户端代码:
/* Client-side socket descriptor used by init_fd() and send_event();
 * a negative value means no socket is currently open. */
int g_fd = -1;
/* Filesystem path of the Unix-domain socket used by both client and server. */
#define SERVER_SOCK "/tmp/server_sock"
int init_fd(void) {
g_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (g_fd < 0) {
log_info("alloc", "socket() failed with error (%d:%s)", errno, strerror(errno));
return -1;
}
struct sockaddr_un sa;
memset(&sa, 0, sizeof(sa));
sa.sun_family = AF_UNIX;
snprintf(sa.sun_path, sizeof(sa.sun_path), SERVER_SOCK);
if (connect(g_fd, (struct sockaddr *) &sa, strlen(sa.sun_path) + sizeof(sa.sun_family)) < 0) {
log_info("alloc", "connect() failed with error (%d:%s)", errno, strerror(errno));
return -1;
}
int flags = fcntl(g_fd, F_GETFL, 0);
fcntl(g_fd, F_SETFL, flags | O_NONBLOCK);
return 0;
}
/**
 * Build a {"msgType": 650, "data": {"type": "MESSAGE_CHANGE"}} JSON message
 * and send it to the server over g_fd, connecting first if necessary.
 *
 * The serialized text is sent including its terminating NUL byte. If the
 * send fails, the socket is closed and g_fd is reset to -1 so that the next
 * call reconnects.
 */
void send_event(void) {
    if (g_fd < 0 && init_fd() < 0) {
        log_info("alloc", "failed to connect to server");
        return;
    }

    json_t *jtc = json_object();
    json_object_set_new(jtc, "msgType", json_integer(650));
    json_t *jtype = json_object();
    json_object_set_new(jtype, "type", json_string("MESSAGE_CHANGE"));
    /* json_object_set_new() takes over the reference to jtype. */
    json_object_set_new(jtc, "data", jtype);

    char *j_dump_string = json_dumps(jtc, JSON_PRESERVE_ORDER);
    if (j_dump_string == NULL) {
        /* Serialization failed (e.g. an allocation above returned NULL);
         * strlen(NULL) below would crash, so give up on this message. */
        log_info("alloc", "failed to serialize message");
        json_decref(jtc);
        return;
    }

    if (write_a_msg(g_fd, (uint8_t *) j_dump_string, strlen(j_dump_string) + 1) == -1) {
        close(g_fd);
        g_fd = -1;
        log_info("alloc", "failed to send message to server");
    } else {
        /* Only claim the message was sent when it actually was. */
        log_info("alloc", "GNA: da_send: %s", j_dump_string);
    }

    free(j_dump_string);
    json_decref(jtc);
}
/**
 * Send one length-prefixed message: a 4-byte big-endian payload length
 * followed by the payload itself, written with write_loop().
 *
 * @param fd     descriptor to write to
 * @param ptr    payload bytes
 * @param nbytes payload length in bytes
 * @return 0 on success, -1 on failure with errno set (EMSGSIZE when the
 *         length cannot be represented in the header or would overflow the
 *         buffer size computation).
 */
int write_a_msg(int fd, const uint8_t *ptr, size_t nbytes) {
    /* The header below encodes exactly 32 bits; reject anything larger
     * instead of silently truncating the length and corrupting the framing.
     * The second test guards the nbytes + MSG_LEN_SIZE addition. */
    if (nbytes > UINT32_MAX || nbytes > SIZE_MAX - MSG_LEN_SIZE) {
        errno = EMSGSIZE;
        return -1;
    }

    uint8_t *write_buf = malloc(nbytes + MSG_LEN_SIZE);
    if (!write_buf)
        return -1;

    /* NOTE(review): four header bytes are written here, so MSG_LEN_SIZE is
     * assumed to be 4 - confirm against its definition. */
    write_buf[0] = (uint8_t) ((nbytes >> 24) & 0xFF);
    write_buf[1] = (uint8_t) ((nbytes >> 16) & 0xFF);
    write_buf[2] = (uint8_t) ((nbytes >> 8) & 0xFF);
    write_buf[3] = (uint8_t) (nbytes & 0xFF);
    memcpy(write_buf + MSG_LEN_SIZE, ptr, nbytes);

    int rc = 0;
    if (write_loop(fd, write_buf, nbytes + MSG_LEN_SIZE) < 0)
        rc = -1;

    /* free() may change errno; keep the value set by the failed write. */
    int save_err = errno;
    free(write_buf);
    if (rc < 0)
        errno = save_err;
    return rc;
}
/**
 * Write exactly nbytes from ptr to fd, retrying after short writes.
 *
 * EINTR and EAGAIN/EWOULDBLOCK are retried; any other error aborts.
 * NOTE(review): on a non-blocking descriptor the EAGAIN retry is a busy
 * loop; waiting for writability (poll/select) would avoid spinning.
 *
 * @return 0 when everything was written, -1 on error with errno set.
 */
static int write_loop(int fd, const uint8_t *ptr, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        ssize_t nwritten = write(fd, ptr, remaining);
        if (nwritten < 0) {
            /* A signal or a full socket buffer is not a real failure. */
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nwritten == 0) {
            /* No progress and no error: errno was not set by write(), so do
             * not consult it; report a definite error instead. */
            errno = EIO;
            return -1;
        }
        remaining -= (size_t) nwritten;
        ptr += nwritten;
    }
    return 0;
}
客户端使用libev处理套接字,服务器使用SELECT:
/**
 * Create the listening Unix-domain stream socket at SERVER_SOCK and register
 * it with the event loop so that connection_callback runs on read events.
 *
 * @return RET_OK on success, -1 if socket(), bind() or listen() fails.
 */
static int server_init(void) {
    int listen_fd;
    struct sockaddr_un addr;

    memset(&addr, 0, sizeof(addr));

    listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_fd < 0) {
        DebugLog(ERROR, "Could not create socket - error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    /* Remove any existing file at the socket path before binding to it. */
    unlink(SERVER_SOCK);

    addr.sun_family = AF_UNIX;
    strcpy(addr.sun_path, SERVER_SOCK);

    if (bind(listen_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        DebugLog(ERROR, "Bind failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    if (listen(listen_fd, 5) != 0) {
        DebugLog(ERROR, "Listen failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    DebugLog(INFO, "GNA: Create server ready to accept\n");
    ev_add_fd(listen_fd, EV_READ, connection_callback, NULL);
    return RET_OK;

fail:
    close(listen_fd);
    return -1;
}
/**
 * Event-loop callback for the listening socket: accept one pending
 * connection and register the new descriptor with request_callback.
 *
 * @param fd    listening socket that became readable
 * @param flags unused
 * @param data  unused
 */
static void connection_callback(int fd, int flags, void *data) {
    struct sockaddr_un peer;
    socklen_t peer_len = sizeof(peer);
    int client_fd = accept(fd, (struct sockaddr *) &peer, &peer_len);

    if (client_fd >= 0) {
        DebugLog(INFO, "GNA: received new connection\n");
        ev_add_fd(client_fd, EV_READ, request_callback, NULL);
        return;
    }

    DebugLog(ERROR, "%s:Accept failed", __func__);
}
/**
 * Event-loop callback for a client connection: read one length-prefixed
 * message, NUL-terminate it and pass it to handle_msg().
 *
 * If the read fails, the descriptor is removed from the event loop and
 * closed.
 *
 * @param fd    client socket that became readable
 * @param flags unused
 * @param data  unused
 */
static void request_callback(int fd, int flags, void *data) {
    /* Use the buffer type read_a_msg() expects rather than casting a
     * char ** to uint8_t **. */
    uint8_t *raw = NULL;
    size_t msglen = 0;

    /* A NULL buffer is treated as a failure as well, so the terminator
     * write below can never dereference NULL. */
    if (read_a_msg(fd, &raw, &msglen) < 0 || raw == NULL) {
        DebugLog(ERROR, "%s:read failed (pid:%d). error %d:%s", __func__, getpid(), errno,
                 strerror(errno));
        ev_del_fd(fd);
        close(fd); // close the FD we'll reopen a new one next time
        free(raw); /* free(NULL) is a no-op */
        return;
    }

    /* read_a_msg() allocates one byte more than the payload, so index
     * msglen is inside the buffer. */
    raw[msglen] = '\0'; // ensure NULL termination
    handle_msg((char *) raw, (int) msglen, fd);
    free(raw);
}
/**
 * Handle one received message; currently it is only logged.
 *
 * @param msg    NUL-terminated message text
 * @param msglen message length (unused)
 * @param fd     descriptor the message arrived on (unused)
 */
static void handle_msg(char *msg, int msglen, int fd) {
    (void) msglen;
    (void) fd;

    DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
}
/**
 * Read one length-prefixed message: a 4-byte big-endian payload length
 * followed by that many payload bytes.
 *
 * On success *ptr is a malloc'd buffer owned by the caller (release it with
 * free()) and *nbytes is the payload length. The buffer has one extra byte
 * beyond the payload so the caller can append a NUL terminator; that byte is
 * not counted in *nbytes.
 *
 * On failure no buffer is handed out: *ptr is NULL and *nbytes is 0.
 *
 * @return 0 on success, -1 on failure with errno set.
 */
int read_a_msg(int fd, uint8_t **ptr, size_t *nbytes) {
    uint8_t hd[4];

    *ptr = NULL;
    *nbytes = 0;

    if (read_loop(fd, hd, sizeof hd) < 0)
        return -1;

    /* Widen each byte to an unsigned 32-bit value before shifting: shifting
     * the promoted int would overflow for a first byte >= 0x80, and the
     * negative result would sign-extend into a huge size_t. */
    uint32_t wire_len = ((uint32_t) hd[0] << 24) | ((uint32_t) hd[1] << 16) |
                        ((uint32_t) hd[2] << 8) | (uint32_t) hd[3];
    size_t payload_len = wire_len;

    /* payload_len + 1 must not wrap to 0 when size_t is 32 bits wide. */
    if (payload_len >= SIZE_MAX) {
        errno = EMSGSIZE;
        return -1;
    }

    /* NOTE(review): the length comes from the peer and is otherwise
     * unbounded; consider enforcing a protocol maximum here. */
    uint8_t *buf = malloc(payload_len + 1);
    if (buf == NULL)
        return -1;

    if (read_loop(fd, buf, payload_len) < 0) {
        /* free() may change errno; keep the value from the failed read. */
        int save_err = errno;
        free(buf);
        errno = save_err;
        return -1;
    }

    *ptr = buf;
    *nbytes = payload_len;
    return 0;
}
/**
 * Read exactly nbytes from fd into ptr, retrying after short reads.
 *
 * EINTR and EAGAIN/EWOULDBLOCK are retried; any other error aborts.
 * If the peer closes the connection before nbytes have arrived, the call
 * fails with errno set to ECONNRESET, so callers that log errno do not print
 * a stale value left over from an earlier call.
 *
 * @return 0 when nbytes were read, -1 on error or premature end of stream.
 */
static int read_loop(int fd, uint8_t *ptr, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        ssize_t nread = read(fd, ptr, remaining);
        if (nread < 0) {
            /* A signal or an empty non-blocking socket is not a failure. */
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nread == 0) {
            /* End of stream before the full amount arrived. */
            errno = ECONNRESET;
            return -1;
        }
        remaining -= (size_t) nread;
        ptr += nread;
    }
    return 0;
}
尽管没有人发送任何内容,但此日志(log)每隔几秒钟就会打印一次.
DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
在客户端,只有一个发送函数,其他人都不会调用该函数,并且该日志(log)只出现一次.
log_info("alloc", "GNA: da_send: %s", j_dump_string);
有没有人能告诉我为什么会发生这种情况.如何避免呢?
Update 1: 按照建议对客户端和服务器分别运行strace之后,我发现服务器在select中监听了该FD,并且会定期收到读事件:
08:19:26.031031 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=16, tv_usec=1049544}) = 1 (in [11], left {tv_sec=17, tv_usec=44972})
08:19:26.035737 read(11, "\0\0\0005", 4) = 4
08:19:26.035814 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:38.982049 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=3, tv_usec=1000564}) = 1 (in [11], left {tv_sec=3, tv_usec=946001})
08:19:39.036745 read(11, "\0\0\0005", 4) = 4
08:19:39.036816 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:50.117012 _newselect(18, [4 5 6 7 8 9 10 11 15 16 17], [], NULL, {tv_sec=4, tv_usec=1276752}) = 1 (in [17], left {tv_sec=5, tv_usec=276586})
08:19:50.117251 read(17, "\0\0\0005", 4) = 4
08:19:50.117308 read(17, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:51.910855 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=2, tv_usec=1070595}) = 1 (in [11], left {tv_sec=2, tv_usec=943831})
08:19:52.037758 read(11, "\0\0\0005", 4) = 4
08:19:52.037841 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:20:05.031834 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=0, tv_usec=1000335}) = 1 (in [11], left {tv_sec=0, tv_usec=993545})
08:20:05.038758 read(11, "\0\0\0005", 4) = 4
08:20:05.038838 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
我杀掉了客户端,并从一开始就附加strace,以便获得套接字的FD.客户端在FD 14上只写入过一次,此外没有写入任何内容:
08:10:02.362615 socket(AF_UNIX, SOCK_STREAM, 0) = 14
08:10:02.363091 connect(14, {sa_family=AF_UNIX, sun_path="/tmp/server_sock"}, 21) = 0
08:10:02.363251 fcntl64(14, F_GETFL) = 0x2 (flags O_RDWR)
08:10:02.363297 fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
08:10:02.363837 write(14, "\0\0\0005{\"msgType\": 650, \"data\": {\"t"..., 57) = 57
这是对FD14的唯一一次写入.
Update 2:
上面的send_event函数被编译为libabc.so库的一部分.在消息中加入getpid()和gettimeofday()之后,我发现另一个守护进程也在使用这个库,并调用了一个包装函数,而该包装函数会调用send_event,这就是为什么即使客户端没有发送任何消息,服务器仍会定期收到消息.我现在创建了一个.id文件来保存客户端的PID,然后在send_event中比较当前进程的PID和客户端的PID.