有限状态机实例:服务端实现简单的HTTP请求的读取和分析

有限状态机实例:服务端实现HTTP请求的读取和分析

TCP/IP协议在包的头部给出了头部长度字段,但HTTP协议没有提供头部长度字段,而且HTTP头部的长度变化很大。判断HTTP头部结束的依据是遇到一个空行,即只包含一对回车换行符(\r\n)的行。

如果一次读操作没有读入HTTP请求的整个头部(即没有遇到空行),就必须等待下一次读操作把后续数据读入。

因此,每完成一次读操作,都要分析新读入的数据中是否有空行。由于空行前面还有请求行和头部字段,所以在寻找空行的过程中可以顺便完成对HTTP请求头部的解析。

仅解析GET方法的http请求

源文件 httpstate1.cpp:
#include "me.h"
/* Capacity, in bytes, of the receive buffer that main() reads the request into. */
#define BUFFER_SIZE 4096

/* States of the main state machine: which part of the request is being parsed. */
enum CHECK_STATE {
    CHECK_STATE_REQUESTLINE = 0, /* parsing the request line (initial state) */
    CHECK_STATE_HEADER = 1       /* parsing the header fields */
};

/* Results of the sub state machine (parse_line) after scanning the buffer. */
enum LINE_STATUS {
    LINE_OK = 0,   /* a complete line terminated by "\r\n" was found */
    LINE_BAD = 1,  /* a stray '\r' or '\n' was found: malformed line */
    LINE_OPEN = 2  /* no complete line yet; more data must be read */
};

/* Outcome codes returned by the request-parsing functions. */
enum HTTP_CODE {
    NO_REQUEST = 0,        /* request is not complete yet; keep reading */
    GET_REQUEST = 1,       /* a complete GET request has been parsed */
    BAD_REQUEST = 2,       /* the request is syntactically invalid or unsupported */
    FORRBIDEN_REQUEST = 3, /* not used in this file; spelling kept for compatibility */
    INTERNAL_ERROR = 4,    /* the main state machine was in an unknown state */
    CLOSED_CONNECTION = 5  /* not used in this file */
};

/* Replies sent to the client: [0] after a complete GET request, [1] on any failure. */
const char* szret[] = {
    "I get a correct result\n",
    "Something wrong\n"
};

/*
 * Sub state machine: scan buffer[checked_index, read_index) for the end of a line.
 *
 * buffer        - receive buffer holding the bytes read so far
 * checked_index - in/out: offset of the next byte to examine
 * read_index    - offset one past the last byte received
 *
 * Returns:
 *   LINE_OK   - a "\r\n" pair was found; both bytes are overwritten with '\0'
 *               and checked_index is left on the first byte of the next line.
 *   LINE_OPEN - no terminator found yet (or '\r' is the last byte received);
 *               the caller must read more data and call again.
 *   LINE_BAD  - a '\r' not followed by '\n', or a '\n' not preceded by '\r'.
 */
LINE_STATUS parse_line(char *buffer,int &checked_index,int &read_index)
{
    for (; checked_index < read_index; ++checked_index)
    {
        const char current = buffer[checked_index];

        if (current == '\r')
        {
            /* '\r' is the last byte received so far: the '\n' may still arrive.
               checked_index stays on the '\r' so it is re-examined next time. */
            if (checked_index + 1 == read_index)
            {
                return LINE_OPEN;
            }
            /* Complete "\r\n": terminate the line in place and step past it. */
            if (buffer[checked_index + 1] == '\n')
            {
                buffer[checked_index++] = '\0';
                buffer[checked_index++] = '\0';
                return LINE_OK;
            }
            /* A '\r' followed by anything else is a syntax error. */
            return LINE_BAD;
        }

        if (current == '\n')
        {
            /* The previous byte exists whenever checked_index >= 1. The original
               bound (> 1) wrongly rejected a "\r\n" pair at offsets 0 and 1. */
            if (checked_index > 0 && buffer[checked_index - 1] == '\r')
            {
                buffer[checked_index - 1] = '\0';
                buffer[checked_index++] = '\0';
                return LINE_OK;
            }
            return LINE_BAD;
        }
    }

    /* Everything received was examined without finding a terminator. */
    return LINE_OPEN;
}

/*
 * Parse the request line "METHOD URL VERSION" held in temp (NUL-terminated).
 *
 * temp is modified in place: the blanks after the method and after the URL
 * are overwritten with '\0'. Only the GET method and HTTP/1.1 are accepted
 * (both compared case-insensitively). A URL starting with "http://" is
 * reduced to its path part; the resulting URL must begin with '/'.
 *
 * On success the method and URL are printed, checkstate is set to
 * CHECK_STATE_HEADER and NO_REQUEST is returned (the request is not complete
 * yet). On any failure BAD_REQUEST is returned and checkstate is unchanged.
 */
HTTP_CODE parse_requestline(char* temp,CHECK_STATE &checkstate)
{
    const char *blanks = " \t";

    /* The method ends at the first blank. */
    char *method_end = strpbrk(temp, blanks);
    if (!method_end)
    {
        return BAD_REQUEST;
    }
    *method_end = '\0';
    char *url = method_end + 1;

    /* Only the GET method is supported. */
    if (strcasecmp(temp, "GET") != 0)
    {
        return BAD_REQUEST;
    }
    printf("The request method is: GET\n");

    /* Skip extra blanks, then locate the blank that ends the URL. */
    url += strspn(url, blanks);
    char *version = strpbrk(url, blanks);
    if (!version)
    {
        return BAD_REQUEST;
    }
    *version = '\0';
    ++version;
    version += strspn(version, blanks);

    /* Only HTTP/1.1 is supported. */
    if (strcasecmp(version, "HTTP/1.1") != 0)
    {
        return BAD_REQUEST;
    }

    /* Absolute form: drop the scheme and host, keep the path. */
    if (strncasecmp(url, "http://", 7) == 0)
    {
        url = strchr(url + 7, '/');
    }

    if (!url || url[0] != '/')
    {
        return BAD_REQUEST;
    }

    printf("The request URL is: %s\n", url);
    /* Request line done: the main state machine moves on to the headers. */
    checkstate = CHECK_STATE_HEADER;
    return NO_REQUEST;
}

/*
 * Parse one header line held in temp (NUL-terminated, without "\r\n").
 *
 * An empty line marks the end of the headers and yields GET_REQUEST.
 * For Host, Accept and User-Agent (matched case-insensitively) the value is
 * printed after skipping leading blanks; any other header is reported as
 * unhandled. Every non-empty line yields NO_REQUEST.
 */
HTTP_CODE parse_headers(char *temp)
{
    /* Empty line: the whole header section has been parsed. */
    if (*temp == '\0')
    {
        return GET_REQUEST;
    }

    if (strncasecmp(temp, "Host:", 5) == 0)
    {
        const char *value = temp + 5;
        value += strspn(value, " \t");
        printf("the request host is: %s\n", value);
        return NO_REQUEST;
    }

    if (strncasecmp(temp, "Accept:", 7) == 0)
    {
        const char *value = temp + 7;
        value += strspn(value, " \t");
        printf("the request Accept is: %s\n", value);
        return NO_REQUEST;
    }

    if (strncasecmp(temp, "User-Agent:", 11) == 0)
    {
        const char *value = temp + 11;
        value += strspn(value, " \t");
        printf("the request User-Agent is: %s\n", value);
        return NO_REQUEST;
    }

    /* All other header fields are ignored. */
    printf("I can not handle this header\n");
    return NO_REQUEST;
}

/*
 * Main state machine; called after every recv() that delivered new data.
 *
 * Repeatedly extracts complete lines with parse_line() and passes each one
 * to the parser selected by checkstate.
 *
 * buffer        - receive buffer
 * checked_index - in/out: offset of the next byte parse_line() will examine
 * checkstate    - in/out: current state of the main state machine
 * read_index    - offset one past the last byte received
 * start_line    - in/out: offset in buffer where the current line begins
 *
 * Returns:
 *   GET_REQUEST    - parse_headers() reported the end of the headers
 *   BAD_REQUEST    - a malformed line, or a parser rejected the line
 *   NO_REQUEST     - the last line is still incomplete; read more data
 *   INTERNAL_ERROR - checkstate holds an unknown value
 */
HTTP_CODE parse_content(char* buffer,int &checked_index,CHECK_STATE &checkstate,int &read_index,int &start_line)
{
    for (;;)
    {
        const LINE_STATUS status = parse_line(buffer, checked_index, read_index);
        if (status != LINE_OK)
        {
            /* LINE_OPEN: wait for the next recv(); anything else is a syntax error. */
            return (status == LINE_OPEN) ? NO_REQUEST : BAD_REQUEST;
        }

        char *line = buffer + start_line;
        start_line = checked_index; /* where the next line begins */

        if (checkstate == CHECK_STATE_REQUESTLINE)
        {
            if (parse_requestline(line, checkstate) == BAD_REQUEST)
            {
                return BAD_REQUEST;
            }
        }
        else if (checkstate == CHECK_STATE_HEADER)
        {
            const HTTP_CODE code = parse_headers(line);
            if (code == BAD_REQUEST || code == GET_REQUEST)
            {
                return code;
            }
        }
        else
        {
            return INTERNAL_ERROR;
        }
    }
}

/*
 * Entry point. Usage: <program> ip-address port
 *
 * Listens on the given address, accepts a single connection, reads data until
 * parse_content() reports a complete GET request or an error, sends the
 * matching szret[] reply, then closes both sockets and returns 0.
 *
 * Exits with status 1 when the arguments are missing or the ip-address cannot
 * be converted by inet_pton().
 */
int main(int argc,char* argv[])
{
    if (argc <= 2)
    {
        printf("Usage: %s ip-address port\n",basename(argv[0]));
        exit(1);
    }

    const char* ip = argv[1];
    int port = atoi(argv[2]);

    struct sockaddr_in addr;
    memset(&addr,0,sizeof(addr));

    addr.sin_family = AF_INET;
    /* Reject an unparsable address instead of silently binding to 0.0.0.0. */
    if (inet_pton(AF_INET,ip,&addr.sin_addr) != 1)
    {
        printf("invalid ip-address: %s\n",ip);
        exit(1);
    }
    addr.sin_port = htons(port);

    int listenfd = socket(AF_INET,SOCK_STREAM,0);
    assert(listenfd >= 0);
    int ret = bind(listenfd,(struct sockaddr*)&addr,sizeof(addr));
    assert(ret != -1);
    ret = listen(listenfd,5);
    assert(ret != -1);

    struct sockaddr_in client;
    socklen_t client_addrlength = sizeof(client);
    int fd = accept(listenfd,(struct sockaddr*)&client,&client_addrlength);
    if (fd < 0)
    {
        printf("accept error!\n");
    }
    else
    {
        char buffer[BUFFER_SIZE];
        memset(buffer,'\0',sizeof(buffer));

        int read_index = 0;    /* bytes received so far */
        int checked_index = 0; /* bytes already examined by parse_line() */
        int start_line = 0;    /* offset where the current line begins */

        /* The main state machine starts by parsing the request line. */
        CHECK_STATE checkstate = CHECK_STATE_REQUESTLINE;

        while (1)
        {
            /* Buffer full without a complete request: calling recv() with a
               length of 0 would return 0 and be mistaken for a closed
               connection, so reject the request explicitly. */
            if (read_index >= BUFFER_SIZE)
            {
                printf("request too large\n");
                send(fd,szret[1],strlen(szret[1]),0);
                break;
            }

            ssize_t data_read = recv(fd,buffer + read_index,BUFFER_SIZE - read_index,0);
            if (data_read == -1)
            {
                /* The original omitted the argument for %d (undefined behaviour). */
                printf("read failed,sockfd: %d\n",fd);
                break;
            }
            else if (data_read == 0)
            {
                printf("remote client has closed the connection\n");
                break;
            }
            read_index += (int)data_read;

            HTTP_CODE result = parse_content(buffer,checked_index,checkstate,read_index,start_line);
            if (result == NO_REQUEST)
            {
                /* Request not complete yet: read more data. */
                continue;
            }
            else if (result == GET_REQUEST)
            {
                /* A complete GET request was parsed. */
                send(fd,szret[0],strlen(szret[0]),0);
                break;
            }
            else
            {
                send(fd,szret[1],strlen(szret[1]),0);
                break;
            }
        }
        close(fd);
    }
    close(listenfd);
    return 0;
}

头文件 me.h,位于当前目录的子目录include/下

me.h 的内容如下:
#ifndef ME_H
#define ME_H

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sendfile.h>
#include <sys/resource.h>

#include <netdb.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>

#include <stdbool.h>

/*
 * Print str followed by a newline to stderr and terminate with exit status 1.
 * Wrapped in do { ... } while (0) so that `error_handle(msg);` behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define error_handle(str) do {\
fprintf(stderr,"%s\n",(str));\
exit(1);\
} while (0)
#endif

编译:g++ httpstate1.cpp -Iinclude/ -o httpstate1

测试

测试机器(本地ip) 192.168.247.153 随机选择一个端口12345

./httpstate1 192.168.247.153 12345

使用curl模拟GET方法的HTTP请求

curl -v http://192.168.247.153:12345/test (注意目标的ip和端口)


curl模拟的http请求:
在这里插入图片描述

httpstate1解析结果:

在这里插入图片描述

我是先查看了curl实际发送的头部字段,然后才在代码中添加了对最后2个字段(Accept和User-Agent)的解析。


1

代码中使用了2个状态机,

主状态机 CHECK_STATE , 初始状态:解析请求行(CHECK_STATE_REQUESTLINE),

主状态机在内部调用了从状态机

从状态机:LINE_STATUS,初始状态为LINE_OK(这也是检测到\r\n、即读取到一个完整行时的状态)。

parse_line函数处理新读入buffer中的数据时:

  1. 如果其中还没有完整的行,则状态转移到LINE_OPEN,需要继续读取客户数据
  2. 如果读取到\r\n,则得到一个完整的行,状态为LINE_OK,交由主状态机继续处理
  3. 如果\r或\n单独出现在http请求中,说明http请求存在语法错误,则状态转移到LINE_BAD

转移到主状态机时:

如果主状态是: 初始状态(CHECK_STATE_REQUESTLINE):则调用parse_requestline()来分析请求行,

如果主状态是:CHECK_STATE_HEADER,则调用parse_headers()来分析头部字段,

parse_requestline()在成功分析后将主状态转移到CHECK_STATE_HEADER

参考:《Linux高性能服务器编程》