HTTP 协议深度解析（三）：完整 HTTP 服务器实现

HTTP 协议深度解析（三）：完整 HTTP 服务器实现 | 极客日志

web 根目录：/var/www/html
目录结构：
/var/www/html/
├── index.html
├── about.html
├── css/
│   └── style.css
├── js/
│   └── app.js
└── images/
    └── logo.png

http://example.com/ → /var/www/html/index.html
http://example.com/about.html → /var/www/html/about.html
http://example.com/css/style.css → /var/www/html/css/style.css
http://example.com/images/logo.png → /var/www/html/images/logo.png

// Maps a request path onto a file path below the web root.
//
// The bare root path "/" is served from "<web_root>/index.html"; every other
// path is appended to web_root verbatim. No normalisation or validation is
// performed here.
std::string GetFilePath(const std::string& url_path, const std::string& web_root) {
    const bool wants_home_page = (url_path == "/");
    std::string resolved(web_root);
    resolved += wants_home_page ? std::string("/index.html") : url_path;
    return resolved;
}

web 根目录：./wwwroot
URL：/test.html → 文件路径：./wwwroot/test.html
URL：/images/photo.jpg → 文件路径：./wwwroot/images/photo.jpg

GET /../../../etc/passwd HTTP/1.1

./wwwroot/../../../etc/passwd → /etc/passwd

// Returns true when `path` refers to an existing filesystem entry located
// inside (or equal to) `web_root`.
//
// path     - full path to test (web root already prepended).
// web_root - directory that served files must stay within.
//
// Returns false when `path` contains "..", when either path cannot be
// resolved by realpath() (for example because it does not exist), or when the
// resolved path lies outside the resolved web root.
bool IsSafePath(const std::string& path, const std::string& web_root) {
    // 1. Cheap textual rejection of anything containing "..".
    if (path.find("..") != std::string::npos) {
        return false;
    }
    // 2. Canonicalise both paths so symlinks and "." segments are resolved.
    char real_path[PATH_MAX];
    if (realpath(path.c_str(), real_path) == nullptr) {
        return false;
    }
    char real_root[PATH_MAX];
    // An unresolved root would leave real_root uninitialised, so fail closed.
    if (realpath(web_root.c_str(), real_root) == nullptr) {
        return false;
    }
    const size_t root_len = strlen(real_root);
    if (strncmp(real_path, real_root, root_len) != 0) {
        return false;
    }
    // A root that already ends in '/' (i.e. "/") needs no boundary check.
    if (root_len > 0 && real_root[root_len - 1] == '/') {
        return true;
    }
    // The match must end on a path-component boundary; otherwise a sibling
    // such as "/srv/www_evil" would pass for the root "/srv/www".
    const char next = real_path[root_len];
    return next == '\0' || next == '/';
}

// Reads the whole file at `filepath` in binary mode and returns its bytes.
//
// Returns an empty string when the file cannot be opened, when its size
// cannot be determined, or when it is empty; callers cannot tell these cases
// apart from the return value alone.
std::string ReadFile(const std::string& filepath) {
    std::ifstream file(filepath, std::ios::binary);
    if (!file.is_open()) {
        return "";
    }
    // Seek to the end to learn the size. tellg() reports failure as -1, which
    // must not be turned into an enormous unsigned size.
    file.seekg(0, std::ios::end);
    const std::streamoff filesize = file.tellg();
    if (filesize <= 0) {
        return "";
    }
    file.seekg(0, std::ios::beg);
    if (!file) {
        return "";
    }
    std::string content(static_cast<size_t>(filesize), '\0');
    file.read(&content[0], static_cast<std::streamsize>(filesize));
    // Keep only what was actually read so a short read leaves no padding.
    content.resize(static_cast<size_t>(file.gcount()));
    return content;
}

// Picks a Content-Type value from the extension of `filepath`.
//
// The extension is the text from the last '.' of the final path component,
// matched case-insensitively (ASCII) so "PHOTO.JPG" resolves like
// "photo.jpg". Paths without an extension, or with one that is not in the
// table, yield "application/octet-stream".
std::string GetMimeType(const std::string& filepath) {
    static const char* const kDefaultType = "application/octet-stream";

    const size_t dot = filepath.rfind('.');
    if (dot == std::string::npos) {
        return kDefaultType;
    }
    // A dot that belongs to a directory name ("/site.d/README") is not an
    // extension of the file itself.
    const size_t slash = filepath.rfind('/');
    if (slash != std::string::npos && slash > dot) {
        return kDefaultType;
    }

    std::string ext = filepath.substr(dot);
    for (size_t i = 0; i < ext.size(); ++i) {
        if (ext[i] >= 'A' && ext[i] <= 'Z') {
            ext[i] = static_cast<char>(ext[i] - 'A' + 'a');
        }
    }

    // Extension (lower-case, with leading dot) -> MIME type.
    static const std::map<std::string, std::string> mime_types = {
        {".html", "text/html"},
        {".htm", "text/html"},
        {".css", "text/css"},
        {".js", "application/javascript"},
        {".json", "application/json"},
        {".xml", "application/xml"},
        {".txt", "text/plain"},
        {".jpg", "image/jpeg"},
        {".jpeg", "image/jpeg"},
        {".png", "image/png"},
        {".gif", "image/gif"},
        {".svg", "image/svg+xml"},
        {".ico", "image/x-icon"},
        {".pdf", "application/pdf"},
        {".zip", "application/zip"},
        {".mp3", "audio/mpeg"},
        {".mp4", "video/mp4"}
    };
    const auto it = mime_types.find(ext);
    if (it != mime_types.end()) {
        return it->second;
    }
    return kDefaultType;
}

// Reports whether `filepath` names an existing regular file.
// Directories, other non-regular entries and paths that stat() cannot
// resolve all yield false.
bool FileExists(const std::string& filepath) {
    struct stat st;
    if (stat(filepath.c_str(), &st) != 0) {
        return false;
    }
    return S_ISREG(st.st_mode);
}

// Parsed representation of one HTTP request.
struct HttpRequest {
    std::string method; // Request method, e.g. GET or POST
    std::string url; // URL path from the request line
    std::string version; // HTTP version token from the request line
    std::map<std::string, std::string> headers; // Header name -> value
    std::string body; // Raw request body
    // GET parameters (parsed from the URL)
    std::map<std::string, std::string> query_params;
    // POST parameters (parsed from the body, application/x-www-form-urlencoded)
    std::map<std::string, std::string> post_params;
};

// Splits the request line ("METHOD URL VERSION") on whitespace and stores the
// three tokens in `req`.
//
// Returns true only when method, url and version are all non-empty afterwards.
bool ParseRequestLine(const std::string& line, HttpRequest* req) {
    std::istringstream tokens(line);
    tokens >> req->method;
    tokens >> req->url;
    tokens >> req->version;

    const bool has_all_parts =
        !req->method.empty() && !req->url.empty() && !req->version.empty();
    return has_all_parts;
}

// Parses one "Name: value" header line and stores it in req->headers.
//
// The name is everything before the first ':' and is stored unchanged. The
// value has leading and trailing spaces/tabs removed; a value made only of
// whitespace is stored as an empty string.
//
// Returns false (and stores nothing) when the line contains no ':'.
bool ParseHeader(const std::string& line, HttpRequest* req) {
    const size_t colon = line.find(':');
    if (colon == std::string::npos) {
        return false;
    }
    const std::string key = line.substr(0, colon);

    std::string value;
    const size_t first = line.find_first_not_of(" \t", colon + 1);
    if (first != std::string::npos) {
        // `first` is a non-blank character, so `last` is at or after it.
        const size_t last = line.find_last_not_of(" \t");
        value = line.substr(first, last - first + 1);
    }
    req->headers[key] = value;
    return true;
}

输入："Host: www.example.com" 解析：headers["Host"]="www.example.com"

// Parses a raw HTTP request into `req`: request line, headers, then body.
//
// raw_request - bytes received from the client.
// req         - output; the body is appended to whatever req->body holds.
//
// Returns false when there is no first line or the request line is
// malformed. Header lines that fail to parse are skipped. The body is
// everything after the blank line that ends the headers, copied verbatim.
bool ParseHttpRequest(const std::string& raw_request, HttpRequest* req) {
    std::istringstream stream(raw_request);
    std::string line;

    // 1. Request line.
    if (!std::getline(stream, line)) {
        return false;
    }
    // Lines end in "\r\n"; getline removed the '\n', drop the '\r' as well.
    if (!line.empty() && line.back() == '\r') {
        line.pop_back();
    }
    if (!ParseRequestLine(line, req)) {
        return false;
    }

    // 2. Headers, up to the first empty line.
    bool saw_blank_line = false;
    while (std::getline(stream, line)) {
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.empty()) {
            saw_blank_line = true;
            break;
        }
        ParseHeader(line, req);
    }

    // 3. Body: taken from the original buffer rather than rebuilt line by
    // line, so the final newline and embedded line endings are preserved.
    if (saw_blank_line) {
        const std::streamoff body_start = stream.tellg();
        if (body_start >= 0 &&
            static_cast<size_t>(body_start) < raw_request.size()) {
            req->body.append(raw_request, static_cast<size_t>(body_start),
                             std::string::npos);
        }
    }
    return true;
}

// Moves the query string out of req->url and into req->query_params.
//
// Everything after the first '?' is removed from req->url and split on '&'
// into key=value pairs. Both key and value are URL-decoded. Pairs without
// '=' are ignored. Does nothing when the URL has no '?'.
void ParseQueryParams(HttpRequest* req) {
    const size_t question = req->url.find('?');
    if (question == std::string::npos) {
        return; // no query string
    }
    const std::string query = req->url.substr(question + 1);
    req->url.erase(question); // keep only the path part

    // Parse key1=value1&key2=value2
    std::istringstream stream(query);
    std::string pair;
    while (std::getline(stream, pair, '&')) {
        const size_t eq = pair.find('=');
        if (eq == std::string::npos) {
            continue;
        }
        // Keys can be percent-encoded too, so decode both sides.
        const std::string key = UrlDecode(pair.substr(0, eq));
        req->query_params[key] = UrlDecode(pair.substr(eq + 1));
    }
}

// Decodes an application/x-www-form-urlencoded string.
//
// "%XX" with two hexadecimal digits becomes the byte 0xXX and '+' becomes a
// space. A '%' that is not followed by two hex digits (for example "%4G",
// "%-1", or a '%' too close to the end) is copied through unchanged.
std::string UrlDecode(const std::string& str) {
    // Value of one hex digit, or -1 when `ch` is not a hex digit.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') return ch - '0';
        if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
        return -1;
    };

    std::string result;
    result.reserve(str.size());
    for (size_t i = 0; i < str.size(); ++i) {
        const char c = str[i];
        if (c == '%' && i + 2 < str.size()) {
            const int hi = hex_value(str[i + 1]);
            const int lo = hex_value(str[i + 2]);
            if (hi >= 0 && lo >= 0) {
                result += static_cast<char>(hi * 16 + lo);
                i += 2; // skip the two digits just consumed
            } else {
                result += c;
            }
        } else if (c == '+') {
            result += ' '; // '+' encodes a space
        } else {
            result += c;
        }
    }
    return result;
}

输入："C%2B%2B%20%E7%BC%96%E7%A8%8B" 输出："C++ 编程"

// Fills req->post_params from a form-encoded POST body.
//
// Does nothing unless the method is "POST" and a Content-Type header is
// present whose value contains "application/x-www-form-urlencoded". The
// header name and media type are compared case-insensitively (ASCII). The
// body is split on '&' into key=value pairs; both key and value are
// URL-decoded. Pairs without '=' are ignored.
void ParsePostParams(HttpRequest* req) {
    if (req->method != "POST") {
        return;
    }

    // ASCII-only lower-casing, sufficient for header names and media types.
    const auto to_lower = [](std::string text) -> std::string {
        for (char& ch : text) {
            if (ch >= 'A' && ch <= 'Z') {
                ch = static_cast<char>(ch - 'A' + 'a');
            }
        }
        return text;
    };

    // Find Content-Type regardless of how the client capitalised it.
    bool is_form = false;
    for (const auto& header : req->headers) {
        if (to_lower(header.first) == "content-type") {
            is_form = to_lower(header.second)
                          .find("application/x-www-form-urlencoded") != std::string::npos;
            break;
        }
    }
    if (!is_form) {
        return;
    }

    // Body format: key1=value1&key2=value2
    std::istringstream stream(req->body);
    std::string pair;
    while (std::getline(stream, pair, '&')) {
        const size_t eq = pair.find('=');
        if (eq == std::string::npos) {
            continue;
        }
        const std::string key = UrlDecode(pair.substr(0, eq));
        req->post_params[key] = UrlDecode(pair.substr(eq + 1));
    }
}

// An HTTP response plus the logic to serialise it to wire format.
struct HttpResponse {
    std::string version; // e.g. HTTP/1.1
    int status_code; // e.g. 200, 404; has no default, set it before Build()
    std::string status_text; // e.g. OK, Not Found
    std::map<std::string, std::string> headers; // Response headers
    std::string body; // Response body

    // Serialises the response: status line, one "Name: value" line per
    // header (in the map's key order), a blank line, then the body. Lines
    // are terminated with "\r\n". Does not modify the response, so it can be
    // called on const objects.
    std::string Build() const {
        std::ostringstream oss;
        // Status line
        oss << version << " " << status_code << " " << status_text << "\r\n";
        // Headers
        for (const auto& pair : headers) {
            oss << pair.first << ": " << pair.second << "\r\n";
        }
        // Blank line separating headers from body
        oss << "\r\n";
        // Body
        oss << body;
        return oss.str();
    }
};

// Returns the reason phrase for an HTTP status code, or "Unknown" for codes
// that are not listed below.
std::string GetStatusText(int code) {
    switch (code) {
        case 200: return "OK";
        case 201: return "Created";
        case 204: return "No Content";
        case 301: return "Moved Permanently";
        case 302: return "Found";
        case 304: return "Not Modified";
        case 400: return "Bad Request";
        case 401: return "Unauthorized";
        case 403: return "Forbidden";
        case 404: return "Not Found";
        case 500: return "Internal Server Error";
        case 502: return "Bad Gateway";
        case 503: return "Service Unavailable";
        default:  return "Unknown";
    }
}

// Builds an "HTTP/1.1 200 OK" response that carries `content` as its body.
// Sets Content-Type to `content_type`, Content-Length to the body size in
// bytes, and Connection to "close".
HttpResponse Build200Response(const std::string& content, const std::string& content_type) {
    HttpResponse ok;
    ok.version = "HTTP/1.1";
    ok.status_code = 200;
    ok.status_text = "OK";
    ok.body = content;

    ok.headers["Connection"] = "close";
    ok.headers["Content-Length"] = std::to_string(content.size());
    ok.headers["Content-Type"] = content_type;
    return ok;
}

// Builds an "HTTP/1.1 404 Not Found" response with a small fixed HTML page
// as its body. Sets Content-Type to "text/html" and Content-Length to the
// page size in bytes.
HttpResponse Build404Response() {
    static const char kNotFoundPage[] =
        "<html>\n<head><title>404 Not Found</title></head>\n<body>\n"
        "<h1>404 Not Found</h1>\n"
        "<p>The requested resource was not found on this server.</p>\n"
        "</body>\n</html>";

    HttpResponse not_found;
    not_found.version = "HTTP/1.1";
    not_found.status_code = 404;
    not_found.status_text = "Not Found";
    not_found.body = kNotFoundPage;
    not_found.headers["Content-Type"] = "text/html";
    not_found.headers["Content-Length"] = std::to_string(not_found.body.size());
    return not_found;
}

// Minimal blocking HTTP server: serves static files from a web root on GET
// and answers POST /api/echo. Each accepted connection is handled on its own
// detached thread and closed after a single response.
class HttpServer {
public:
    // port     - TCP port to listen on (all IPv4 interfaces).
    // web_root - directory that static files are served from.
    HttpServer(int port, const std::string& web_root)
        : _port(port), _web_root(web_root), _listen_fd(-1) {}

    // Creates the listening socket and runs the accept loop.
    // Returns false when socket(), bind() or listen() fails; on success the
    // accept loop never exits, so the function does not return.
    bool Start() {
        // Create the socket
        _listen_fd = socket(AF_INET, SOCK_STREAM, 0);
        if (_listen_fd < 0) {
            perror("socket");
            return false;
        }
        // Allow quick restarts on the same port; failure here is not fatal.
        int opt = 1;
        if (setsockopt(_listen_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
            perror("setsockopt");
        }
        // bind; zero-initialise so padding fields are not left indeterminate
        struct sockaddr_in addr = {};
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = INADDR_ANY;
        addr.sin_port = htons(_port);
        if (bind(_listen_fd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)) < 0) {
            perror("bind");
            CloseListenSocket();
            return false;
        }
        // listen
        if (listen(_listen_fd, 10) < 0) {
            perror("listen");
            CloseListenSocket();
            return false;
        }
        std::cout << "HTTP Server started on port " << _port << std::endl;
        std::cout << "Web root: " << _web_root << std::endl;
        // accept loop
        while (true) {
            struct sockaddr_in client_addr;
            socklen_t len = sizeof(client_addr);
            int client_fd = accept(_listen_fd, reinterpret_cast<struct sockaddr*>(&client_addr), &len);
            if (client_fd < 0) {
                perror("accept");
                continue;
            }
            // Teaching version: one thread per connection. A production
            // server would use a thread pool with a task queue, or
            // epoll with a reactor.
            std::thread t(&HttpServer::HandleClient, this, client_fd);
            t.detach();
        }
        return true;
    }

private:
    // Releases the listening socket after a failed start-up step.
    void CloseListenSocket() {
        if (_listen_fd >= 0) {
            close(_listen_fd);
            _listen_fd = -1;
        }
    }

    // Reads one request from client_fd (a single read of up to 8192 bytes),
    // answers it, and closes the connection.
    void HandleClient(int client_fd) {
        // Read the request
        char buffer[8192];
        const ssize_t n = read(client_fd, buffer, sizeof(buffer));
        if (n <= 0) {
            close(client_fd);
            return;
        }
        // Construct from (pointer, length) so an embedded NUL byte does not
        // truncate the request.
        const std::string raw_request(buffer, static_cast<size_t>(n));
        // Parse the request
        HttpRequest req;
        if (!ParseHttpRequest(raw_request, &req)) {
            close(client_fd);
            return;
        }
        ParseQueryParams(&req);
        ParsePostParams(&req);
        // Log the request
        std::cout << req.method << " " << req.url << std::endl;
        // Dispatch
        HttpResponse resp = ProcessRequest(req);
        // Send the response
        const std::string response_str = resp.Build();
        SendAll(client_fd, response_str);
        close(client_fd);
    }

    // Writes all of `data` to fd, retrying after partial writes.
    // Gives up on the first write error.
    static void SendAll(int fd, const std::string& data) {
        size_t sent = 0;
        while (sent < data.size()) {
            const ssize_t n = write(fd, data.data() + sent, data.size() - sent);
            if (n < 0) {
                perror("write");
                return;
            }
            if (n == 0) {
                return;
            }
            sent += static_cast<size_t>(n);
        }
    }

    // Routes by method: GET -> static file, POST -> API handler.
    // Every other method gets the 404 response.
    HttpResponse ProcessRequest(const HttpRequest& req) {
        if (req.method == "GET") {
            return HandleStaticFile(req);
        }
        if (req.method == "POST") {
            return HandlePostRequest(req);
        }
        return Build404Response();
    }

    // Returns true when `filepath` resolves (after symlinks and ".."
    // segments) to a location inside the web root. Unresolvable paths are
    // treated as outside.
    bool IsInsideWebRoot(const std::string& filepath) const {
        char real_file[PATH_MAX];
        char real_root[PATH_MAX];
        if (realpath(filepath.c_str(), real_file) == nullptr ||
            realpath(_web_root.c_str(), real_root) == nullptr) {
            return false;
        }
        const std::string file(real_file);
        const std::string root(real_root);
        if (file.compare(0, root.size(), root) != 0) {
            return false;
        }
        // A root ending in '/' (i.e. "/") needs no boundary check.
        if (!root.empty() && root.back() == '/') {
            return true;
        }
        // Require a component boundary so "/srv/www_evil" does not match
        // the root "/srv/www".
        return file.size() == root.size() || file[root.size()] == '/';
    }

    // Serves the file that req.url maps to under the web root ("/" maps to
    // "/index.html"). Responds 404 when the file is missing, is not a
    // regular file, lies outside the web root, or reads back as empty.
    HttpResponse HandleStaticFile(const HttpRequest& req) {
        std::string filepath = _web_root + req.url;
        // Default page
        if (req.url == "/") {
            filepath = _web_root + "/index.html";
        }
        // Must exist and must not escape the web root
        // (e.g. "GET /../../etc/passwd").
        if (!FileExists(filepath) || !IsInsideWebRoot(filepath)) {
            return Build404Response();
        }
        // Read the file
        std::string content = ReadFile(filepath);
        if (content.empty()) {
            return Build404Response();
        }
        // Pick the MIME type
        std::string mime_type = GetMimeType(filepath);
        // Build the response
        return Build200Response(content, mime_type);
    }

    // Escapes `text` for use inside a JSON string literal: quotes,
    // backslashes and control characters are escaped; other bytes pass
    // through unchanged.
    static std::string EscapeJson(const std::string& text) {
        static const char kHex[] = "0123456789abcdef";
        std::string out;
        out.reserve(text.size());
        for (const char ch : text) {
            const unsigned char byte = static_cast<unsigned char>(ch);
            switch (ch) {
                case '"':  out += "\\\""; break;
                case '\\': out += "\\\\"; break;
                case '\n': out += "\\n";  break;
                case '\r': out += "\\r";  break;
                case '\t': out += "\\t";  break;
                default:
                    if (byte < 0x20) {
                        out += "\\u00";
                        out += kHex[byte >> 4];
                        out += kHex[byte & 0x0F];
                    } else {
                        out += ch;
                    }
            }
        }
        return out;
    }

    // Handles POST endpoints. Only /api/echo exists: it returns the request
    // body wrapped in a JSON object. Other URLs get the 404 response.
    HttpResponse HandlePostRequest(const HttpRequest& req) {
        if (req.url == "/api/echo") {
            // The body is client-controlled, so escape it to keep the JSON valid.
            std::string json = "{\"received\": \"" + EscapeJson(req.body) + "\"}";
            return Build200Response(json, "application/json");
        }
        return Build404Response();
    }

    int _port;
    std::string _web_root;
    int _listen_fd;
};

// Entry point: http_server <port> <web_root>
//
// Exits with status 1 on wrong usage, on a port that is not a decimal number
// in 1..65535, or when the server fails to start.
int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::cout << "Usage: " << argv[0] << " <port> <web_root>" << std::endl;
        return 1;
    }
    // strtol reports where parsing stopped, so input such as "abc" or
    // "80x" is rejected instead of being read as port 0 or 80.
    char* end = nullptr;
    const long port = std::strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || port < 1 || port > 65535) {
        std::cerr << "Invalid port: " << argv[1] << std::endl;
        return 1;
    }
    const std::string web_root = argv[2];
    HttpServer server(static_cast<int>(port), web_root);
    // Start() only returns when start-up failed.
    return server.Start() ? 0 : 1;
}

mkdir -p wwwroot

<!DOCTYPE html>
<!-- Test page for the static file server. Loading it also makes the browser
     request /style.css, /logo.png and /app.js. -->
<html>
<head>
<meta charset="UTF-8">
<title>测试页面</title>
<link rel="stylesheet" href="/style.css">
</head>
<body>
<h1>欢迎访问 HTTP 服务器</h1>
<p>这是一个静态 HTML 页面</p>
<img src="/logo.png" alt="Logo">
<script src="/app.js"></script>
</body>
</html>

/* Stylesheet for the test page: light grey background and a dark heading. */
body {
    font-family: Arial, sans-serif;
    background-color: #f0f0f0;
    margin: 50px;
}
h1 {
    color: #333;
}

// Script for the test page: logs to the console and shows an alert, which
// makes it visible that the file was loaded and executed.
console.log('JavaScript loaded!');
alert('Hello from HTTP Server!');

# Build the server as C++11, linking the pthread library.
g++ -o http_server http_server.cpp -std=c++11 -lpthread
# Serve ./wwwroot on port 9090.
./http_server 9090 ./wwwroot

HTTP Server started on port 9090
Web root: ./wwwroot

http://127.0.0.1:9090/

GET /
GET /style.css
GET /app.js
GET /logo.png
GET /favicon.ico

curl -i http://127.0.0.1:9090/

HTTP/1.1 200 OK
Content-Type: text/html
Content-Length: 285
Connection: close

<!DOCTYPE html><html>... </html>

curl -i http://127.0.0.1:9090/nonexistent.html

HTTP/1.1 404 Not Found
Content-Type: text/html
Content-Length: 158

<html><head><title>404 Not Found</title></head>... </html>

curl -i "http://127.0.0.1:9090/search?keyword=Linux&page=2"

query_params["keyword"]="Linux"
query_params["page"]="2"

curl -X POST http://127.0.0.1:9090/api/echo \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "username=admin&password=123"

请求：GET /index.html 响应：<html>...</html>

HTTP 协议深度解析（三）：完整 HTTP 服务器实现

HTTP 协议深度解析（三）：完整 HTTP 服务器实现

一、web 根目录的概念

1.1 什么是 web 根目录

1.2 路径映射规则

1.3 安全性考虑

二、文件读取与 MIME 类型

2.1 读取文件内容

2.2 MIME 类型判断

2.3 文件是否存在

三、HTTP 请求解析

3.1 请求结构体

3.2 解析首行

3.3 解析 Header

3.4 完整解析流程

3.5 解析 URL 参数

3.6 urldecode 实现

3.7 解析 POST 参数

四、HTTP 响应构造

4.1 响应结构体

4.2 状态码对应的文本

4.3 构造 200 响应

4.4 构造 404 响应

五、完整 HTTP 服务器实现

5.1 HttpServer 类

5.2 main 函数

六、测试验证

6.1 准备测试文件

6.2 编译运行

6.3 浏览器测试

6.4 curl 测试

七、HTTP 版本演进

7.1 HTTP/0.9（1991 年）

7.2 HTTP/1.0（1996 年）

7.3 HTTP/1.1（1999 年）

7.4 HTTP/2.0（2015 年）

7.5 HTTP/3.0（2022 年）

八、本篇总结

8.1 核心要点

8.2 容易混淆的点

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具