请求信息
GET / HTTP/1.1 Host: localhost:7003 Connection: keep-alive Pragma: no-cache Cache-Control: no-cache Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3 Accept-Encoding: gzip, deflate, br Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,pt;q=0.5 Cookie: test2=ab+cd; test1=ab%2Bcd
服务端
/**
 * Demo controller: logs the name and value of every cookie on the incoming
 * request, then answers with the literal body "index".
 */
@Controller
@Slf4j
public class MainController {

    /** Request handle wired in via @Autowired; the cookies are read from it. */
    @Autowired
    private HttpServletRequest request;

    /**
     * Handles GET "/" and writes one "name=value" log line per cookie.
     *
     * @return the fixed response body "index"
     */
    @GetMapping("/")
    public @ResponseBody String index() {
        final Cookie[] received = request.getCookies();
        if (received == null) {
            // getCookies() returned null, so there is nothing to log.
            return "index";
        }
        for (int i = 0; i < received.length; i++) {
            final Cookie current = received[i];
            // The value is logged exactly as returned by getValue(); no decoding is applied here.
            final String line = current.getName() + "=" + current.getValue();
            log.info(line);
        }
        return "index";
    }
}
控制台输出
2019-05-16 18:03:32.770 INFO 10114 --- [nio-7003-exec-1] net.mengkang.demo.MainController : test2=ab+cd 2019-05-16 18:03:32.770 INFO 10114 --- [nio-7003-exec-1] net.mengkang.demo.MainController : test1=ab%2Bcd
GET / HTTP/1.1 Host: localhost:8084 Connection: keep-alive Pragma: no-cache Cache-Control: no-cache Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3 Accept-Encoding: gzip, deflate, br Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,pt;q=0.5 Cookie: test2=ab+cd; test1=ab%2Bcd
服务端
// Dump $_COOKIE as PHP populated it for this request.
var_export($_COOKIE);
array ( 'test2' => 'ab cd', 'test1' => 'ab+cd', )
可以发现:Java 不会对 cookie 数据做任何处理,而 PHP 默认会对 cookie 进行一次 urldecode 操作,这就导致两边系统读取同一个 cookie 时结果不一致,从而产生 bug。
类似的问题可参考:《PHP 在解析外部变量时的一个 BUG》。
主要查看两处源码
main/php_variables.c ext/standard/url.c
SAPI_API SAPI_TREAT_DATA_FUNC(php_default_treat_data) { ... switch (arg) { case PARSE_GET: case PARSE_STRING: separator = PG(arg_separator).input; break; case PARSE_COOKIE: separator = ";/0"; //可以在我们浏览器里看到请求的header里面cookie的分隔符就是这个 break; } var = php_strtok_r(res, separator, &strtok_buf); while (var) { val = strchr(var, '='); if (arg == PARSE_COOKIE) { /* Remove leading spaces from cookie names, needed for multi-cookie header where ; can be followed by a space */ while (isspace(*var)) { var++; } if (var == val || *var == '/0') { goto next_cookie; } } if (++count > PG(max_input_vars)) { php_error_docref(NULL, E_WARNING, "Input variables exceeded " ZEND_LONG_FMT ". To increase the limit change max_input_vars in php.ini.", PG(max_input_vars)); break; } if (val) { /* have a value */ size_t val_len; size_t new_val_len; *val++ = '/0'; php_url_decode(var, strlen(var)); val_len = php_url_decode(val, strlen(val)); val = estrndup(val, val_len); if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len)) { php_register_variable_safe(var, val, new_val_len, &array); } efree(val); } else { size_t val_len; size_t new_val_len; php_url_decode(var, strlen(var)); val_len = 0; val = estrndup("", val_len); if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len)) { php_register_variable_safe(var, val, new_val_len, &array); } efree(val); } next_cookie: var = php_strtok_r(NULL, separator, &strtok_buf); } if (free_buffer) { efree(res); } }
可以看到,cookie 的值会经过 php_url_decode 处理。下面附上它的源码,并加上一段测试代码:
#include <stdio.h>
#include <ctype.h>
#include <memory.h>
#include <string.h>

/*
 * Converts the two hexadecimal digit characters s[0] and s[1] into the
 * integer they encode (e.g. "2B" -> 0x2B). Upper-case digits are folded to
 * lower case first. The characters are not validated here; the caller
 * checks them with isxdigit() before calling.
 */
static int php_htoi(char *s)
{
	int value;
	int c;

	c = ((unsigned char *) s)[0];
	if (isupper(c))
		c = tolower(c);
	value = (c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10) * 16;

	c = ((unsigned char *) s)[1];
	if (isupper(c))
		c = tolower(c);
	value += c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10;

	return (value);
}

/*
 * URL-decodes the first len bytes of str in place: '+' becomes a space and
 * "%XX" (two hex digits) becomes the byte 0xXX; everything else is copied
 * unchanged. The result is NUL-terminated.
 *
 * Returns the length of the decoded string, not counting the terminator.
 */
size_t php_url_decode(char *str, size_t len)
{
	char *dest = str;
	char *data = str;

	while (len--) {
		if (*data == '+') {
			*dest = ' ';
		} else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
				   && isxdigit((int) *(data + 2))) {
			*dest = (char) php_htoi(data + 1);
			/* Skip the two hex digits that were just consumed. */
			data += 2;
			len -= 2;
		} else {
			*dest = *data;
		}
		data++;
		dest++;
	}
	*dest = '\0';
	return dest - str;
}

/* Test driver: decodes "ab+cd" and prints the result ("ab cd"). */
int main()
{
	char a[6] = {"ab+cd"};

	php_url_decode(a, strlen(a));
	printf("%s\n", a);
	return 0;
}
上面的 php_url_decode 用到了 php_htoi,这是因为 urlencode 按照 RFC 1738,把字符串中除 -_. 之外的所有非字母数字字符都替换成百分号(%)后跟两位十六进制数。php_htoi 的作用就是把这两位十六进制数字转换成整数(Converting Hexadecimal Digits Into Integers),然后再把计算出来的整数转换为 char,存回处理之后的字符数组里。
rawurlencode
与 urlencode
的区别是什么?
手册上的解释是:
urlencode
返回字符串,此字符串中除了 -_. 之外的所有非字母数字字符都将被替换成百分号(%)后跟两位十六进制数,空格则编码为加号(+)。此编码与 WWW 表单 POST 数据的编码方式是一样的,同时与 application/x-www-form-urlencoded 的媒体类型编码方式一样。由于历史原因,此编码在将空格编码为加号(+)方面与 RFC 3986 编码(参见 rawurlencode())不同。
/*
 * Decodes the first len bytes of str in place: "%XX" (two hex digits)
 * becomes the byte 0xXX and every other byte, '+' included, is copied
 * unchanged. The result is NUL-terminated.
 *
 * Returns the length of the decoded string, not counting the terminator.
 */
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
{
	char *dest = str;
	char *data = str;

	while (len--) {
		if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
			&& isxdigit((int) *(data + 2))) {
#ifndef CHARSET_EBCDIC
			*dest = (char) php_htoi(data + 1);
#else
			*dest = os_toebcdic[(char) php_htoi(data + 1)];
#endif
			/* Skip the two hex digits that were just consumed. */
			data += 2;
			len -= 2;
		} else {
			*dest = *data;
		}
		data++;
		dest++;
	}
	*dest = '\0';
	return dest - str;
}
通过源码可以看到,区别就是少了对 + 的处理(不会把 + 还原成空格)。
不管是 $_GET、$_POST 还是 $_COOKIE,拿到的都是经过 urldecode 处理之后的“二手数据”,这就导致 PHP 和 Java 两边获取到的 cookie 值不一样。