Merge pull request #201 from yuyichao/post

Support HTTP POST request in the htmlparser and regex source
This commit is contained in:
依云 2021-09-07 13:13:19 +08:00 committed by GitHub
commit 750999f397
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 82 additions and 5 deletions

View file

@ -269,6 +269,12 @@ regex
When multiple version strings are found, the maximum of those is chosen.
post_data
(*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The format of the body (its ``Content-Type``) can be specified using the ``post_data_type`` option.
post_data_type
(*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``.
This source supports :ref:`list options`.
Search in an HTTP header
@ -312,6 +318,12 @@ url
xpath
An xpath expression used to find the version string.
post_data
(*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The format of the body (its ``Content-Type``) can be specified using the ``post_data_type`` option.
post_data_type
(*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``.
.. note::
An additional dependency "lxml" is required.

View file

@ -35,6 +35,7 @@ class AiohttpSession(BaseSession):
follow_redirects: bool = True,
params = (),
json = None,
body = None,
verify_cert: bool = True,
) -> Response:
kwargs = {
@ -47,7 +48,13 @@ class AiohttpSession(BaseSession):
if proxy is not None:
kwargs['proxy'] = proxy
if json is not None:
if body is not None:
# Make sure all backends have the same default encoding for post data.
if 'Content-Type' not in headers:
headers = {**headers, 'Content-Type': 'application/x-www-form-urlencoded'}
kwargs['headers'] = headers
kwargs['data'] = body.encode()
elif json is not None:
kwargs['json'] = json
try:

View file

@ -61,6 +61,7 @@ class BaseSession:
follow_redirects: bool = True,
params = (),
json = None,
body = None,
) -> Response:
t = tries.get()
p = proxy.get()
@ -82,6 +83,7 @@ class BaseSession:
params = params,
follow_redirects = follow_redirects,
json = json,
body = body,
proxy = p or None,
verify_cert = verify,
)
@ -103,6 +105,7 @@ class BaseSession:
follow_redirects: bool = True,
params = (),
json = None,
body = None,
verify_cert: bool = True,
) -> Response:
''':meta private:'''

View file

@ -27,6 +27,7 @@ class HttpxSession(BaseSession):
follow_redirects: bool = True,
params = (),
json = None,
body = None,
verify_cert: bool = True,
) -> Response:
client = self.clients.get((proxy, verify_cert))
@ -40,8 +41,13 @@ class HttpxSession(BaseSession):
self.clients[(proxy, verify_cert)] = client
try:
if body is not None:
# Make sure all backends have the same default encoding for post data.
if 'Content-Type' not in headers:
headers = {**headers, 'Content-Type': 'application/x-www-form-urlencoded'}
body = body.encode()
r = await client.request(
method, url, json = json,
method, url, json = json, content = body,
headers = headers,
allow_redirects = follow_redirects,
params = params,

View file

@ -52,6 +52,7 @@ class TornadoSession(BaseSession):
follow_redirects: bool = True,
params = (),
json = None,
body = None,
verify_cert: bool = True,
) -> Response:
kwargs: Dict[str, Any] = {
@ -62,7 +63,10 @@ class TornadoSession(BaseSession):
'validate_cert': verify_cert,
}
if json:
if body:
# By default the content type is already 'application/x-www-form-urlencoded'
kwargs['body'] = body
elif json:
kwargs['body'] = _json.dumps(json)
kwargs['prepare_curl_callback'] = try_use_http2

View file

@ -15,7 +15,13 @@ async def get_version_impl(info):
encoding = conf.get('encoding')
parser = html.HTMLParser(encoding=encoding)
res = await session.get(conf['url'])
data = conf.get('post_data')
if data is None:
res = await session.get(conf['url'])
else:
res = await session.post(conf['url'], body = data, headers = {
'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded')
})
doc = html.fromstring(res.body, base_url=conf['url'], parser=parser)
try:

View file

@ -20,7 +20,13 @@ async def get_version_impl(info):
encoding = conf.get('encoding', 'latin1')
res = await session.get(conf['url'])
data = conf.get('post_data')
if data is None:
res = await session.get(conf['url'])
else:
res = await session.post(conf['url'], body = data, headers = {
'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded')
})
body = res.body.decode(encoding)
versions = regex.findall(body)
if not versions and not conf.get('missing_ok', False):

View file

@ -87,3 +87,36 @@ async def test_regex_bad_ssl(get_version, httpbin_secure):
else:
assert False, 'certificate should not be trusted'
async def test_regex_post(get_version, httpbin):
    """Check that ``post_data`` is sent with the default content type.

    The regex looks for the ``ABCDEF`` field as a quoted string in the
    response to a request against the ``/post`` endpoint.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"ABCDEF":\s*"(\w+)"',
        "post_data": "ABCDEF=234&CDEFG=xyz",
    }
    version = await get_version("example", conf)
    assert version == "234"
async def test_regex_post2(get_version, httpbin):
    """Check that the second field of the posted body is also available.

    Same request body as the first POST test, but the regex targets the
    ``CDEFG`` field in the response.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"CDEFG":\s*"(\w+)"',
        "post_data": "ABCDEF=234&CDEFG=xyz",
    }
    version = await get_version("example", conf)
    assert version == "xyz"
async def test_regex_post_json(get_version, httpbin):
    """Check a JSON ``post_data`` body with an explicit ``post_data_type``.

    The regex expects the ``ABCDEF`` value to appear unquoted in the
    response, i.e. as a JSON number.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"ABCDEF":\s*(\w+)',
        "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}',
        "post_data_type": "application/json",
    }
    version = await get_version("example", conf)
    assert version == "234"
async def test_regex_post_json2(get_version, httpbin):
    """Check the string field of a JSON ``post_data`` body.

    Same JSON body and ``application/json`` type as the previous test, but
    the regex targets the quoted ``CDEFG`` value in the response.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"CDEFG":\s*"(\w+)"',
        "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}',
        "post_data_type": "application/json",
    }
    version = await get_version("example", conf)
    assert version == "xyz"