From c65a5343edc221bfebf620055a7430a662622e2f Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sun, 5 Sep 2021 16:23:26 -0400 Subject: [PATCH 1/2] httpclient: Support arbitrary request body The body will be supplied as a string. This is the only method I can find that is supported by all the backends as well as the config file. Make sure all the backends have the same default content types. --- nvchecker/httpclient/aiohttp_httpclient.py | 9 ++++++++- nvchecker/httpclient/base.py | 3 +++ nvchecker/httpclient/httpx_httpclient.py | 8 +++++++- nvchecker/httpclient/tornado_httpclient.py | 6 +++++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/nvchecker/httpclient/aiohttp_httpclient.py b/nvchecker/httpclient/aiohttp_httpclient.py index a122571..6af4b0f 100644 --- a/nvchecker/httpclient/aiohttp_httpclient.py +++ b/nvchecker/httpclient/aiohttp_httpclient.py @@ -35,6 +35,7 @@ class AiohttpSession(BaseSession): follow_redirects: bool = True, params = (), json = None, + body = None, verify_cert: bool = True, ) -> Response: kwargs = { @@ -47,7 +48,13 @@ class AiohttpSession(BaseSession): if proxy is not None: kwargs['proxy'] = proxy - if json is not None: + if body is not None: + # Make sure all backends have the same default encoding for post data. 
+ if 'Content-Type' not in headers: + headers = {**headers, 'Content-Type': 'application/x-www-form-urlencoded'} + kwargs['headers'] = headers + kwargs['data'] = body.encode() + elif json is not None: kwargs['json'] = json try: diff --git a/nvchecker/httpclient/base.py b/nvchecker/httpclient/base.py index b9e2101..ae861f9 100644 --- a/nvchecker/httpclient/base.py +++ b/nvchecker/httpclient/base.py @@ -61,6 +61,7 @@ class BaseSession: follow_redirects: bool = True, params = (), json = None, + body = None, ) -> Response: t = tries.get() p = proxy.get() @@ -82,6 +83,7 @@ class BaseSession: params = params, follow_redirects = follow_redirects, json = json, + body = body, proxy = p or None, verify_cert = verify, ) @@ -103,6 +105,7 @@ class BaseSession: follow_redirects: bool = True, params = (), json = None, + body = None, verify_cert: bool = True, ) -> Response: ''':meta private:''' diff --git a/nvchecker/httpclient/httpx_httpclient.py b/nvchecker/httpclient/httpx_httpclient.py index 7cf2345..3a17fb3 100644 --- a/nvchecker/httpclient/httpx_httpclient.py +++ b/nvchecker/httpclient/httpx_httpclient.py @@ -27,6 +27,7 @@ class HttpxSession(BaseSession): follow_redirects: bool = True, params = (), json = None, + body = None, verify_cert: bool = True, ) -> Response: client = self.clients.get((proxy, verify_cert)) @@ -40,8 +41,13 @@ class HttpxSession(BaseSession): self.clients[(proxy, verify_cert)] = client try: + if body is not None: + # Make sure all backends have the same default encoding for post data. 
+ if 'Content-Type' not in headers: + headers = {**headers, 'Content-Type': 'application/x-www-form-urlencoded'} + body = body.encode() r = await client.request( - method, url, json = json, + method, url, json = json, content = body, headers = headers, allow_redirects = follow_redirects, params = params, diff --git a/nvchecker/httpclient/tornado_httpclient.py b/nvchecker/httpclient/tornado_httpclient.py index 6d5240c..ce813ff 100644 --- a/nvchecker/httpclient/tornado_httpclient.py +++ b/nvchecker/httpclient/tornado_httpclient.py @@ -52,6 +52,7 @@ class TornadoSession(BaseSession): follow_redirects: bool = True, params = (), json = None, + body = None, verify_cert: bool = True, ) -> Response: kwargs: Dict[str, Any] = { @@ -62,7 +63,10 @@ class TornadoSession(BaseSession): 'validate_cert': verify_cert, } - if json: + if body: + # By default the content type is already 'application/x-www-form-urlencoded' + kwargs['body'] = body + elif json: kwargs['body'] = _json.dumps(json) kwargs['prepare_curl_callback'] = try_use_http2 From c43d4e900f4e62ae39464169621c465a45101dd5 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sun, 5 Sep 2021 16:26:30 -0400 Subject: [PATCH 2/2] Support HTTP POST request in the htmlparser and regex source Example: source = "regex" regex = "spcm_linux_libs_v[0-9a-zA-Z]*" url = "https://spectrum-instrumentation.com/spcm_downloads_downloads_ajax" post_data = "series%5B%5D=273&families%5B%5D=475" --- docs/usage.rst | 12 ++++++++++++ nvchecker_source/htmlparser.py | 8 +++++++- nvchecker_source/regex.py | 8 +++++++- tests/test_regex.py | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 0da4d18..72fe770 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -269,6 +269,12 @@ regex When multiple version strings are found, the maximum of those is chosen. +post_data + (*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. 
The value should be a string containing the full body of the request. The request's ``Content-Type`` can be specified using the ``post_data_type`` option. + +post_data_type + (*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``. + This source supports :ref:`list options`. Search in an HTTP header @@ -312,6 +318,12 @@ url xpath An xpath expression used to find the version string. +post_data + (*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The request's ``Content-Type`` can be specified using the ``post_data_type`` option. + +post_data_type + (*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``. + .. note:: An additional dependency "lxml" is required. diff --git a/nvchecker_source/htmlparser.py b/nvchecker_source/htmlparser.py index a64f213..077f1e0 100644 --- a/nvchecker_source/htmlparser.py +++ b/nvchecker_source/htmlparser.py @@ -15,7 +15,13 @@ async def get_version_impl(info): encoding = conf.get('encoding') parser = html.HTMLParser(encoding=encoding) - res = await session.get(conf['url']) + data = conf.get('post_data') + if data is None: + res = await session.get(conf['url']) + else: + res = await session.post(conf['url'], body = data, headers = { + 'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded') + }) doc = html.fromstring(res.body, base_url=conf['url'], parser=parser) try: diff --git a/nvchecker_source/regex.py b/nvchecker_source/regex.py index 1660b4f..48406e1 100644 --- a/nvchecker_source/regex.py +++ b/nvchecker_source/regex.py @@ -20,7 +20,13 @@ async def get_version_impl(info): encoding = conf.get('encoding', 'latin1') - res = await session.get(conf['url']) + data = conf.get('post_data') + if data is None: + res = await session.get(conf['url']) + else: + res = 
await session.post(conf['url'], body = data, headers = { + 'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded') + }) body = res.body.decode(encoding) versions = regex.findall(body) if not versions and not conf.get('missing_ok', False): diff --git a/tests/test_regex.py b/tests/test_regex.py index e54bca1..1b4c255 100644 --- a/tests/test_regex.py +++ b/tests/test_regex.py @@ -87,3 +87,36 @@ async def test_regex_bad_ssl(get_version, httpbin_secure): else: assert False, 'certificate should not be trusted' +async def test_regex_post(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"ABCDEF":\s*"(\w+)"', + "post_data": "ABCDEF=234&CDEFG=xyz" + }) == "234" + +async def test_regex_post2(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"CDEFG":\s*"(\w+)"', + "post_data": "ABCDEF=234&CDEFG=xyz" + }) == "xyz" + +async def test_regex_post_json(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"ABCDEF":\s*(\w+)', + "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}', + "post_data_type": "application/json" + }) == "234" + +async def test_regex_post_json2(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"CDEFG":\s*"(\w+)"', + "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}', + "post_data_type": "application/json" + }) == "xyz"