Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duplicate keys preservation of JSON data #1163

Merged
merged 5 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).

## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)

- Fixed the preservation of duplicate keys in JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))

## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)
Expand Down
4 changes: 2 additions & 2 deletions httpie/cli/requestitems.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
RequestQueryParamsDict,
)
from .exceptions import ParseError
from ..utils import get_content_type, load_json_preserve_order
from ..utils import get_content_type, load_json_preserve_order_and_dupe_keys


class RequestItems:
Expand Down Expand Up @@ -150,6 +150,6 @@ def load_text_file(item: KeyValueArg) -> str:

def load_json(arg: KeyValueArg, contents: str) -> JSONType:
try:
return load_json_preserve_order(contents)
return load_json_preserve_order_and_dupe_keys(contents)
except ValueError as e:
raise ParseError(f'{arg.orig!r}: {e}')
5 changes: 3 additions & 2 deletions httpie/output/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from typing import Tuple

from ..utils import load_json_preserve_order_and_dupe_keys
from .lexers.json import PREFIX_REGEX


Expand All @@ -11,14 +12,14 @@ def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
"""
# First, the full data.
try:
return '', json.loads(data)
return '', load_json_preserve_order_and_dupe_keys(data)
except ValueError:
pass

# Then, try to find the start of the actual body.
data_prefix, body = parse_prefixed_json(data)
try:
return data_prefix, json.loads(body)
return data_prefix, load_json_preserve_order_and_dupe_keys(body)
except ValueError:
raise ValueError('Invalid JSON')

Expand Down
61 changes: 57 additions & 4 deletions httpie/utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,72 @@
import json
import mimetypes
import re
import time
from collections import OrderedDict
from http.cookiejar import parse_ns_headers
from pprint import pformat
from typing import List, Optional, Tuple
import re
from reprlib import recursive_repr
from typing import Any, List, Optional, Tuple

import requests.auth

RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)')
Item = Tuple[str, Any]
Items = List[Item]


class JsonDictPreservingDuplicateKeys(OrderedDict):
"""A specialized JSON dict preserving duplicate keys.

Note: Python 3.6 & 3.7 do not support duplicate keys sorting.
See https://bugs.python.org/issue23493#msg400929.

"""

def load_json_preserve_order(s):
return json.loads(s, object_pairs_hook=OrderedDict)
def __init__(self, items: Items):
    """Store the parsed `(key, value)` pairs, duplicate keys included.

    The pairs are kept as given in `self._items`. The underlying dict never
    holds the real data; at most it holds one placeholder entry (see the
    HACK note below).

    """
    # Set up the `OrderedDict` base before inserting anything: the pure-Python
    # implementation creates its internal bookkeeping in `__init__()`, and
    # `__setitem__()` fails without it.
    super().__init__()
    self._items = items

    # HACK: Force `json.dumps()` to use `self.items()` instead of an empty dict.
    #
    # Two JSON encoders are available on CPython: pure-Python (1) and C (2) implementations.
    #
    # (1) The pure-python implementation will do a simple `if not dict: return '{}'`,
    # and we could fake that check by implementing the `__bool__()` method.
    # Source:
    #   - https://github.com/python/cpython/blob/9d318ad/Lib/json/encoder.py#L334-L336
    #
    # (2) On the other hand, the C implementation will do a check on the number of
    # items contained inside the dict, using a verification on `dict->ma_used`, which
    # is updated only when an item is added/removed from the dict. For that case,
    # there is no workaround but to add an item into the dict.
    # Sources:
    #   - https://github.com/python/cpython/blob/9d318ad/Modules/_json.c#L1581-L1582
    #   - https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L53
    #   - https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L17-L18
    #
    # To please both implementations, we simply add one item to the dict.
    if items:
        self['__hack__'] = '__hack__'

def items(self) -> Items:
    """Return all items, duplicate ones included.

    A fresh list is returned on every call. The C JSON encoder sorts the
    list it receives in place when `sort_keys` is enabled, so handing out
    the internal list would let serialization reorder the stored pairs.

    """
    return list(self._items)

def __eq__(self, other: object) -> bool:
"""Most simple way to check for equality without reimplementing a complex logic.

"""
return repr(self) == repr(other)

@recursive_repr()
def __repr__(self) -> str:
return '{' + ', '.join(f'{k!r}: {v!r}' for (k, v) in self.items()) + '}'


def load_json_preserve_order_and_dupe_keys(s):
    """Parse the JSON text `s`, building each object from its raw key/value pairs.

    Every JSON object is handed to `JsonDictPreservingDuplicateKeys` as a list
    of pairs, so neither the order nor repeated keys are lost.

    """
    parsed = json.loads(s, object_pairs_hook=JsonDictPreservingDuplicateKeys)
    return parsed


def repr_dict(d: dict) -> str:
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def patharg(path):
FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
JSON_WITH_DUPE_KEYS_FILE_PATH = FIXTURES_ROOT / 'test_with_dupe_keys.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/test_with_dupe_keys.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"key":15,"key":15,"key":3,"key":7}
23 changes: 12 additions & 11 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
from requests.exceptions import InvalidSchema

import httpie.cli.argparser
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from httpie.status import ExitStatus
from httpie.cli import constants
from httpie.cli.definition import parser
from httpie.cli.argtypes import KeyValueArg, KeyValueArgType
from httpie.cli.requestitems import RequestItems

from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http


Expand Down Expand Up @@ -100,14 +101,14 @@ def test_valid_items(self):
assert raw_json_embed == json.loads(JSON_FILE_CONTENT)
items.data['string-embed'] = items.data['string-embed'].strip()
assert dict(items.data) == {
"ed": "",
"string": "value",
"bool": True,
"list": ["a", 1, {}, False],
"obj": {
"a": "b"
'ed': '',
'string': 'value',
'bool': True,
'list': ['a', 1, {}, False],
'obj': {
'a': 'b'
},
"string-embed": FILE_CONTENT,
'string-embed': FILE_CONTENT,
}

# Parsed query string parameters
Expand Down
52 changes: 52 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import json

import pytest
Expand All @@ -7,12 +8,27 @@
from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter

from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE

TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'

# Raw payload with a repeated key, followed by the pretty-printed bodies the
# tests look for in the output: once with sorted keys, once in original order.
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
JSON_WITH_DUPES_FORMATTED_SORTED = '''{
    "key": 3,
    "key": 7,
    "key": 15,
    "key": 15
}'''
JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
    "key": 15,
    "key": 15,
    "key": 3,
    "key": 7
}'''


@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
Expand All @@ -38,3 +54,39 @@ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_da
# meaning it was correctly handled as a whole.
assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body
assert expected_body in r


@responses.activate
def test_duplicate_keys_support_from_response():
    """JSON with duplicate keys should be handled correctly."""
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=JSON_WITH_DUPES_RAW,
        content_type='application/json',
    )

    # Keys are sorted by default, which only works with duplicate keys on
    # Python 3.8+ (see the `utils.JsonDictPreservingDuplicateKeys` docstring).
    if sys.version_info >= (3, 8):
        sorted_output = http('--pretty', 'format', URL_EXAMPLE)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # The original order must be kept when sorting is disabled.
    unsorted_output = http('--unsorted', '--pretty', 'format', URL_EXAMPLE)
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output


def test_duplicate_keys_support_from_input_file(httpbin):
    """JSON file with duplicate keys should be handled correctly."""
    post_url = httpbin.url + '/post'
    file_arg = f'@{JSON_WITH_DUPE_KEYS_FILE_PATH}'

    # Keys are sorted by default, which only works with duplicate keys on
    # Python 3.8+ (see the `utils.JsonDictPreservingDuplicateKeys` docstring).
    if sys.version_info >= (3, 8):
        sorted_output = http('--verbose', post_url, file_arg)
        # FIXME: count should be 2 (1 for the request, 1 for the response)
        #        but httpbin does not support duplicate keys.
        assert sorted_output.count(JSON_WITH_DUPES_FORMATTED_SORTED) == 1

    # The original order must be kept when sorting is disabled.
    unsorted_output = http('--verbose', '--unsorted', post_url, file_arg)
    # FIXME: count should be 2 (1 for the request, 1 for the response)
    #        but httpbin does not support duplicate keys.
    assert unsorted_output.count(JSON_WITH_DUPES_FORMATTED_UNSORTED) == 1