Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duplicate keys preservation of JSON data #1163

Merged
merged 5 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).

## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)

- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))

## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)
Expand Down
4 changes: 2 additions & 2 deletions httpie/cli/requestitems.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
RequestQueryParamsDict,
)
from .exceptions import ParseError
from ..utils import get_content_type, load_json_preserve_order
from ..utils import get_content_type, load_json_preserve_order_and_dupe_keys


class RequestItems:
Expand Down Expand Up @@ -150,6 +150,6 @@ def load_text_file(item: KeyValueArg) -> str:

# Decode `contents` as JSON on behalf of the request item `arg`.
# A `ValueError` raised while decoding is re-raised as `ParseError`, with the
# message prefixed by the repr of `arg.orig`.
def load_json(arg: KeyValueArg, contents: str) -> JSONType:
try:
# NOTE(review): diff rendering without +/- markers; presumably the first
# `return` is the replaced line and the second is its replacement.
return load_json_preserve_order(contents)
return load_json_preserve_order_and_dupe_keys(contents)
except ValueError as e:
raise ParseError(f'{arg.orig!r}: {e}')
5 changes: 3 additions & 2 deletions httpie/output/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from typing import Tuple

from ..utils import load_json_preserve_order_and_dupe_keys
from .lexers.json import PREFIX_REGEX


Expand All @@ -11,14 +12,14 @@ def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
"""
# First, the full data.
try:
return '', json.loads(data)
return '', load_json_preserve_order_and_dupe_keys(data)
except ValueError:
pass

# Then, try to find the start of the actual body.
data_prefix, body = parse_prefixed_json(data)
try:
return data_prefix, json.loads(body)
return data_prefix, load_json_preserve_order_and_dupe_keys(body)
except ValueError:
raise ValueError('Invalid JSON')

Expand Down
58 changes: 54 additions & 4 deletions httpie/utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,69 @@
import json
import mimetypes
import re
import sys
import time
from collections import OrderedDict
from http.cookiejar import parse_ns_headers
from pprint import pformat
from typing import List, Optional, Tuple
import re
from typing import Any, List, Optional, Tuple

import requests.auth

RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)')
Item = Tuple[str, Any]
Items = List[Item]


class JsonDictPreservingDuplicateKeys(OrderedDict):
    """A specialized JSON dict preserving duplicate keys.

    The decoded ``(key, value)`` pairs are stored in a list, so keys that
    occur more than once are all kept. `items()` returns that list, which is
    what `json.dumps()` serializes.

    The underlying dict storage holds at most one placeholder entry (see
    `_ensure_items_used()`), so regular mapping access such as ``obj['key']``
    does not reflect the JSON members; use `items()` instead.

    Two instances compare equal when their item lists are equal (same pairs,
    same order, duplicates included).

    """

    # Python versions prior to 3.8 suffer from an issue with multiple keys with the same name.
    # `json.dumps(obj, indent=N, sort_keys=True)` will output sorted keys when they are unique, and
    # duplicate keys will be output as they were defined in the original data.
    # See <https://bugs.python.org/issue23493#msg400929> for the behavior change between Python versions.
    SUPPORTS_SORTING = sys.version_info >= (3, 8)

    def __init__(self, items: Items):
        """Store the decoded pairs.

        :param items: list of ``(key, value)`` pairs, as passed by
            `json.loads()` to its `object_pairs_hook`.

        """
        super().__init__()
        self._items = items
        self._ensure_items_used()

    def _ensure_items_used(self) -> None:
        """HACK: Force `json.dumps()` to use `self.items()` instead of an empty dict.

        Two JSON encoders are available on CPython: pure-Python (1) and C (2) implementations.

        (1) The pure-python implementation will do a simple `if not dict: return '{}'`,
        and we could fake that check by implementing the `__bool__()` method.
        Source:
            - <https://github.com/python/cpython/blob/9d318ad/Lib/json/encoder.py#L334-L336>

        (2) On the other hand, the C implementation will do a check on the number of
        items contained inside the dict, using a verification on `dict->ma_used`, which
        is updated only when an item is added/removed from the dict. For that case,
        there is no workaround but to add an item into the dict.
        Sources:
            - <https://github.com/python/cpython/blob/9d318ad/Modules/_json.c#L1581-L1582>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L53>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L17-L18>

        To please both implementations, we simply add one item to the dict.

        """
        if self._items:
            self['__hack__'] = '__hack__'

    def items(self) -> Items:
        """Return all items, duplicate ones included.

        """
        return self._items

    def __eq__(self, other: object) -> bool:
        """Compare by stored items rather than by the placeholder entry.

        Without this, the inherited comparison only sees the `__hack__`
        placeholder and reports any two non-empty instances as equal.
        Comparison against other types is left to `OrderedDict`.

        """
        if isinstance(other, JsonDictPreservingDuplicateKeys):
            return self._items == other._items
        return super().__eq__(other)

    def __ne__(self, other: object) -> bool:
        """Inverse of `__eq__()`.

        Defined explicitly because `OrderedDict` provides its own `!=`.

        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result


def load_json_preserve_order(s):
    """Decode the JSON document `s`, building every object as an `OrderedDict`."""
    decoded = json.loads(s, object_pairs_hook=OrderedDict)
    return decoded
def load_json_preserve_order_and_dupe_keys(s):
    """Decode the JSON document `s`.

    Every JSON object is built by handing its key/value pairs to
    `JsonDictPreservingDuplicateKeys`.

    """
    pairs_hook = JsonDictPreservingDuplicateKeys
    return json.loads(s, object_pairs_hook=pairs_hook)


def repr_dict(d: dict) -> str:
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def patharg(path):
FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
JSON_WITH_DUPE_KEYS_FILE_PATH = FIXTURES_ROOT / 'test_with_dupe_keys.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/test_with_dupe_keys.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"key":15,"key":15,"key":3,"key":7}
29 changes: 14 additions & 15 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
"""CLI argument parsing related tests."""
import argparse
import json

import pytest
from requests.exceptions import InvalidSchema

import httpie.cli.argparser
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from httpie.status import ExitStatus
from httpie.cli import constants
from httpie.cli.definition import parser
from httpie.cli.argtypes import KeyValueArg, KeyValueArgType
from httpie.cli.requestitems import RequestItems
from httpie.status import ExitStatus
from httpie.utils import load_json_preserve_order_and_dupe_keys

from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http


Expand Down Expand Up @@ -97,17 +98,15 @@ def test_valid_items(self):

# Parsed data
raw_json_embed = items.data.pop('raw-json-embed')
assert raw_json_embed == json.loads(JSON_FILE_CONTENT)
assert raw_json_embed == load_json_preserve_order_and_dupe_keys(JSON_FILE_CONTENT)
items.data['string-embed'] = items.data['string-embed'].strip()
assert dict(items.data) == {
"ed": "",
"string": "value",
"bool": True,
"list": ["a", 1, {}, False],
"obj": {
"a": "b"
},
"string-embed": FILE_CONTENT,
'ed': '',
'string': 'value',
'bool': True,
'list': ['a', 1, {}, False],
'obj': load_json_preserve_order_and_dupe_keys('{"a": "b"}'),
'string-embed': FILE_CONTENT,
}

# Parsed query string parameters
Expand Down
48 changes: 48 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,29 @@
from httpie.cli.constants import PRETTY_MAP
from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter
from httpie.utils import JsonDictPreservingDuplicateKeys

from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE

# Non-JSON prefixes and JSON values that parametrize the prefixed-body test below.
TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None)  # FIX: missing int & float
# Color escape sequence expected in front of the prefix; differs on Windows.
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'

# A JSON object repeating the same key, followed by the formatted output the
# duplicate-key tests expect by default (sorted) and with `--unsorted`.
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
JSON_WITH_DUPES_FORMATTED_SORTED = '''{
"key": 3,
"key": 7,
"key": 15,
"key": 15
}'''
JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
"key": 15,
"key": 15,
"key": 3,
"key": 7
}'''


@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
Expand All @@ -38,3 +54,35 @@ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_da
# meaning it was correctly handled as a whole.
assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body
assert expected_body in r


@responses.activate
def test_duplicate_keys_support_from_response():
    """A response body with duplicate JSON keys keeps all of them when formatted."""
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=JSON_WITH_DUPES_RAW,
        content_type='application/json',
    )
    cli_args = ('--pretty', 'format', URL_EXAMPLE)

    # Sorting is the default; only checked where the interpreter supports it.
    if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
        sorted_output = http(*cli_args)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # With --unsorted the keys must come out in their original order.
    unsorted_output = http(*cli_args, '--unsorted')
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output


def test_duplicate_keys_support_from_input_file():
    """A JSON input file with duplicate keys keeps all of them in the request."""
    file_arg = f'@{JSON_WITH_DUPE_KEYS_FILE_PATH}'
    cli_args = ('--verbose', '--offline', URL_EXAMPLE, file_arg)

    # Sorting is the default; only checked where the interpreter supports it.
    if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
        sorted_output = http(*cli_args)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # With --unsorted the keys must come out in their original order.
    unsorted_output = http(*cli_args, '--unsorted')
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output