Skip to content

Commit

Permalink
Improve JSON output when there is leading data before the actual JSON…
Browse files Browse the repository at this point in the history
… body (httpie#1130)

In some special cases, to protect against Cross-Site Script Inclusion (XSSI)
attacks, the JSON response body starts with a magic prefix line that must be
stripped before feeding the rest of the response body to the JSON parser.
Such a prefix is now simply ignored by the parser but still printed in the
terminal.

* Fix Windows tests
  • Loading branch information
BoboTiG authored Sep 21, 2021
1 parent 2731341 commit e6c5cd3
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 51 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ This project adheres to [Semantic Versioning](https://semver.org/).

## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)

- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))

## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)

Blog post: [What’s new in HTTPie 2.5.0](https://httpie.io/blog/httpie-2.5.0)
Expand Down
56 changes: 8 additions & 48 deletions httpie/output/formatters/colors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
from pygments.formatters.terminal import TerminalFormatter
from pygments.formatters.terminal256 import Terminal256Formatter
from pygments.lexer import Lexer
from pygments.lexers.data import JsonLexer
from pygments.lexers.special import TextLexer
from pygments.lexers.text import HttpLexer as PygmentsHttpLexer
from pygments.util import ClassNotFound

from ..lexers.json import EnhancedJsonLexer
from ...compat import is_windows
from ...context import Environment
from ...plugins import FormatterPlugin
Expand Down Expand Up @@ -60,6 +62,7 @@ def __init__(
http_lexer = PygmentsHttpLexer()
formatter = TerminalFormatter()
else:
from ..lexers.http import SimplifiedHTTPLexer
http_lexer = SimplifiedHTTPLexer()
formatter = Terminal256Formatter(
style=self.get_style_class(color_scheme)
Expand Down Expand Up @@ -151,55 +154,12 @@ def get_lexer(
else:
lexer = pygments.lexers.get_lexer_by_name('json')

return lexer


class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
"""Simplified HTTP lexer for Pygments.
It only operates on headers and provides a stronger contrast between
their names and values than the original one bundled with Pygments
(:class:`pygments.lexers.text import HttpLexer`), especially when
Solarized color scheme is used.
# Use our own JSON lexer: it supports JSON bodies preceded by non-JSON data
# as well as legit JSON bodies.
if isinstance(lexer, JsonLexer):
lexer = EnhancedJsonLexer()

"""
name = 'HTTP'
aliases = ['http']
filenames = ['*.http']
tokens = {
'root': [
# Request-Line
(r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
pygments.lexer.bygroups(
pygments.token.Name.Function,
pygments.token.Text,
pygments.token.Name.Namespace,
pygments.token.Text,
pygments.token.Keyword.Reserved,
pygments.token.Operator,
pygments.token.Number
)),
# Response Status-Line
(r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
pygments.lexer.bygroups(
pygments.token.Keyword.Reserved, # 'HTTP'
pygments.token.Operator, # '/'
pygments.token.Number, # Version
pygments.token.Text,
pygments.token.Number, # Status code
pygments.token.Text,
pygments.token.Name.Exception, # Reason
)),
# Header
(r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
pygments.token.Name.Attribute, # Name
pygments.token.Text,
pygments.token.Operator, # Colon
pygments.token.Text,
pygments.token.String # Value
))
]
}
return lexer


class Solarized256Style(pygments.style.Style):
Expand Down
7 changes: 4 additions & 3 deletions httpie/output/formatters/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ def format_body(self, body: str, mime: str) -> str:
]
if (self.kwargs['explicit_json']
or any(token in mime for token in maybe_json)):
from ..utils import load_prefixed_json
try:
obj = json.loads(body)
data_prefix, json_obj = load_prefixed_json(body)
except ValueError:
pass # Invalid JSON, ignore.
else:
# Indent, sort keys by name, and avoid
# unicode escapes to improve readability.
body = json.dumps(
obj=obj,
body = data_prefix + json.dumps(
obj=json_obj,
sort_keys=self.format_options['json']['sort_keys'],
ensure_ascii=False,
indent=self.format_options['json']['indent']
Expand Down
Empty file.
49 changes: 49 additions & 0 deletions httpie/output/lexers/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import pygments


class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
    """Simplified HTTP lexer for Pygments.

    It only operates on headers and provides a stronger contrast between
    their names and values than the original one bundled with Pygments
    (:class:`pygments.lexers.text.HttpLexer`), especially when
    Solarized color scheme is used.

    The single ``root`` state holds three rules, tried in order: the
    request line, the response status line, and a generic ``name: value``
    header line.

    """
    # NOTE(review): this module only does `import pygments`; the
    # `pygments.lexer` / `pygments.token` attributes used below presumably
    # resolve because those submodules are imported elsewhere first -- confirm.
    name = 'HTTP'
    aliases = ['http']
    filenames = ['*.http']
    tokens = {
        'root': [
            # Request-Line
            (r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
             pygments.lexer.bygroups(
                 pygments.token.Name.Function,  # Method
                 pygments.token.Text,
                 pygments.token.Name.Namespace,  # Request target
                 pygments.token.Text,
                 pygments.token.Keyword.Reserved,  # 'HTTP'
                 pygments.token.Operator,  # '/'
                 pygments.token.Number  # Version
             )),
            # Response Status-Line
            (r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
             pygments.lexer.bygroups(
                 pygments.token.Keyword.Reserved,  # 'HTTP'
                 pygments.token.Operator,  # '/'
                 pygments.token.Number,  # Version
                 pygments.token.Text,
                 pygments.token.Number,  # Status code
                 pygments.token.Text,
                 pygments.token.Name.Exception,  # Reason
             )),
            # Header
            (r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
                pygments.token.Name.Attribute,  # Name
                pygments.token.Text,
                pygments.token.Operator,  # Colon
                pygments.token.Text,
                pygments.token.String  # Value
            ))
        ]
    }
31 changes: 31 additions & 0 deletions httpie/output/lexers/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import re

from pygments.lexer import bygroups, using, RegexLexer
from pygments.lexers.data import JsonLexer
from pygments.token import Token

# Token type given to the non-JSON data found in front of the JSON body.
PREFIX_TOKEN = Token.Error
# One or more characters, none of which is `{`, `[` or `"`.
PREFIX_REGEX = r'[^{\["]+'


class EnhancedJsonLexer(RegexLexer):
    """
    Enhanced JSON lexer for Pygments.

    It adds support for optional data prefixing the actual JSON body:
    the prefix is emitted as a single `PREFIX_TOKEN` and everything after
    it is delegated to the stock `JsonLexer`.

    """
    name = 'JSON'
    # DOTALL lets `.+` run across newlines so the whole body is consumed
    # by a single rule.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            # Optional non-JSON data prefix followed by the actual JSON body.
            # The body must start with `{`, `[`, `"`, `true`, `false` or `null`.
            # FIXME: a data prefix followed by a number (integer or float)
            # is not correctly handled.
            (
                fr'({PREFIX_REGEX})' + r'((?:[{\["]|true|false|null).+)',
                bygroups(PREFIX_TOKEN, using(JsonLexer))
            ),
            # JSON body (fallback when the prefixed rule does not match).
            (r'.+', using(JsonLexer)),
        ],
    }
36 changes: 36 additions & 0 deletions httpie/output/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import json
import re
from typing import Any, Tuple

from .lexers.json import PREFIX_REGEX


def load_prefixed_json(data: str) -> Tuple[str, Any]:
    """Parse JSON from `data`, tolerating leading non-JSON data.

    The whole string is tried first. If that fails, the leading data
    reported by `parse_prefixed_json()` is split off and only the
    remainder is parsed.

    Return a tuple (data prefix, decoded JSON value). The prefix is an
    empty string when `data` is valid JSON on its own.

    Raise `ValueError('Invalid JSON')` when neither attempt succeeds.

    """
    # First, the full data.
    try:
        return '', json.loads(data)
    except ValueError:
        pass

    # Then, try to find the start of the actual body.
    data_prefix, body = parse_prefixed_json(data)
    try:
        return data_prefix, json.loads(body)
    except ValueError as exc:
        # Keep the decoder's own error attached as the cause.
        raise ValueError('Invalid JSON') from exc


def parse_prefixed_json(data: str) -> Tuple[str, str]:
    """Find the potential JSON body from `data`.

    Sometimes the JSON body is prefixed with a XSSI magic string, specific
    to the server. The prefix is the run of characters matching
    `PREFIX_REGEX` located at the very beginning of `data`.

    Return a tuple (data prefix, actual JSON body). The prefix is an empty
    string when `data` does not start with such a run.

    """
    # Anchor at position 0: a prefix is leading data by definition. An
    # unanchored search could pick up a run from the middle of the body
    # (e.g. `1]` in `["[1]`) and slice the body at the wrong offset.
    match = re.match(PREFIX_REGEX, data)
    data_prefix = match.group(0) if match else ''
    return data_prefix, data[len(data_prefix):]
40 changes: 40 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import json

import pytest
import responses

from httpie.cli.constants import PRETTY_MAP
from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter

from .utils import MockEnvironment, http, URL_EXAMPLE

# Non-JSON strings placed in front of the JSON body in the test below.
# NOTE: the first entry is a raw string, so it ends with a literal
# backslash followed by "n" rather than a newline character.
TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
# JSON values serialized right after each prefix.
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None)  # FIX: missing int & float
# Escape sequence expected right before the prefix in colored output.
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'


@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@responses.activate
def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty):
    """Check the output for a JSON body that follows non-JSON data.

    The mocked response body is `data_prefix` followed by the serialized
    `json_data`; the output must contain the prefix followed by the JSON,
    indented and/or colored according to `pretty`.

    """
    mime = 'application/json'
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=data_prefix + json.dumps(json_data),
        content_type=mime,
    )

    with_colors = pretty in ('all', 'colors')
    if with_colors:
        env = MockEnvironment(colors=256)
    else:
        env = None
    response = http('--pretty=' + pretty, URL_EXAMPLE, env=env)

    # Only the modes other than 'none' and 'colors' re-indent the JSON.
    if pretty in ('none', 'colors'):
        json_indent = None
    else:
        json_indent = 4
    expected = data_prefix + json.dumps(json_data, indent=json_indent)

    if with_colors:
        formatter = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}})
        expected = formatter.format_body(expected, mime)
        # Check to ensure the non-JSON data prefix is colored only one time,
        # meaning it was correctly handled as a whole.
        assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected, expected

    assert expected in response

0 comments on commit e6c5cd3

Please sign in to comment.