Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UTF8,16,32 support to the Memory Visualizer. #210

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
The text in the various viewing dialogs is still the full length.
The 100 limit probably needs to be configurable.
* Added register profiles to show only interesting/relevant registers.
* Added UTF-8,16,32 support in the Memory Visualizer.

## [2.3] - 2023-11-19
* In the margins of the source windows, allow CTRL+DoubleClick to do a quick RunToLine or RunToAddress.
Expand Down
143 changes: 100 additions & 43 deletions src/SeerHexWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <QtGui/QClipboard>
#include <QtGui/QTextCursor>
#include <QtGui/QTextBlock>
#include <QtCore/QStringConverter>
#include <QtCore/QSize>
#include <QtCore/QDebug>
#include <stdexcept>
Expand Down Expand Up @@ -217,6 +218,12 @@ QString SeerHexWidget::charModeString () const {

if (charMode() == SeerHexWidget::AsciiCharMode) {
return "ascii";
}else if (charMode() == SeerHexWidget::Utf8Mode) {
return "utf8";
}else if (charMode() == SeerHexWidget::Utf16Mode) {
return "utf16";
}else if (charMode() == SeerHexWidget::Utf32Mode) {
return "utf32";
}else if (charMode() == SeerHexWidget::EbcdicCharMode) {
return "ebcdic";
}
Expand Down Expand Up @@ -257,7 +264,7 @@ void SeerHexWidget::handleCursorPositionChanged () {
return;
}

// Is is after the hex values? (ascii/ebcdic region)
// Is it after the hex values? (ascii/utf/ebcdic region)
if (cursor.positionInBlock() > SeerHexWidget::HexFieldWidth + hexCharsPerLine()) {
emit byteOffsetChanged(-1);
return;
Expand Down Expand Up @@ -337,16 +344,19 @@ void SeerHexWidget::handleByteOffsetChanged (int byte) {
extraSelections.append(extra_byte);

// Highlight the current ascii value.
cursor.movePosition(QTextCursor::StartOfLine, QTextCursor::MoveAnchor);
cursor.movePosition(QTextCursor::Right, QTextCursor::MoveAnchor, pos_a);
cursor.movePosition(QTextCursor::Right, QTextCursor::KeepAnchor, 1);

// Add it to the extra selections.
QTextEdit::ExtraSelection extra_ascii;
extra_ascii.format.setBackground(plainTextEdit->palette().highlight().color());
extra_ascii.cursor = cursor;

extraSelections.append(extra_ascii);
// For Ascii and Ebcdic only. No UTF as it can be variable length encoding.
if (charMode() == SeerHexWidget::AsciiCharMode || charMode() == SeerHexWidget::EbcdicCharMode) {
cursor.movePosition(QTextCursor::StartOfLine, QTextCursor::MoveAnchor);
cursor.movePosition(QTextCursor::Right, QTextCursor::MoveAnchor, pos_a);
cursor.movePosition(QTextCursor::Right, QTextCursor::KeepAnchor, 1);

// Add it to the extra selections.
QTextEdit::ExtraSelection extra_ascii;
extra_ascii.format.setBackground(plainTextEdit->palette().highlight().color());
extra_ascii.cursor = cursor;

extraSelections.append(extra_ascii);
}

} plainTextEdit->setExtraSelections(extraSelections);

Expand Down Expand Up @@ -521,6 +531,10 @@ void SeerHexWidget::handleByteOffsetChanged (int byte) {
val += QString(symbol);
}

}else if (charMode() == SeerHexWidget::Utf8Mode || charMode() == SeerHexWidget::Utf16Mode || charMode() == SeerHexWidget::Utf32Mode) {

val = "Can't show for UTF";

}else if (charMode() == SeerHexWidget::EbcdicCharMode) {
for (int i=0; i<arr.size(); i++) {

Expand All @@ -531,7 +545,7 @@ void SeerHexWidget::handleByteOffsetChanged (int byte) {
}

}else{
// Don't print anything.
val = "";
}

lineEdit_14->setText(val);
Expand All @@ -556,6 +570,71 @@ void SeerHexWidget::create () {
return;
}

// Convert the data to a 'text' string.
QString textString = "";
QByteArray data = _pdata->getData();

if (charMode() == SeerHexWidget::AsciiCharMode) {

for (int b=0; b<data.size(); b++) {

unsigned char ch = Seer::ucharToAscii(data[b]);

textString.append(QChar(ch));
}

}else if (charMode() == SeerHexWidget::Utf8Mode) {

auto toUtf16 = QStringDecoder(QStringDecoder::Utf8, QStringConverter::Flag::ConvertInvalidToNull);

textString = toUtf16.decode(data);

// Filter out ascii control characters.
for (int b=0; b<textString.length(); b++) {
if (textString[b] == QChar(127) || textString[b] < QChar(32)) {
textString[b] = QChar::ReplacementCharacter;
}
}

}else if (charMode() == SeerHexWidget::Utf16Mode) {

auto toUtf16 = QStringDecoder(QStringDecoder::Utf16, QStringConverter::Flag::ConvertInvalidToNull);

textString = toUtf16.decode(data);

// Filter out ascii control characters.
for (int b=0; b<textString.length(); b++) {
if (textString[b] == QChar(127) || textString[b] < QChar(32)) {
textString[b] = QChar::ReplacementCharacter;
}
}

}else if (charMode() == SeerHexWidget::Utf32Mode) {

auto toUtf16 = QStringDecoder(QStringDecoder::Utf32, QStringConverter::Flag::ConvertInvalidToNull);

textString = toUtf16.decode(data);

// Filter out ascii control characters.
for (int b=0; b<textString.length(); b++) {
if (textString[b] == QChar(127) || textString[b] < QChar(32)) {
textString[b] = QChar::ReplacementCharacter;
}
}

}else if (charMode() == SeerHexWidget::EbcdicCharMode) {

for (int b=0; b<data.size(); b++) {

unsigned char ch = Seer::ebcdicToAscii(data[b]);

textString.append(QChar(ch));
}

}else{
// No 'text' string.
}

// Set text formats.
QTextCharFormat defaultFormat = plainTextEdit->currentCharFormat();
QTextCharFormat grayFormat = defaultFormat;
Expand All @@ -569,9 +648,7 @@ void SeerHexWidget::create () {
cursor.movePosition(QTextCursor::Start);

// Go through the data, one byte at a time.
for (int i=0; i<_pdata->size(); i+=bytesPerLine()) {

QByteArray data = _pdata->getData(i, bytesPerLine());
for (int i=0; i<data.size(); i+=bytesPerLine()) {

// Place a new hex address on the left side.
if (i % bytesPerLine() == 0) {
Expand All @@ -588,9 +665,9 @@ void SeerHexWidget::create () {
// Print N bytes in their datatype value.
int b = 0;

for (b=0; b<bytesPerLine() && i+b < _pdata->size(); b++) {
for (b=0; b<bytesPerLine() && i+b < data.size(); b++) {

unsigned char ch = data[b];
unsigned char ch = data[i+b];

QString val;

Expand Down Expand Up @@ -640,33 +717,13 @@ void SeerHexWidget::create () {
// Write spacer to document.
cursor.insertText (QString("| "), defaultFormat);

// Print N bytes in their char value.
if (charMode() == SeerHexWidget::AsciiCharMode) {
for (int b=0; b<bytesPerLine() && i+b < _pdata->size(); b++) {

unsigned char ch = Seer::ucharToAscii( data[b] );

QChar symbol = QChar(ch);
QString val(symbol);

// Write display character to document.
cursor.insertText (val, defaultFormat);
}

}else if (charMode() == SeerHexWidget::EbcdicCharMode) {
for (int b=0; b<bytesPerLine() && i+b < _pdata->size(); b++) {

unsigned char ch = Seer::ebcdicToAscii( data[b] );
// Print N bytes of the 'text' string.
for (int b=0; b<bytesPerLine() && i+b < textString.length(); b++) {

QChar symbol = QChar(ch);
QString val(symbol);
QString val = textString[i+b];

// Write display character to document.
cursor.insertText (val, defaultFormat);
}

}else{
// Don't print anything.
// Write display character to document.
cursor.insertText (val, defaultFormat);
}

// Write eol to document.
Expand All @@ -678,7 +735,7 @@ void SeerHexWidget::create () {

// Print checksum.
{
quint16 crc16 = qChecksum(_pdata->getData(), Qt::ChecksumIso3309);
quint16 crc16 = qChecksum(data, Qt::ChecksumIso3309);
QString crc16str = QString::number(crc16);

lineEdit_15->setText(crc16str);
Expand Down
5 changes: 4 additions & 1 deletion src/SeerHexWidget.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ class SeerHexWidget: public QWidget, protected Ui::SeerHexWidgetForm {
enum CharMode {
UnknownCharMode = 0,
AsciiCharMode = 1,
EbcdicCharMode = 2
Utf8Mode = 2,
Utf16Mode = 3,
Utf32Mode = 4,
EbcdicCharMode = 5
};

enum MagicNumbers {
Expand Down
17 changes: 16 additions & 1 deletion src/SeerMemoryVisualizerWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,14 @@ SeerMemoryVisualizerWidget::SeerMemoryVisualizerWidget (QWidget* parent) : QWidg

if (memoryHexEditor->charMode() == SeerHexWidget::AsciiCharMode) {
charDisplayFormatComboBox->setCurrentIndex(0);
}else if (memoryHexEditor->charMode() == SeerHexWidget::EbcdicCharMode) {
}else if (memoryHexEditor->charMode() == SeerHexWidget::Utf8Mode) {
charDisplayFormatComboBox->setCurrentIndex(1);
}else if (memoryHexEditor->charMode() == SeerHexWidget::Utf16Mode) {
charDisplayFormatComboBox->setCurrentIndex(2);
}else if (memoryHexEditor->charMode() == SeerHexWidget::Utf32Mode) {
charDisplayFormatComboBox->setCurrentIndex(3);
}else if (memoryHexEditor->charMode() == SeerHexWidget::EbcdicCharMode) {
charDisplayFormatComboBox->setCurrentIndex(4);
}else{
charDisplayFormatComboBox->setCurrentIndex(0);
}
Expand Down Expand Up @@ -328,6 +334,15 @@ void SeerMemoryVisualizerWidget::handleCharDisplayFormatComboBox (int index) {
memoryHexEditor->setCharMode(SeerHexWidget::AsciiCharMode);

}else if (index == 1) {
memoryHexEditor->setCharMode(SeerHexWidget::Utf8Mode);

}else if (index == 2) {
memoryHexEditor->setCharMode(SeerHexWidget::Utf16Mode);

}else if (index == 3) {
memoryHexEditor->setCharMode(SeerHexWidget::Utf32Mode);

}else if (index == 4) {
memoryHexEditor->setCharMode(SeerHexWidget::EbcdicCharMode);

}else{
Expand Down
15 changes: 15 additions & 0 deletions src/SeerMemoryVisualizerWidget.ui
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,21 @@
<string>ascii</string>
</property>
</item>
<item>
<property name="text">
<string>utf8</string>
</property>
</item>
<item>
<property name="text">
<string>utf16</string>
</property>
</item>
<item>
<property name="text">
<string>utf32</string>
</property>
</item>
<item>
<property name="text">
<string>ebcdic</string>
Expand Down
8 changes: 8 additions & 0 deletions src/resources/help/MemoryVisualizer.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The Memory Visualizer shows the contents of a region of memory in different form
* Binary
* Decimal
* Ascii
* UTF-8, UTF-16, UTF-32
* Ebcdic
* Assembly

Expand Down Expand Up @@ -68,8 +69,15 @@ The memory dump can be displayed in these formats:
The memory dump can be displayed in these character formats:

* Ascii
* UTF-8, UTF-16, UTF-32
* Ebcdic

Note: when displaying UTF, some features of the visualizer are restricted. For example, when clicking on
a byte in the memory dump, the corresponding character in the text view is not highlighted. This is
because a UTF-encoded character can occupy a variable number of bytes.

There may be a delay while the extra fonts needed for the additional characters that UTF provides are loaded.

### Column width

This specifies the column width of the memory dump.
Expand Down
1 change: 1 addition & 0 deletions tests/hellounicode/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hellounicode
10 changes: 10 additions & 0 deletions tests/hellounicode/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Default target: build the 'hellounicode' test program.
.PHONY: all
all: hellounicode

# Compile the single source file with debug information (-g).
hellounicode: hellounicode.cpp
g++ -g -o hellounicode hellounicode.cpp

# Remove the built executable and any leftover object file.
.PHONY: clean
clean:
rm -f hellounicode hellounicode.o

31 changes: 31 additions & 0 deletions tests/hellounicode/hellounicode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include <iostream>
#include <stdlib.h>

// Test fixture holding the same sentence as wide, UTF-8, UTF-16 and UTF-32
// strings, plus one UTF-16 string containing non-ASCII characters. It prints
// the length of each string and keeps a raw pointer to each string's buffer.
int main (int argc, char* argv[]) {

    // One copy of the sentence per character type.
    std::wstring   w_redrum{L"All work and no play makes Jack a dull boy.\n"};
    std::string    u8_redrum{u8"All work and no play makes Jack a dull boy.\n"};
    std::u16string u16_redrum{u"All work and no play makes Jack a dull boy.\n"};
    std::u32string u32_redrum{U"All work and no play makes Jack a dull boy.\n"};

    // A UTF-16 string with characters outside the ASCII range.
    std::u16string u16_hungry{u"nǐ chīle ma.\n"};

    // Use every string so none of them is optimized away. Each length goes on
    // its own line and is flushed with std::endl.
    std::cout << w_redrum.length()   << std::endl
              << u8_redrum.length()  << std::endl
              << u16_redrum.length() << std::endl
              << u32_redrum.length() << std::endl
              << u16_hungry.length() << std::endl;

    // Raw pointers to each string's character data. They are not used by the
    // program itself -- presumably they exist to be inspected from a debugger.
    const wchar_t*  w_ptr      = w_redrum.data();
    const char*     u8_ptr     = u8_redrum.data();
    const char16_t* u16_ptr    = u16_redrum.data();
    const char32_t* u32_ptr    = u32_redrum.data();
    const char16_t* hungry_ptr = u16_hungry.data();

    return 0;
}

Loading