Skip to content

Commit

Permalink
Merge pull request #210 from epasveer/58-memoryvisualizer-add-utf-816…
Browse files Browse the repository at this point in the history
…32-encodings-to-the-text-box

Add UTF8,16,32 support to the Memory Visualizer.
  • Loading branch information
epasveer authored Dec 27, 2023
2 parents c55fe5e + a8256cc commit bb5c0ef
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 45 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
The text in the various viewing dialogs is still the full length.
The 100 limit probably needs to be configurable.
* Added register profiles to show only interesting/relevant registers.
* Added UTF-8,16,32 support in the Memory Visualizer.

## [2.3] - 2023-11-19
* In the margins of the source windows, allow CTRL+DoubleClick to do a quick RunToLine or RunToAddress.
Expand Down
143 changes: 100 additions & 43 deletions src/SeerHexWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <QtGui/QClipboard>
#include <QtGui/QTextCursor>
#include <QtGui/QTextBlock>
#include <QtCore/QStringConverter>
#include <QtCore/QSize>
#include <QtCore/QDebug>
#include <stdexcept>
Expand Down Expand Up @@ -217,6 +218,12 @@ QString SeerHexWidget::charModeString () const {

if (charMode() == SeerHexWidget::AsciiCharMode) {
return "ascii";
}else if (charMode() == SeerHexWidget::Utf8Mode) {
return "utf8";
}else if (charMode() == SeerHexWidget::Utf16Mode) {
return "utf16";
}else if (charMode() == SeerHexWidget::Utf32Mode) {
return "utf32";
}else if (charMode() == SeerHexWidget::EbcdicCharMode) {
return "ebcdic";
}
Expand Down Expand Up @@ -257,7 +264,7 @@ void SeerHexWidget::handleCursorPositionChanged () {
return;
}

// Is is after the hex values? (ascii/ebcdic region)
// Is it after the hex values? (ascii/utf/ebcdic region)
if (cursor.positionInBlock() > SeerHexWidget::HexFieldWidth + hexCharsPerLine()) {
emit byteOffsetChanged(-1);
return;
Expand Down Expand Up @@ -337,16 +344,19 @@ void SeerHexWidget::handleByteOffsetChanged (int byte) {
extraSelections.append(extra_byte);

// Highlight the current ascii value.
cursor.movePosition(QTextCursor::StartOfLine, QTextCursor::MoveAnchor);
cursor.movePosition(QTextCursor::Right, QTextCursor::MoveAnchor, pos_a);
cursor.movePosition(QTextCursor::Right, QTextCursor::KeepAnchor, 1);

// Add it to the extra selections.
QTextEdit::ExtraSelection extra_ascii;
extra_ascii.format.setBackground(plainTextEdit->palette().highlight().color());
extra_ascii.cursor = cursor;

extraSelections.append(extra_ascii);
// For Ascii and Ebcdic only. No UTF as it can be variable length encoding.
if (charMode() == SeerHexWidget::AsciiCharMode || charMode() == SeerHexWidget::EbcdicCharMode) {
cursor.movePosition(QTextCursor::StartOfLine, QTextCursor::MoveAnchor);
cursor.movePosition(QTextCursor::Right, QTextCursor::MoveAnchor, pos_a);
cursor.movePosition(QTextCursor::Right, QTextCursor::KeepAnchor, 1);

// Add it to the extra selections.
QTextEdit::ExtraSelection extra_ascii;
extra_ascii.format.setBackground(plainTextEdit->palette().highlight().color());
extra_ascii.cursor = cursor;

extraSelections.append(extra_ascii);
}

} plainTextEdit->setExtraSelections(extraSelections);

Expand Down Expand Up @@ -521,6 +531,10 @@ void SeerHexWidget::handleByteOffsetChanged (int byte) {
val += QString(symbol);
}

}else if (charMode() == SeerHexWidget::Utf8Mode || charMode() == SeerHexWidget::Utf16Mode || charMode() == SeerHexWidget::Utf32Mode) {

val = "Can't show for UTF";

}else if (charMode() == SeerHexWidget::EbcdicCharMode) {
for (int i=0; i<arr.size(); i++) {

Expand All @@ -531,7 +545,7 @@ void SeerHexWidget::handleByteOffsetChanged (int byte) {
}

}else{
// Don't print anything.
val = "";
}

lineEdit_14->setText(val);
Expand All @@ -556,6 +570,71 @@ void SeerHexWidget::create () {
return;
}

// Convert the data to a 'text' string.
QString textString = "";
QByteArray data = _pdata->getData();

if (charMode() == SeerHexWidget::AsciiCharMode) {

for (int b=0; b<data.size(); b++) {

unsigned char ch = Seer::ucharToAscii(data[b]);

textString.append(QChar(ch));
}

}else if (charMode() == SeerHexWidget::Utf8Mode) {

auto toUtf16 = QStringDecoder(QStringDecoder::Utf8, QStringConverter::Flag::ConvertInvalidToNull);

textString = toUtf16.decode(data);

// Filter out ascii control characters.
for (int b=0; b<textString.length(); b++) {
if (textString[b] == QChar(127) || textString[b] < QChar(32)) {
textString[b] = QChar::ReplacementCharacter;
}
}

}else if (charMode() == SeerHexWidget::Utf16Mode) {

auto toUtf16 = QStringDecoder(QStringDecoder::Utf16, QStringConverter::Flag::ConvertInvalidToNull);

textString = toUtf16.decode(data);

// Filter out ascii control characters.
for (int b=0; b<textString.length(); b++) {
if (textString[b] == QChar(127) || textString[b] < QChar(32)) {
textString[b] = QChar::ReplacementCharacter;
}
}

}else if (charMode() == SeerHexWidget::Utf32Mode) {

auto toUtf16 = QStringDecoder(QStringDecoder::Utf32, QStringConverter::Flag::ConvertInvalidToNull);

textString = toUtf16.decode(data);

// Filter out ascii control characters.
for (int b=0; b<textString.length(); b++) {
if (textString[b] == QChar(127) || textString[b] < QChar(32)) {
textString[b] = QChar::ReplacementCharacter;
}
}

}else if (charMode() == SeerHexWidget::EbcdicCharMode) {

for (int b=0; b<data.size(); b++) {

unsigned char ch = Seer::ebcdicToAscii(data[b]);

textString.append(QChar(ch));
}

}else{
// No 'text' string.
}

// Set text formats.
QTextCharFormat defaultFormat = plainTextEdit->currentCharFormat();
QTextCharFormat grayFormat = defaultFormat;
Expand All @@ -569,9 +648,7 @@ void SeerHexWidget::create () {
cursor.movePosition(QTextCursor::Start);

// Go through the data, one byte at a time.
for (int i=0; i<_pdata->size(); i+=bytesPerLine()) {

QByteArray data = _pdata->getData(i, bytesPerLine());
for (int i=0; i<data.size(); i+=bytesPerLine()) {

// Place a new hex address on the left side.
if (i % bytesPerLine() == 0) {
Expand All @@ -588,9 +665,9 @@ void SeerHexWidget::create () {
// Print N bytes in their datatype value.
int b = 0;

for (b=0; b<bytesPerLine() && i+b < _pdata->size(); b++) {
for (b=0; b<bytesPerLine() && i+b < data.size(); b++) {

unsigned char ch = data[b];
unsigned char ch = data[i+b];

QString val;

Expand Down Expand Up @@ -640,33 +717,13 @@ void SeerHexWidget::create () {
// Write spacer to document.
cursor.insertText (QString("| "), defaultFormat);

// Print N bytes in their char value.
if (charMode() == SeerHexWidget::AsciiCharMode) {
for (int b=0; b<bytesPerLine() && i+b < _pdata->size(); b++) {

unsigned char ch = Seer::ucharToAscii( data[b] );

QChar symbol = QChar(ch);
QString val(symbol);

// Write display character to document.
cursor.insertText (val, defaultFormat);
}

}else if (charMode() == SeerHexWidget::EbcdicCharMode) {
for (int b=0; b<bytesPerLine() && i+b < _pdata->size(); b++) {

unsigned char ch = Seer::ebcdicToAscii( data[b] );
// Print N bytes of the 'text' string.
for (int b=0; b<bytesPerLine() && i+b < textString.length(); b++) {

QChar symbol = QChar(ch);
QString val(symbol);
QString val = textString[i+b];

// Write display character to document.
cursor.insertText (val, defaultFormat);
}

}else{
// Don't print anything.
// Write display character to document.
cursor.insertText (val, defaultFormat);
}

// Write eol to document.
Expand All @@ -678,7 +735,7 @@ void SeerHexWidget::create () {

// Print checksum.
{
quint16 crc16 = qChecksum(_pdata->getData(), Qt::ChecksumIso3309);
quint16 crc16 = qChecksum(data, Qt::ChecksumIso3309);
QString crc16str = QString::number(crc16);

lineEdit_15->setText(crc16str);
Expand Down
5 changes: 4 additions & 1 deletion src/SeerHexWidget.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ class SeerHexWidget: public QWidget, protected Ui::SeerHexWidgetForm {
enum CharMode {
UnknownCharMode = 0,
AsciiCharMode = 1,
EbcdicCharMode = 2
Utf8Mode = 2,
Utf16Mode = 3,
Utf32Mode = 4,
EbcdicCharMode = 5
};

enum MagicNumbers {
Expand Down
17 changes: 16 additions & 1 deletion src/SeerMemoryVisualizerWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,14 @@ SeerMemoryVisualizerWidget::SeerMemoryVisualizerWidget (QWidget* parent) : QWidg

if (memoryHexEditor->charMode() == SeerHexWidget::AsciiCharMode) {
charDisplayFormatComboBox->setCurrentIndex(0);
}else if (memoryHexEditor->charMode() == SeerHexWidget::EbcdicCharMode) {
}else if (memoryHexEditor->charMode() == SeerHexWidget::Utf8Mode) {
charDisplayFormatComboBox->setCurrentIndex(1);
}else if (memoryHexEditor->charMode() == SeerHexWidget::Utf16Mode) {
charDisplayFormatComboBox->setCurrentIndex(2);
}else if (memoryHexEditor->charMode() == SeerHexWidget::Utf32Mode) {
charDisplayFormatComboBox->setCurrentIndex(3);
}else if (memoryHexEditor->charMode() == SeerHexWidget::EbcdicCharMode) {
charDisplayFormatComboBox->setCurrentIndex(4);
}else{
charDisplayFormatComboBox->setCurrentIndex(0);
}
Expand Down Expand Up @@ -328,6 +334,15 @@ void SeerMemoryVisualizerWidget::handleCharDisplayFormatComboBox (int index) {
memoryHexEditor->setCharMode(SeerHexWidget::AsciiCharMode);

}else if (index == 1) {
memoryHexEditor->setCharMode(SeerHexWidget::Utf8Mode);

}else if (index == 2) {
memoryHexEditor->setCharMode(SeerHexWidget::Utf16Mode);

}else if (index == 3) {
memoryHexEditor->setCharMode(SeerHexWidget::Utf32Mode);

}else if (index == 4) {
memoryHexEditor->setCharMode(SeerHexWidget::EbcdicCharMode);

}else{
Expand Down
15 changes: 15 additions & 0 deletions src/SeerMemoryVisualizerWidget.ui
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,21 @@
<string>ascii</string>
</property>
</item>
<item>
<property name="text">
<string>utf8</string>
</property>
</item>
<item>
<property name="text">
<string>utf16</string>
</property>
</item>
<item>
<property name="text">
<string>utf32</string>
</property>
</item>
<item>
<property name="text">
<string>ebcdic</string>
Expand Down
8 changes: 8 additions & 0 deletions src/resources/help/MemoryVisualizer.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The Memory Visualizer shows the contents of a region of memory in different form
* Binary
* Decimal
* Ascii
* UTF-8, UTF-16, UTF-32
* Ebcdic
* Assembly

Expand Down Expand Up @@ -68,8 +69,15 @@ The memory dump can be displayed in these formats:
The memory dump can be displayed in these character formats:

* Ascii
* UTF-8, UTF-16, UTF-32
* Ebcdic

Note that some visualizer features are restricted when displaying UTF. For example, clicking on
a byte in the memory dump does not highlight the corresponding character in the text view, because
a UTF-encoded character can occupy a variable number of bytes.

There may be a delay while the extra fonts needed for the additional characters that UTF provides are loaded.

### Column width

This specifies the column width of the memory dump.
Expand Down
1 change: 1 addition & 0 deletions tests/hellounicode/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hellounicode
10 changes: 10 additions & 0 deletions tests/hellounicode/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Builds the hellounicode test program from its single source file.
PROGRAM = hellounicode

# Neither target names a real file.
.PHONY: all clean

# Default target: build the program.
all: $(PROGRAM)

# Compile with debug info (-g); $@ expands to the target name.
$(PROGRAM): $(PROGRAM).cpp
	g++ -g -o $@ $(PROGRAM).cpp

# Remove the build outputs.
clean:
	rm -f $(PROGRAM) $(PROGRAM).o

31 changes: 31 additions & 0 deletions tests/hellounicode/hellounicode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include <iostream>
#include <stdlib.h>

// Test fixture: stores the same sentence in a wide string, a UTF-8 string,
// a UTF-16 string and a UTF-32 string, plus one UTF-16 string containing
// non-ASCII characters. It prints each string's length, takes a raw pointer
// to each string's character data, and returns 0.
// argc and argv are accepted but never used.
int main (int argc, char* argv[]) {

// Wide-character (wchar_t) copy of the sentence.
std::wstring w_redrum = { L"All work and no play makes Jack a dull boy.\n" };

// UTF-8 copy held in a std::string.
// NOTE(review): a u8 literal only initializes a std::string while u8 literals
// are char-based (before C++20) — confirm the language standard the build uses.
std::string u8_redrum = { u8"All work and no play makes Jack a dull boy.\n" };

// UTF-16 copy (char16_t code units).
std::u16string u16_redrum = { u"All work and no play makes Jack a dull boy.\n" };

// UTF-32 copy (char32_t code units).
std::u32string u32_redrum = { U"All work and no play makes Jack a dull boy.\n" };

// UTF-16 string whose text includes characters outside the ASCII range.
std::u16string u16_hungry = { u"nǐ chīle ma.\n" };

// Do something with the strings so they don't get optimized out.
// length() counts code units of each string's character type, not bytes.
std::cout << w_redrum.length() << std::endl;
std::cout << u8_redrum.length() << std::endl;
std::cout << u16_redrum.length() << std::endl;
std::cout << u32_redrum.length() << std::endl;
std::cout << u16_hungry.length() << std::endl;

// Raw pointers to each string's character data. Nothing below reads them;
// presumably they exist so a debugger can be pointed at the underlying
// memory — TODO confirm.
const wchar_t* w_ptr = w_redrum.data();
const char* u8_ptr = u8_redrum.data();
const char16_t* u16_ptr = u16_redrum.data();
const char32_t* u32_ptr = u32_redrum.data();
const char16_t* hungry_ptr = u16_hungry.data();

return 0;
}

0 comments on commit bb5c0ef

Please sign in to comment.