Skip to content

Commit

Permalink
garbled CJK Unified Ideographs Extension text test and fix (#987)
Browse files Browse the repository at this point in the history
* add CJK Unified Ideographs Extension B, C, D test

* Fixed garbled text

* fix unused import
  • Loading branch information
nao-ton authored Nov 15, 2023
1 parent 4f3ad5b commit 5e46ab0
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 4 deletions.
17 changes: 13 additions & 4 deletions openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ byte[] convertToBytes(String text, TextRenderingOptions options) {
private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingException {
int len = text.length();
int[] metrics = null;
char[] glyph = new char[len];
int[] glyph = new int[len];
int i = 0;
for (int k = 0; k < len; ++k) {
int val;
Expand All @@ -257,10 +257,19 @@ private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingE
Integer gl = m0;
if (!longTag.containsKey(gl))
longTag.put(gl, new int[]{m0, metrics[1], val});
glyph[i++] = (char)m0;
glyph[i++] = m0;
}
String s = new String(glyph, 0, i);
return s.getBytes(CJKFont.CJK_ENCODING);
return getCJKEncodingBytes(glyph, i);
}

/**
 * Packs glyph codes into a byte array using two bytes per glyph, high byte first.
 *
 * @param glyph glyph codes to encode; only bits 0-15 of each entry end up in the output
 * @param size  number of leading entries of {@code glyph} to encode
 * @return a newly allocated array of {@code size * 2} bytes
 */
private byte[] getCJKEncodingBytes(int[] glyph, int size) {
    final byte[] encoded = new byte[size * 2];
    int out = 0;
    for (int index = 0; index < size; index++) {
        final int code = glyph[index];
        // The narrowing cast keeps only the low 8 bits, so no explicit mask is needed.
        encoded[out++] = (byte) (code >>> 8);
        encoded[out++] = (byte) code;
    }
    return encoded;
}

byte[] convertToBytes(GlyphVector glyphVector) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.lowagie.text.pdf;

import java.io.FileOutputStream;
import java.io.IOException;

import org.junit.jupiter.api.Test;

import com.lowagie.text.Chunk;
import com.lowagie.text.Document;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;

/**
 * Generates a PDF containing characters from the CJK Unified Ideographs
 * Extension B, C and D blocks using an Identity-H encoded TrueType font.
 *
 * <p>The test makes no assertions on the produced file; it only checks that
 * generation completes without throwing. The output is written to
 * {@code target/PdfDocumentCJKExtensionTest.pdf}.
 */
class PdfDocumentCJKExtensionTest {

    /**
     * Registers the TakaoMjMincho font and writes a single chunk holding one
     * Extension B, one further Extension B, one Extension C and one
     * Extension D code point, separated by spaces.
     *
     * @throws IOException if the output file cannot be created or closed
     */
    @Test
    void generateDocumentsWithCJKExtension() throws IOException {
        String fontName = "TakaoMjMincho";

        // TakaoMjMincho Version 003.01.01
        // Please download the font and place it at the path below.
        // https://launchpad.net/takao-fonts
        // https://launchpad.net/takao-fonts/trunk/15.03/+download/TakaoMjFonts_00301.01.zip
        String fontPath = "src/test/resources/fonts/TakaoMjFonts/TakaoMjMincho.ttf";

        // register font
        FontFactory.register(fontPath, fontName);

        String outputPath = "target/" + PdfDocumentCJKExtensionTest.class.getSimpleName() + ".pdf";

        // try-with-resources guarantees the stream is released even when
        // PdfWriter.getInstance fails before the document takes ownership of it.
        try (FileOutputStream output = new FileOutputStream(outputPath)) {
            Document document = new Document();

            // Glyph substitution off (original note: "FOP off").
            // NOTE(review): presumably this forces the plain glyph-conversion path - confirm.
            document.setGlyphSubstitutionEnabled(false);

            PdfWriter.getInstance(document, output);

            try {
                Font font = FontFactory.getFont(fontName, "Identity-H", false, 10, 0, null);

                document.open();
                // CJK Unified Ideographs Extension B
                // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_Extension_B

                // NOTE(review): the OK/NG suffixes presumably record whether the character
                // rendered correctly before the garbled-text fix - confirm.
                // U+20000
                String cjkB_OK = "𠀀";
                // U+2A000
                String cjkB_NG = "𪀀";

                // CJK Unified Ideographs Extension C
                // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_Extension_C

                // U+2A746
                String cjkC_NG = "𪝆";

                // CJK Unified Ideographs Extension D
                // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_Extension_D

                // U+2B746
                String cjkD_NG = "𫝆";

                document.add(new Chunk(cjkB_OK + " " + cjkB_NG + " " + cjkC_NG + " " + cjkD_NG, font));

            } finally {
                document.close();
            }
        }
    }
}

0 comments on commit 5e46ab0

Please sign in to comment.