diff --git a/fonts/comic shanns 2.ttf b/fonts/comic shanns 2.ttf new file mode 100644 index 000000000..addc86459 Binary files /dev/null and b/fonts/comic shanns 2.ttf differ diff --git a/text_rendering/__init__.py b/text_rendering/__init__.py index 710b97297..a4836cd19 100644 --- a/text_rendering/__init__.py +++ b/text_rendering/__init__.py @@ -1,12 +1,13 @@ -from typing import List +from typing import List, Union from utils import Quadrilateral import numpy as np import cv2 import math from utils import findNextPowerOf2 - +import textwrap from . import text_render +from .text_render_eng import render_textblock_list_eng from textblockdetector.textblock import TextBlock def fg_bg_compare(fg, bg): @@ -154,4 +155,27 @@ def render(img_canvas, font_size, text_mag_ratio, trans_text, region, majority_d canvas_region = rgba_region[:, :, 0: 3] mask_region = rgba_region[:, :, 3: 4].astype(np.float32) / 255.0 img_canvas = np.clip((img_canvas.astype(np.float32) * (1 - mask_region) + canvas_region.astype(np.float32) * mask_region), 0, 255).astype(np.uint8) - return img_canvas \ No newline at end of file + return img_canvas + + + + +async def dispatch_eng_render(img_canvas: np.ndarray, text_regions: Union[List[TextBlock], List[Quadrilateral]], translated_sentences: List[str], font_path: str) -> np.ndarray : + if len(text_regions) == 0: + return img_canvas + + if isinstance(text_regions[0], Quadrilateral): + blk_list = [] + for region, tr in zip(text_regions, translated_sentences): + x = np.min(region.pts[:, 0]) + w = np.max(region.pts[:, 0]) - x + y = np.min(region.pts[:, 1]) + h = np.max(region.pts[:, 1]) - y + font_size = region.font_size * 0.7 + blk = TextBlock([x, y, w, h], lines=[region.pts], translation=tr, angle=region.angle, font_size=font_size) + blk_list.append(blk) + return render_textblock_list_eng(img_canvas, blk_list, font_path, size_tol=1.1) + + for blk, tr in zip(text_regions, translated_sentences): + blk.translation = tr + return render_textblock_list_eng(img_canvas, text_regions, font_path, size_tol=1.2) \ No newline at end of file diff --git a/text_rendering/text_render_eng.py b/text_rendering/text_render_eng.py new file mode 100644 index 000000000..f6e5ef7cf --- /dev/null +++ b/text_rendering/text_render_eng.py @@ -0,0 +1,115 @@ +from PIL import ImageFont, ImageDraw, Image +import numpy as np +from typing import List, Union +from textblockdetector import TextBlock + +from utils import Quadrilateral + + +class Line: + def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None: + self.text = text + self.pos_x = pos_x + self.pos_y = pos_y + self.length = int(length) + + +def text_to_word_list(text: str) -> List[str]: + text = text.upper().replace(' ', ' ') + processed_text = '' + + # dumb way to insure spaces between words + text_len = len(text) + for ii, c in enumerate(text): + if c in ['.', '?', '!'] and ii < text_len - 1: + next_c = text[ii + 1] + if next_c.isalpha() or next_c.isnumeric(): + processed_text += c + ' ' + else: + processed_text += c + else: + processed_text += c + word_list = processed_text.split(' ') + words = [] + skip_next = False + word_num = len(word_list) + for ii, word in enumerate(word_list): + if skip_next: + skip_next = False + continue + if ii < word_num - 1: + if len(word) == 1 or len(word_list[ii + 1]) == 1: + skip_next = True + word = word + ' ' + word_list[ii + 1] + words.append(word) + return words + +def render_textblock_list_eng(img: np.ndarray, blk_list: List[TextBlock], font_path: str, scale_quality=1.0, align_center=True, size_tol=1.0): + pilimg = Image.fromarray(img) + for blk in blk_list: + if blk.vertical: + blk.angle -= 90 + sw_r = 0.1 + fs = int(blk.font_size / (1 + 2*sw_r) * scale_quality) + min_bbox = blk.min_rect(rotate_back=False)[0] + bx, by = min_bbox[0] + bw, bh = min_bbox[2] - min_bbox[0] + cx, cy = bx + bw / 2, by + bh / 2 + bw = bw * scale_quality + + font = ImageFont.truetype(font_path, fs) + words = text_to_word_list(blk.translation) + if not len(words): + continue + + base_length = -1 + w_list = [] + + sw = int(sw_r * font.size) + line_height = int((1 + 2*sw_r) * font.getmetrics()[0]) + + for word in words: + wl = font.getlength(word) + w_list.append(wl) + if wl > base_length: + base_length = wl + base_length = max(base_length, bw) + space_l = font.getlength(' ') + pos_x, pos_y = 0, 0 + line = Line(words[0], 0, 0, w_list[0]) + line_lst = [line] + for word, wl in zip(words[1:], w_list[1:]): + added_len = int(space_l + wl + line.length) + if added_len > base_length: + pos_y += line_height + line = Line(word, 0, pos_y, wl) + line_lst.append(line) + else: + line.text = line.text + ' ' + word + line.length = added_len + last_line = line_lst[-1] + canvas_h = last_line.pos_y + line_height + canvas_w = int(base_length) + + font_color = (0, 0, 0) + stroke_color = (255, 255, 255) + img = Image.new('RGBA', (canvas_w, canvas_h), color = (0, 0, 0, 0)) + d = ImageDraw.Draw(img) + d.fontmode = 'L' + + for line in line_lst: + pos_x = int((base_length - line.length) / 2) if align_center else 0 + d.text((pos_x, line.pos_y), line.text, font=font, fill=font_color, stroke_width=sw, stroke_fill=stroke_color) + + if abs(blk.angle) > 3: + img = img.rotate(-blk.angle, expand=True) + im_w, im_h = img.size + scale = min(bh / im_h * size_tol, bw / im_w * size_tol) + if scale < 1: + img = img.resize((int(im_w*scale), int(im_h*scale))) + + im_w, im_h = img.size + paste_x, paste_y = int(cx - im_w / 2), int(cy - im_h / 2) + pilimg.paste(img, (paste_x, paste_y), mask=img) + + return np.array(pilimg) \ No newline at end of file diff --git a/textblockdetector/textblock.py b/textblockdetector/textblock.py index b7d7cfef8..38274cf43 100644 --- a/textblockdetector/textblock.py +++ b/textblockdetector/textblock.py @@ -131,7 +131,7 @@ def min_rect(self, rotate_back=True): min_bbox = np.array([[min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]]) if angled and rotate_back: min_bbox = rotate_polygons(center, min_bbox, -self.angle) - return min_bbox.reshape(-1, 4, 2) + return min_bbox.reshape(-1, 4, 2).astype(np.int64) # equivalent to qt's boundingRect, ignore angle def bounding_rect(self): @@ -365,8 +365,8 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.3, distanc blk.lines.append(blk2.lines[0]) blk.vec = vec_sum blk.angle = int(round(np.rad2deg(math.atan2(vec_sum[1], vec_sum[0])))) - if blk.vertical: - blk.angle -= 90 + # if blk.vertical: + # blk.angle -= 90 blk.norm = np.linalg.norm(vec_sum) blk.distance = np.append(blk.distance, blk2.distance[-1]) blk.font_size = fntsz_avg diff --git a/translate_demo.py b/translate_demo.py index 19a4b0858..61e100c91 100755 --- a/translate_demo.py +++ b/translate_demo.py @@ -36,6 +36,8 @@ parser.add_argument('--target-lang', default='CHS', type=str, help='destination language') parser.add_argument('--use-ctd', action='store_true', help='use comic-text-detector for text detection') parser.add_argument('--verbose', action='store_true', help='print debug info and save intermediate images') +parser.add_argument('--manga2eng', action='store_true', help='render English text translated from manga with some typesetting') +parser.add_argument('--eng-font', default='fonts/comic shanns 2.ttf', type=str, help='font used by manga2eng mode') args = parser.parse_args() def update_state(task_id, nonce, state) : @@ -181,11 +183,15 @@ async def infer( if mode == 'web' and task_id : update_state(task_id, nonce, 'render') # render translated texts - if detector == 'ctd' : - from text_rendering import dispatch_ctd_render - output = await dispatch_ctd_render(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, text_regions, render_text_direction_overwrite) + if args.target_lang == 'ENG' and args.manga2eng: + from text_rendering import dispatch_eng_render + output = await dispatch_eng_render(np.copy(img_inpainted), text_regions, translated_sentences, args.eng_font) else: - output = await dispatch_rendering(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, textlines, text_regions, render_text_direction_overwrite) + if detector == 'ctd' : + from text_rendering import dispatch_ctd_render + output = await dispatch_ctd_render(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, text_regions, render_text_direction_overwrite) + else: + output = await dispatch_rendering(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, textlines, text_regions, render_text_direction_overwrite) print(' -- Saving results') if alpha_ch is not None :