Skip to content

Commit

Permalink
Merge pull request #862 from popcion/main
Browse files Browse the repository at this point in the history
add pre/post dict for web mode; add 5 useful args for webui; fix AssertionError
  • Loading branch information
zyddnys authored Mar 6, 2025
2 parents b964df3 + 2035103 commit 687d9ca
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 16 deletions.
12 changes: 12 additions & 0 deletions manga_translator/ocr/model_48px.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,18 @@ def infer_beam_batch_tensor(self, img: torch.FloatTensor, img_widths: List[int],
batch_index = batch_index.index_select(0, torch.tensor(remaining_indexs, device=img.device))

# Ensure we have the correct number of finished hypotheses for each sample
if len(finished_hypos) < N: # Fallback if not enough finished hypos
for i in range(N):
if i not in finished_hypos:
# Select the best hypothesis available at the end of beam search
sample_indices = (batch_index == i).nonzero(as_tuple=True)[0]
if sample_indices.numel() > 0:
best_hypo_index = sample_indices[0] # Take the first one as fallback
finished_hypos[i] = out_idx[best_hypo_index], torch.exp(log_probs[best_hypo_index]).item(), cached_activations[best_hypo_index]
else:
# If no hypothesis is available at all (very unlikely, but for robustness)
finished_hypos[i] = (torch.tensor([end_tok], device=img.device), 0.0, torch.zeros(cached_activations.shape[1:], device=img.device)) # Dummy hypo

assert len(finished_hypos) == N

# Final output processing and color predictions
Expand Down
35 changes: 34 additions & 1 deletion server/args.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,37 @@
import argparse
import os
from urllib.parse import unquote

def url_decode(s):
    """Percent-decode *s* and reduce a ``file:///`` URL to a plain path.

    The input is first run through ``urllib.parse.unquote``. If the decoded
    text begins with ``file:///``, the leading ``file://`` is removed so the
    result still starts with a single ``/``. Any other input is returned as
    decoded, unchanged otherwise.
    """
    scheme = 'file://'
    decoded = unquote(s)
    # Match on three slashes but strip only two: the third one is the root
    # of the path and has to survive.
    return decoded[len(scheme):] if decoded.startswith(scheme + '/') else decoded

# Additional argparse types
def path(string):
    """argparse ``type=`` converter for a path that must already exist.

    A falsy *string* yields ``''``. Otherwise ``~`` is expanded, the result is
    passed through ``url_decode``, and the resolved path is returned if it
    exists on disk (file or directory).

    Raises:
        argparse.ArgumentTypeError: if the resolved path does not exist.
    """
    if string:
        resolved = url_decode(os.path.expanduser(string))
        if os.path.exists(resolved):
            return resolved
        # Report the value exactly as the user typed it, not the resolved form.
        raise argparse.ArgumentTypeError(f'No such file or directory: "{string}"')
    return ''

def file_path(string):
    """argparse ``type=`` converter for a path that must be an existing file.

    A falsy *string* yields ``''``. Otherwise ``~`` is expanded, the result is
    passed through ``url_decode``, and the resolved path is returned.

    Raises:
        argparse.ArgumentTypeError: if the resolved path does not exist or
            refers to a directory.
    """
    if not string:
        return ''
    s = url_decode(os.path.expanduser(string))
    # A bare existence check would let a directory through as a "file".
    # Reject directories explicitly, but keep accepting anything else that
    # exists so non-regular files (pipes, device nodes) still work.
    if not os.path.exists(s) or os.path.isdir(s):
        raise argparse.ArgumentTypeError(f'No such file: "{string}"')
    return s

def dir_path(string):
    """argparse ``type=`` converter for a path that must be an existing directory.

    A falsy *string* yields ``''``. Otherwise ``~`` is expanded, the result is
    passed through ``url_decode``, and the resolved path is returned.

    Raises:
        argparse.ArgumentTypeError: if the resolved path is not an existing
            directory.
    """
    if not string:
        return ''
    s = url_decode(os.path.expanduser(string))
    # isdir() is False both for missing paths and for regular files, so one
    # check covers "does not exist" and "exists but is not a directory".
    if not os.path.isdir(s):
        raise argparse.ArgumentTypeError(f'No such directory: "{string}"')
    # Return the resolved path string (not this function object).
    return s

def parse_arguments():
parser = argparse.ArgumentParser(description="Specify host and port for the server.")
Expand All @@ -13,7 +44,9 @@ def parse_arguments():
parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.')
parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication')
parser.add_argument('--models-ttl', default='0', type=int, help='models TTL in memory in seconds')
parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file')
parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file')
g = parser.add_mutually_exclusive_group()
g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)')
g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)')
return parser.parse_args()
return parser.parse_args()
115 changes: 100 additions & 15 deletions server/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,80 @@ <h1 class="text-center text-lg font-light">Image/Manga Translator</h1>
<i class="iconify absolute top-1.5 right-1 pointer-events-none"
data-icon="carbon:chevron-down"></i>
</div>
</div>
</div>
</div>
</div>

<div class="flex mx-4 justify-start items-end">
<div class="flex gap-4">
<div class="flex items-center" title="Inpainting Size">
<i class="iconify" data-icon="carbon:paint-brush"></i>
<div class="relative">
<select class="w-9ch appearance-none bg-transparent border-b border-gray-300"
v-model="inpaintingSize">
<option value="516">516px</option>
<option value="1024">1024px</option>
<option value="2048">2048px</option>
<option value="2560">2560px</option>
</select>
<i class="iconify absolute top-1.5 right-1 pointer-events-none"
data-icon="carbon:chevron-down"></i>
</div>
</div>
<div class="flex items-center" title="Unclip Ratio">
<i class="iconify" data-icon="weui:max-window-filled"></i>
<div class="relative">
<input type="number"
class="w-9ch appearance-none bg-transparent border-b border-gray-300"
v-model="customUnclipRatio"
placeholder="2.3 (Default)"
step="0.01"
value="2.3" />
</div>
</div>
<div class="flex items-center gap-1" title="Box Threshold">
<i class="iconify" data-icon="weui:photo-wall-outlined"></i>
<div class="relative">
<input type="number"
class="w-9ch appearance-none bg-transparent border-b border-gray-300"
v-model="customBoxThreshold"
placeholder="0.7 (Default)"
step="0.01"
value="0.7" />
</div>
</div>

<div class="flex items-center gap-1" title="Mask Dilation Offset">
<i class="iconify" data-icon="material-symbols:adjust-outline"></i>
<div class="relative">
<input type="number"
class="w-9ch appearance-none bg-transparent border-b border-gray-300"
v-model="maskDilationOffset"
placeholder="30 (Default)"
step="1"
value="30" />
</div>
</div>
<div class="flex items-center gap-1" title="Inpainter">
<i class="iconify" data-icon="carbon:paint-brush"></i>
<div class="relative">
<select class="w-12ch appearance-none bg-transparent border-b border-gray-300"
v-model="inpainter">
<option value="default">Default</option>
<option value="lama_large">Lama Large</option>
<option value="lama_mpe">Lama MPE</option>
<option value="sd">SD</option>
<option value="none">None</option>
<option value="original">Original</option>
</select>
<i class="iconify absolute top-1.5 right-1 pointer-events-none"
data-icon="carbon:chevron-down"></i>
</div>
</div>

</div>
</div>

<div v-if="result" class="flex flex-col items-center">
<img class="my-2" :src="resultUri"/>
<button class="px-2 py-1 text-center rounded-md text-blue-800 border-2 border-blue-300" @click="clear">
Expand Down Expand Up @@ -204,13 +275,20 @@ <h1 class="text-center text-lg font-light">Image/Manga Translator</h1>
textDetector: 'default',
renderTextDirection: 'auto',
translator: 'youdao',
validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'sakura', 'offline', 'gpt3.5', 'deepseek', 'none'],
validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'sakura', 'offline', 'openai', 'deepseek', 'none'],
getTranslatorName(key) {
if (key === 'none')
return "No Text"
return key ? key[0].toUpperCase() + key.slice(1) : "";
},
targetLanguage: 'CHS',

inpaintingSize: '2048',
customUnclipRatio: 2.3,
customBoxThreshold: 0.7,
maskDilationOffset: 30,
inpainter: 'default',

ondrop(e) {
const file = e.dataTransfer?.files?.[0]
if (file && acceptTypes.includes(file.type)) {
Expand Down Expand Up @@ -308,19 +386,26 @@ <h1 class="text-center text-lg font-light">Image/Manga Translator</h1>
const formData = new FormData()
formData.append('image', this.file)

const config = `{
"detector": {
"detector": "${this.textDetector}",
"detection_size": ${this.detectionResolution}
},
"render": {
"direction": "${this.renderTextDirection}"
},
"translator": {
"translator": "${this.translator}",
"target_lang": "${this.targetLanguage}"
}
}`;
const config = `{
"detector": {
"detector": "${this.textDetector}",
"detection_size": ${this.detectionResolution},
"box_threshold": ${this.customBoxThreshold},
"unclip_ratio": ${this.customUnclipRatio}
},
"render": {
"direction": "${this.renderTextDirection}"
},
"translator": {
"translator": "${this.translator}",
"target_lang": "${this.targetLanguage}"
},
"inpainter": {
"inpainter": "${this.inpainter}",
"inpainting_size": ${this.inpaintingSize}
},
"mask_dilation_offset": ${this.maskDilationOffset}
}`;

formData.append('config', config)

Expand Down
4 changes: 4 additions & 0 deletions server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: Names
cmds.append('--verbose')
if params.models_ttl:
cmds.append('--models-ttl=%s' % params.models_ttl)
# Forward the optional dictionary paths to the translator subprocess.
if params.pre_dict:
    cmds.extend(['--pre-dict', params.pre_dict])
# Guard on post_dict itself: testing pre_dict here would drop --post-dict
# when only it is set, and would append a None argument when only
# --pre-dict is set.
if params.post_dict:
    cmds.extend(['--post-dict', params.post_dict])
base_path = os.path.dirname(os.path.abspath(__file__))
parent = os.path.dirname(base_path)
proc = subprocess.Popen(cmds, cwd=parent)
Expand Down

0 comments on commit 687d9ca

Please sign in to comment.