Merge pull request #862 from popcion/main

add pre/post dict for web mode; add 5 useful args for webui; fix AssertionError
zyddnys · Mar 6, 2025 · 687d9ca · 687d9ca
2 parents b964df3 + 2035103
commit 687d9ca
Show file tree

Hide file tree

Showing 4 changed files with 150 additions and 16 deletions.
diff --git a/manga_translator/ocr/model_48px.py b/manga_translator/ocr/model_48px.py
@@ -755,6 +755,18 @@ def infer_beam_batch_tensor(self, img: torch.FloatTensor, img_widths: List[int],
             batch_index = batch_index.index_select(0, torch.tensor(remaining_indexs, device=img.device))
 
         # Ensure we have the correct number of finished hypotheses for each sample
+        if len(finished_hypos) < N: # Fallback if not enough finished hypos
+            for i in range(N):
+                if i not in finished_hypos:
+                    # Select the best hypothesis available at the end of beam search
+                    sample_indices = (batch_index == i).nonzero(as_tuple=True)[0]
+                    if sample_indices.numel() > 0:
+                        best_hypo_index = sample_indices[0] # Take the first one as fallback
+                        finished_hypos[i] = out_idx[best_hypo_index], torch.exp(log_probs[best_hypo_index]).item(), cached_activations[best_hypo_index]
+                    else:
+                        # If no hypothesis is available at all (very unlikely, but for robustness)
+                        finished_hypos[i] = (torch.tensor([end_tok], device=img.device), 0.0, torch.zeros(cached_activations.shape[1:], device=img.device)) # Dummy hypo
+
         assert len(finished_hypos) == N
 
         # Final output processing and color predictions

diff --git a/server/args.py b/server/args.py
@@ -1,6 +1,37 @@
 import argparse
 import os
+from urllib.parse import unquote
 
+def url_decode(s):
+    """Percent-decode ``s`` and reduce a ``file:///`` URL to a plain path.
+
+    Only the ``file://`` scheme part is removed, so the third slash is kept
+    as the leading slash of the returned path. Strings that do not start
+    with ``file:///`` are returned percent-decoded and otherwise unchanged.
+    """
+    decoded = unquote(s)
+    scheme = 'file://'
+    if not decoded.startswith(scheme + '/'):
+        return decoded
+    return decoded[len(scheme):]
+
+# Additional argparse types
+def path(string):
+    """argparse ``type`` callable for an existing filesystem path.
+
+    A falsy value yields ``''`` without touching the filesystem. Otherwise
+    ``~`` is expanded, the result is passed through ``url_decode``, and the
+    resolved path is returned if it exists.
+
+    Raises:
+        argparse.ArgumentTypeError: if the resolved path does not exist.
+    """
+    if string:
+        resolved = url_decode(os.path.expanduser(string))
+        if os.path.exists(resolved):
+            return resolved
+        raise argparse.ArgumentTypeError(f'No such file or directory: "{string}"')
+    return ''
+
+def file_path(string):
+    """argparse ``type`` callable for an option that names a file.
+
+    A falsy value yields ``''``. Otherwise ``~`` is expanded, the result is
+    passed through ``url_decode``, and the resolved path is returned.
+    Only existence is checked (``os.path.exists``); the path is not
+    verified to be a regular file.
+
+    Raises:
+        argparse.ArgumentTypeError: if the resolved path does not exist.
+    """
+    if string:
+        resolved = url_decode(os.path.expanduser(string))
+        if os.path.exists(resolved):
+            return resolved
+        raise argparse.ArgumentTypeError(f'No such file: "{string}"')
+    return ''
+
+def dir_path(string):
+    """argparse ``type`` callable for an option that names a directory.
+
+    A falsy value yields ``''``. Otherwise ``~`` is expanded, the result is
+    passed through ``url_decode``, and the resolved path string is returned.
+    Only existence is checked (``os.path.exists``); the path is not
+    verified to be a directory.
+
+    Raises:
+        argparse.ArgumentTypeError: if the resolved path does not exist.
+    """
+    if not string:
+        return ''
+    s = url_decode(os.path.expanduser(string))
+    if not os.path.exists(s):
+        raise argparse.ArgumentTypeError(f'No such directory: "{string}"')
+    # Return the resolved path, not the function object itself: argparse
+    # stores whatever this callable returns as the option's value.
+    return s
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description="Specify host and port for the server.")
@@ -13,7 +44,9 @@ def parse_arguments():
     parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.')
     parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication')
     parser.add_argument('--models-ttl', default='0', type=int, help='models TTL in memory in seconds')
+    parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file')
+    parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file')    
     g = parser.add_mutually_exclusive_group()
     g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)')
     g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)')
-    return parser.parse_args()
+    return parser.parse_args()
diff --git a/server/index.html b/server/index.html
@@ -107,9 +107,80 @@ <h1 class="text-center text-lg font-light">Image/Manga Translator</h1>
                         <i class="iconify absolute top-1.5 right-1 pointer-events-none"
                            data-icon="carbon:chevron-down"></i>
                     </div>
-                </div>
+                </div>			
             </div>
         </div>
+
+		<div class="flex mx-4 justify-start items-end">
+			<div class="flex gap-4">
+				<div class="flex items-center" title="Inpainting Size">
+					<i class="iconify" data-icon="carbon:paint-brush"></i>
+					<div class="relative">
+						<select class="w-9ch appearance-none bg-transparent border-b border-gray-300"
+								v-model="inpaintingSize">
+							<option value="516">516px</option>	
+							<option value="1024">1024px</option>
+							<option value="2048">2048px</option>
+							<option value="2560">2560px</option>
+						</select>
+						<i class="iconify absolute top-1.5 right-1 pointer-events-none"
+						   data-icon="carbon:chevron-down"></i>
+					</div>
+				</div>
+                <div class="flex items-center" title="Unclip Ratio">
+                    <i class="iconify" data-icon="weui:max-window-filled"></i>
+                    <div class="relative">
+                        <input type="number"
+                               class="w-9ch appearance-none bg-transparent border-b border-gray-300"
+                               v-model="customUnclipRatio" 
+                               placeholder="2.3 (Default)" 
+                               step="0.01" 
+                               value="2.3" /> 
+                    </div>
+				</div>	
+                <div class="flex items-center gap-1" title="Box Threshold">
+                    <i class="iconify" data-icon="weui:photo-wall-outlined"></i>
+                    <div class="relative">
+                        <input type="number"
+                               class="w-9ch appearance-none bg-transparent border-b border-gray-300"
+                               v-model="customBoxThreshold" 
+                               placeholder="0.7 (Default)" 
+                               step="0.01" 
+                               value="0.7" /> 
+                    </div>
+                </div>
+
+                <div class="flex items-center gap-1" title="Mask Dilation Offset">
+                    <i class="iconify" data-icon="material-symbols:adjust-outline"></i>
+                    <div class="relative">
+                        <input type="number"
+                               class="w-9ch appearance-none bg-transparent border-b border-gray-300"
+                               v-model="maskDilationOffset"
+                               placeholder="30 (Default)"
+                               step="1"
+                               value="30" />
+                    </div>
+                </div>
+                <div class="flex items-center gap-1" title="Inpainter">
+                    <i class="iconify" data-icon="carbon:paint-brush"></i>
+                    <div class="relative">
+                        <select class="w-12ch appearance-none bg-transparent border-b border-gray-300"
+                                v-model="inpainter">
+                            <option value="default">Default</option>
+                            <option value="lama_large">Lama Large</option>
+                            <option value="lama_mpe">Lama MPE</option>
+                            <option value="sd">SD</option>
+                            <option value="none">None</option>
+							<option value="original">Original</option>
+                        </select>
+                        <i class="iconify absolute top-1.5 right-1 pointer-events-none"
+                           data-icon="carbon:chevron-down"></i>
+                    </div>
+                </div>				
+
+			</div>
+		</div>		
+
         <div v-if="result" class="flex flex-col items-center">
             <img class="my-2" :src="resultUri"/>
             <button class="px-2 py-1 text-center rounded-md text-blue-800 border-2 border-blue-300" @click="clear">
@@ -204,13 +275,20 @@ <h1 class="text-center text-lg font-light">Image/Manga Translator</h1>
         textDetector: 'default',
         renderTextDirection: 'auto',
         translator: 'youdao',
-        validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'sakura', 'offline', 'gpt3.5', 'deepseek', 'none'],
+        validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'sakura', 'offline', 'openai', 'deepseek', 'none'],
         getTranslatorName(key) {
             if (key === 'none')
                 return "No Text"
             return key ? key[0].toUpperCase() + key.slice(1) : "";
         },
         targetLanguage: 'CHS',
+
+        inpaintingSize: '2048', 
+        customUnclipRatio: 2.3,
+        customBoxThreshold: 0.7,  
+	maskDilationOffset: 30,
+        inpainter: 'default',
+
         ondrop(e) {
             const file = e.dataTransfer?.files?.[0]
             if (file && acceptTypes.includes(file.type)) {
@@ -308,19 +386,26 @@ <h1 class="text-center text-lg font-light">Image/Manga Translator</h1>
             const formData = new FormData()
             formData.append('image', this.file)
 
-            const config = `{
-                "detector": {
-                    "detector": "${this.textDetector}",
-                    "detection_size": ${this.detectionResolution}
-                },
-                "render": {
-                    "direction": "${this.renderTextDirection}"
-                },
-                "translator": {
-                    "translator": "${this.translator}",
-                    "target_lang": "${this.targetLanguage}"
-                }
-            }`;
+			const config = `{
+				"detector": {
+					"detector": "${this.textDetector}",
+					"detection_size": ${this.detectionResolution},
+					"box_threshold": ${this.customBoxThreshold},
+					"unclip_ratio": ${this.customUnclipRatio}
+				},
+				"render": {
+					"direction": "${this.renderTextDirection}"
+				},
+				"translator": {
+					"translator": "${this.translator}",
+					"target_lang": "${this.targetLanguage}"
+				},
+				"inpainter": {
+					"inpainter": "${this.inpainter}",					
+					"inpainting_size": ${this.inpaintingSize}
+				},
+				"mask_dilation_offset": ${this.maskDilationOffset}
+			}`;
 
             formData.append('config', config)
 

diff --git a/server/main.py b/server/main.py
@@ -158,6 +158,10 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: Names
         cmds.append('--verbose')
     if params.models_ttl:
         cmds.append('--models-ttl=%s' % params.models_ttl)
+    # Forward each dictionary option only when that specific option was given;
+    # each flag is guarded by its own value so None is never put into cmds.
+    if params.pre_dict:
+        cmds.extend(['--pre-dict', params.pre_dict])
+    if params.post_dict:
+        cmds.extend(['--post-dict', params.post_dict])
     base_path = os.path.dirname(os.path.abspath(__file__))
     parent = os.path.dirname(base_path)
     proc = subprocess.Popen(cmds, cwd=parent)