Merge pull request #1 from TheFloatingString/feature/main
Add uv pyproject, verbose parameter, and ruff formatting
irfansharif authored Feb 15, 2025
2 parents de05d5d + 1bddcbe commit 1041a92
Showing 2 changed files with 23 additions and 11 deletions.
24 changes: 13 additions & 11 deletions ollama-modal.py
@@ -7,18 +7,19 @@
 
 MODEL = os.environ.get("MODEL", "llama3:instruct")
 
+
 def pull(model: str = MODEL):
     subprocess.run(["systemctl", "daemon-reload"])
     subprocess.run(["systemctl", "enable", "ollama"])
     subprocess.run(["systemctl", "start", "ollama"])
     time.sleep(2)  # 2s, wait for the service to start
     subprocess.run(["ollama", "pull", model], stdout=subprocess.PIPE)
 
+
 image = (
-    modal.Image
-    .debian_slim()
+    modal.Image.debian_slim()
     .apt_install("curl", "systemctl")
-    .run_commands( # from https://github.com/ollama/ollama/blob/main/docs/linux.md
+    .run_commands(  # from https://github.com/ollama/ollama/blob/main/docs/linux.md
         "curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz",
         "tar -C /usr -xzf ollama-linux-amd64.tgz",
         "useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama",
@@ -34,6 +35,7 @@ def pull(model: str = MODEL):
 with image.imports():
     import ollama
 
+
 @app.cls(gpu="a10g", container_idle_timeout=300)
 class Ollama:
     @build()
@@ -54,17 +56,17 @@ def load(self):
         subprocess.run(["systemctl", "start", "ollama"])
 
     @method()
-    def infer(self, text: str):
+    def infer(self, text: str, verbose: bool = False):
         stream = ollama.chat(
-            model=MODEL,
-            messages=[{'role': 'user', 'content': text}],
-            stream=True
+            model=MODEL, messages=[{"role": "user", "content": text}], stream=True
         )
         for chunk in stream:
-            yield chunk['message']['content']
-            print(chunk['message']['content'], end='', flush=True)
+            yield chunk["message"]["content"]
+            if verbose:
+                print(chunk["message"]["content"], end="", flush=True)
         return
 
+
 # Convenience thing, to run using:
 #
 # $ modal run ollama-modal.py [--lookup] [--text "Why is the sky blue?"]
@@ -74,5 +76,5 @@ def main(text: str = "Why is the sky blue?", lookup: bool = False):
         ollama = modal.Cls.lookup("ollama", "Ollama")
     else:
         ollama = Ollama()
-    for chunk in ollama.infer.remote_gen(text):
-        print(chunk, end='', flush=False)
+    for chunk in ollama.infer.remote_gen(text, verbose=False):
+        print(chunk, end="", flush=False)
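Note on the new flag: verbose only toggles printing of each streamed chunk inside the container (useful when tailing container logs); the generator yields every chunk back to the caller either way. Below is a minimal sketch of calling infer with verbose enabled, assuming the app has been deployed (e.g. via modal deploy ollama-modal.py) under the name "ollama" that main() looks up above:

```python
# Sketch only: mirrors the lookup branch of main() above.
import modal

# "ollama" / "Ollama" are the app and class names used by main()'s lookup call.
ollama = modal.Cls.lookup("ollama", "Ollama")

# verbose=True makes the container print each chunk to its own logs,
# while the same chunks are streamed back to this caller.
for chunk in ollama.infer.remote_gen("Why is the sky blue?", verbose=True):
    print(chunk, end="", flush=True)
```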
10 changes: 10 additions & 0 deletions pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "ollama-modal"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "modal>=0.73.51",
+    "ruff>=0.9.6",
+]
