Rename testbeds

aorwall · Oct 30, 2024 · e6cc79a
1 parent 4c865d8
commit e6cc79a
Show file tree

Hide file tree

Showing 43 changed files with 340 additions and 347 deletions.
diff --git a/.gitignore b/.gitignore
@@ -57,4 +57,5 @@ playground
 moatless_testbeds.egg-info
 test_logs
 .ipynb_checkpoints
-__pycache__
+__pycache__
+.pypirc
diff --git a/README.md b/README.md
@@ -1,15 +1,16 @@
 # Moatless Testbeds
-Moatless Testbeds enables you to run testbeds as isolated pods in a Kubernetes cluster, orchestrated through a central API.
+Moatless Testbeds allows you to create isolated testbed environments in a Kubernetes cluster where you can apply code changes through git patches and run tests or SWE-Bench evaluations. 
 
 While initially tested with SWE-Bench's docker containerization solution, it supports any Docker image that meets the basic requirements:
 
 - Contains a git repository in the `/testbeds` directory for applying patches
 - Supports running tests with specific commands (e.g., `pytest [path to test file]`)
 
+
 #### Usage Example
 
 ```python
-from moatless_testbeds import TestbedSDK
+from testbeds.sdk import TestbedSDK
 
 sdk = TestbedSDK(
     base_url="http://<API-IP>",
@@ -43,13 +44,19 @@ git clone https://github.com/aorwall/moatless-testbeds.git
 cd moatless-testbeds
 
 # Install Testbeds SDK
-pip install -e .
+pip install moatless-testbeds
+
+# Set the Kubernetes namespace if not default
+# export KUBERNETES_NAMESPACE=testbeds  # default: testbeds
 
 # Optional: Set environment variables only if using custom images
 # If not set, will use default public images
-# export KUBERNETES_NAMESPACE=testbeds  # default: testbeds
 # export DOCKER_REGISTRY=your-registry  # default: aorwall
 
+# Optional: Enable direct command execution in testbeds
+# Warning: This allows arbitrary command execution and should be used with caution
+# export ENABLE_EXEC=true  # default: false
+
 # Run the install script
 ./scripts/install.sh
 ```
@@ -97,18 +104,22 @@ A successful run will show "✅ Evaluation completed successfully!" in the logs.
 ### Run tests
 
 ```bash
-python scripts/run_tests.py --instance-id <instance-id>
+python scripts/run_tests.py --instance-id <instance-id> [--test-files test1.py test2.py ...]
 ```
 
 For example:
 
 ```bash
+# Run the test files specified in the instance's test_patch
 python scripts/run_tests.py --instance-id django__django-11333
+
+# Run specific test files
+python scripts/run_tests.py --instance-id django__django-11333 --test-files tests/test_forms.py tests/test_models.py
 ```
 
 The script will:
 1. Create a new testbed instance
-2. Run the tests using the specified instance ID with the files specified in the instance's `test_patch`
+2. Run the specified test files, or fall back to the files in the instance's `test_patch` if none are specified
 3. Output the test results in JSON format
 4. Clean up the testbed instance
 

diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api
@@ -7,9 +7,9 @@ COPY requirements.txt /app/
 
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY testbed /app/testbed/
+COPY testbeds /app/testbeds/
 
-COPY testbed/api/main.py /app/
+COPY testbeds/api/main.py /app/
 COPY docker/entrypoint.sh /app/entrypoint.sh
 
 RUN mkdir -p /var/log && touch /var/log/testbed-api.log && chmod 666 /var/log/testbed-api.log

diff --git a/docker/Dockerfile.testbed b/docker/Dockerfile.testbed
@@ -6,7 +6,7 @@ COPY requirements.txt /app/
 
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY testbed /app/testbed/
+COPY testbeds /app/testbeds/
 
 COPY docker/entrypoint_testbed.sh /app/entrypoint.sh
 

diff --git a/docker/entrypoint_testbed.sh b/docker/entrypoint_testbed.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 set -e
 
-exec gunicorn --bind 0.0.0.0:8000 --workers 4 --timeout 30 --log-level info --capture-output --enable-stdio-inheritance "testbed.testbed.server:create_app()"
+exec gunicorn --bind 0.0.0.0:8000 --workers 4 --timeout 30 --log-level info --capture-output --enable-stdio-inheritance "testbeds.testbed.server:create_app()"
diff --git a/k8s/api-deployment.yaml b/k8s/api-deployment.yaml
@@ -34,6 +34,8 @@ spec:
           valueFrom:
             fieldRef:
               fieldPath: metadata.namespace
+        - name: ENABLE_EXEC
+          value: "${ENABLE_EXEC}"
       volumes:
       - name: api-keys
         secret:

diff --git a/k8s/k8s-ingress.yaml b/k8s/k8s-ingress.yaml
@@ -0,0 +1,24 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+  name: moatless-testbeds
+  namespace: testbed-dev
+spec:
+  ingressClassName: webapprouting.kubernetes.azure.com
+  rules:
+  - host: testbeds.moatless.ai
+    http:
+      paths:
+      - backend:
+          service:
+            name: testbed-api-service
+            port:
+              number: 80
+        path: /
+        pathType: Prefix
+  tls:
+  - hosts:
+    - testbeds.moatless.ai
+    secretName: moatless-testbeds-tls
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,17 @@
+[tool.poetry]
+name = "moatless-testbeds"
+version = "0.0.2"
+description = "Run testbeds as isolated pods in a Kubernetes cluster"
+authors = ["Albert Örwall <[email protected]>"]
+readme = "README.md"
+packages = [{include = "testbeds"}]
+
+[tool.poetry.dependencies]
+python = "^3.9"
+requests = "^2.32.3"
+pydantic = "^2.8.2"
+datasets = "^3.0.2"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/scripts/build_api.sh b/scripts/build_api.sh
@@ -1,5 +1,8 @@
 DOCKER_REGISTRY=${DOCKER_REGISTRY:-aorwall}
-API_DOCKER_IMAGE=${DOCKER_REGISTRY}/moatless-testbed-api:latest
+IMAGE_TAG=${IMAGE_TAG:-latest}
+API_DOCKER_IMAGE=${DOCKER_REGISTRY}/moatless-testbed-api:${IMAGE_TAG}
+
+echo "Building API Docker image: ${API_DOCKER_IMAGE}"
 
 set -e
 

diff --git a/scripts/deploy_api.sh b/scripts/deploy_api.sh
@@ -1,21 +1,14 @@
 #!/bin/bash
 
-DOCKER_IMAGE=${DOCKER_REGISTRY:-aorwall/moatless-testbed-api}
-NAMESPACE=${NAMESPACE:-default}
+export NAMESPACE=${KUBERNETES_NAMESPACE:-testbeds}
+export DOCKER_REGISTRY=${DOCKER_REGISTRY:-aorwall}
+export IMAGE_TAG=${IMAGE_TAG:-$(git rev-parse --short HEAD)}
 
-docker build -t ${DOCKER_IMAGE}:latest -f Dockerfile.api .
-docker push ${DOCKER_IMAGE}:latest
+echo "Deploying API to namespace: $NAMESPACE with image tag: $IMAGE_TAG"
 
-# Apply combined RBAC resources for API
-kubectl apply -f <(envsubst < infra/testbed-rbac.yaml)
+scripts/build_api.sh
 
-# Apply testbed sidecar service account and roles
-kubectl apply -f <(envsubst < infra/testbed-sa.yaml)
-kubectl apply -f <(envsubst < infra/testbed-role.yaml)
-kubectl apply -f <(envsubst < infra/testbed-rolebinding.yaml)
-
-kubectl apply -f <(envsubst < k8s/api-keys-secret.yaml)
 kubectl apply -f <(envsubst < k8s/api-deployment.yaml)
 kubectl apply -f <(envsubst < k8s/api-service.yaml)
 
-echo "Deployment completed in namespace: $NAMESPACE"
+echo "Deployment completed in namespace: $NAMESPACE with image tag: $IMAGE_TAG"
diff --git a/scripts/install.sh b/scripts/install.sh
@@ -1,9 +1,10 @@
 #!/bin/bash
 
 # Export the variables so envsubst can use them
-export NAMESPACE=${KUBERNETES_NAMESPACE:-testbeds}
+export TESTBED_NAMESPACE=${KUBERNETES_NAMESPACE:-testbeds}
 export DOCKER_REGISTRY=${DOCKER_REGISTRY:-aorwall}
 export IMAGE_TAG=${IMAGE_TAG:-latest}
+export ENABLE_EXEC=${ENABLE_EXEC:-false}
 
 set -e
 
@@ -14,10 +15,11 @@ if [ -z "$TESTBED_API_KEY" ]; then
 fi
 
 echo "Installing with configuration:"
-echo "  Namespace: $NAMESPACE"
+echo "  Namespace: $TESTBED_NAMESPACE"
 echo "  Docker Registry: $DOCKER_REGISTRY"
 echo "  Image Tag: $IMAGE_TAG"
 echo "  API Key: $TESTBED_API_KEY"
+echo "  Enable Exec: $ENABLE_EXEC"
 echo "---"
 
 kubectl apply -f <(envsubst < k8s/api-keys-secret.yaml)
@@ -29,13 +31,14 @@ kubectl apply -f <(envsubst < k8s/api-service.yaml)
 echo "---"
 echo "Waiting for external IP (this might take a few minutes)..."
 while true; do
-    export TESTBED_API_IP=$(kubectl get service testbed-api-service -n $NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null)
+    export TESTBED_API_IP=$(kubectl get service testbed-api-service -n $TESTBED_NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null)
     if [ -n "$TESTBED_API_IP" ]; then
         echo "API is available at: http://$TESTBED_API_IP"
 
         # Save IP to a file for later use
         echo "export TESTBED_API_IP=$TESTBED_API_IP" > .env.testbed
-        echo "export NAMESPACE=$NAMESPACE" >> .env.testbed
+        echo "export TESTBED_HOSTNAME=http://$TESTBED_API_IP" >> .env.testbed
+        echo "export TESTBED_NAMESPACE=$TESTBED_NAMESPACE" >> .env.testbed
         echo "export TESTBED_API_KEY=$TESTBED_API_KEY" >> .env.testbed
 
         break
@@ -44,15 +47,8 @@ while true; do
     sleep 5
 done
 
-echo "Installation complete!"
-
 echo "---"
-echo "Verifying installation..."
-
 echo "Checking health endpoint http://$TESTBED_API_IP/health (this might take a few minutes)..."
 curl "http://$TESTBED_API_IP/health"
 
-echo "Verifying testbed instance django__django-11133..."
-python scripts/verify.py --instance-id django__django-11133
-
-echo "Verification complete!"
+echo "Installation complete!"
diff --git a/scripts/run_evaluation.py b/scripts/run_evaluation.py
@@ -7,7 +7,7 @@
 import argparse
 
 from dotenv import load_dotenv
-from testbed.sdk import TestbedSDK
+from testbeds.sdk import TestbedSDK
 
 logging.basicConfig(
     level=logging.INFO, 
@@ -16,24 +16,27 @@
 logger = logging.getLogger(__name__)
 
 
-def run_evaluation(instance_id="django__django-11133"):
-    namespace = os.getenv("NAMESPACE")
-    ip = os.getenv("TESTBED_API_IP")
-    api_key = os.getenv("TESTBED_API_KEY")
-
-    if not all([namespace, ip, api_key]):
-        logger.error("Missing required environment variables")
+def run_evaluation(instance_id: str):    
+    if not os.getenv("TESTBED_HOSTNAME"):
+        logger.error("TESTBED_HOSTNAME is not set")
+        return False
+    
+    if not os.getenv("TESTBED_API_KEY"):
+        logger.error("TESTBED_API_KEY is not set")
         return False
 
+    hostname = os.getenv("TESTBED_HOSTNAME")
+    api_key = os.getenv("TESTBED_API_KEY")
+
     logger.info(f"Starting evaluation for instance: {instance_id}")
 
     try:
         sdk = TestbedSDK(
-            base_url=f"http://{ip}",
+            base_url=hostname,
             api_key=api_key
         )
 
-        logger.info("Creating evaluation instance...")
+        logger.info("Creating testbed instance...")
         testbed = sdk.create_client(instance_id=instance_id)
         logger.info(f"Created Testbed ID: {testbed.testbed_id}")
 
@@ -43,7 +46,7 @@ def run_evaluation(instance_id="django__django-11133"):
         logger.info("Running evaluation script...")
         result = testbed.run_evaluation()
 
-        logger.info("Cleaning up evaluation instance...")
+        logger.info("Cleaning up testbed instance...")
         sdk.delete_testbed(testbed_id=testbed.testbed_id)
 
         if result.resolved:
@@ -62,7 +65,7 @@ def run_evaluation(instance_id="django__django-11133"):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Run evaluation on Testbed API')
     parser.add_argument('--instance-id', type=str,
-                        help='Instance ID to use for evaluation (default: django__django-11133)')
+                        help='Instance ID to use for evaluation (e.g., django__django-11133)')
 
     args = parser.parse_args()
     success = run_evaluation(args.instance_id)

diff --git a/scripts/run_tests.py b/scripts/run_tests.py
@@ -7,7 +7,7 @@
 import argparse
 
 from dotenv import load_dotenv
-from testbed.sdk import TestbedSDK
+from testbeds.sdk import TestbedSDK
 
 logging.basicConfig(
     level=logging.INFO, 
@@ -16,36 +16,41 @@
 logger = logging.getLogger(__name__)
 
 
-def run_evaluation(instance_id="django__django-11133"):
-    namespace = os.getenv("NAMESPACE")
-    ip = os.getenv("TESTBED_API_IP")
-    api_key = os.getenv("TESTBED_API_KEY")
-
-    if not all([namespace, ip, api_key]):
-        logger.error("Missing required environment variables")
+def run_tests(instance_id: str, test_files: list[str] = None):
+    if not os.getenv("TESTBED_HOSTNAME"):
+        logger.error("TESTBED_HOSTNAME is not set")
+        return False
+    
+    if not os.getenv("TESTBED_API_KEY"):
+        logger.error("TESTBED_API_KEY is not set")
         return False
 
+    hostname = os.getenv("TESTBED_HOSTNAME")
+    api_key = os.getenv("TESTBED_API_KEY")
+
     logger.info(f"Starting evaluation for instance: {instance_id}")
 
     try:
         sdk = TestbedSDK(
-            base_url=f"http://{ip}",
+            base_url=hostname,
             api_key=api_key
         )
 
-        logger.info("Creating evaluation instance...")
+        logger.info("Creating testbed instance...")
         testbed = sdk.create_client(instance_id=instance_id)
         logger.info(f"Created Testbed ID: {testbed.testbed_id}")
 
         logger.info(f"Waiting for testbed to be ready...")
         testbed.wait_until_ready()
 
-        test_files = testbed.test_spec.get_test_patch_files()
+        # Use provided test files or fall back to test_patch files
+        if test_files is None:
+            test_files = testbed.test_spec.get_test_patch_files()
 
         logger.info("Running tests...")
         result = testbed.run_tests(test_files)
 
-        logger.info("Cleaning up evaluation instance...")
+        logger.info("Cleaning up testbed instance...")
         sdk.delete_testbed(testbed_id=testbed.testbed_id)
 
         logger.info(f"Test results:\n{result.model_dump_json(indent=2)}")
@@ -58,10 +63,12 @@ def run_evaluation(instance_id="django__django-11133"):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Run evaluation on Testbed API')
+    parser = argparse.ArgumentParser(description='Run tests on Testbed API')
     parser.add_argument('--instance-id', type=str,
-                        help='Instance ID to use for evaluation (default: django__django-11133)')
+                        help='Instance ID to test (e.g., django__django-11133)')
+    parser.add_argument('--test-files', nargs='+',
+                        help='List of test files to run (optional)')
 
     args = parser.parse_args()
-    success = run_evaluation(args.instance_id)
+    success = run_tests(args.instance_id, args.test_files)
     sys.exit(0 if success else 1)