Skip to content

Commit

Permalink
Rename testbeds
Browse files Browse the repository at this point in the history
  • Loading branch information
aorwall committed Oct 30, 2024
1 parent 4c865d8 commit e6cc79a
Show file tree
Hide file tree
Showing 43 changed files with 340 additions and 347 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,5 @@ playground
moatless_testbeds.egg-info
test_logs
.ipynb_checkpoints
__pycache__
__pycache__
.pypirc
23 changes: 17 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# Moatless Testbeds
Moatless Testbeds enables you to run testbeds as isolated pods in a Kubernetes cluster, orchestrated through a central API.
Moatless Testbeds allows you to create isolated testbed environments in a Kubernetes cluster where you can apply code changes through git patches and run tests or SWE-Bench evaluations.

While initially tested with SWE-Bench's Docker containerization solution, it supports any Docker image that meets the following basic requirements:

- Contains a git repository in the `/testbeds` directory for applying patches
- Supports running tests with specific commands (e.g., `pytest [path to test file]`)


#### Usage Example

```python
from moatless_testbeds import TestbedSDK
from testbeds.sdk import TestbedSDK

sdk = TestbedSDK(
base_url="http://<API-IP>",
Expand Down Expand Up @@ -43,13 +44,19 @@ git clone https://github.com/aorwall/moatless-testbeds.git
cd moatless-testbeds

# Install Testbeds SDK
pip install -e .
pip install moatless-testbeds

# Set the Kubernetes namespace if not default
# export KUBERNETES_NAMESPACE=testbeds # default: testbeds

# Optional: set these environment variables only if you use custom images.
# If they are not set, the default public images are used.
# export KUBERNETES_NAMESPACE=testbeds # default: testbeds
# export DOCKER_REGISTRY=your-registry # default: aorwall

# Optional: Enable direct command execution in testbeds
# Warning: This allows arbitrary command execution and should be used with caution
# export ENABLE_EXEC=true # default: false

# Run the install script
./scripts/install.sh
```
Expand Down Expand Up @@ -97,18 +104,22 @@ A successful run will show "✅ Evaluation completed successfully!" in the logs.
### Run tests

```bash
python scripts/run_tests.py --instance-id <instance-id>
python scripts/run_tests.py --instance-id <instance-id> [--test-files test1.py test2.py ...]
```

For example:

```bash
# Run with test_patch files
python scripts/run_tests.py --instance-id django__django-11333

# Run specific test files
python scripts/run_tests.py --instance-id django__django-11333 --test-files tests/test_forms.py tests/test_models.py
```

The script will:
1. Create a new testbed instance
2. Run the tests using the specified instance ID with the files specified in the instance's `test_patch`
2. Run the test files passed via `--test-files`, or fall back to the files in the instance's `test_patch` if none are specified
3. Output the test results in JSON format
4. Clean up the testbed instance

Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.api
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ COPY requirements.txt /app/

RUN pip install --no-cache-dir -r requirements.txt

COPY testbed /app/testbed/
COPY testbeds /app/testbeds/

COPY testbed/api/main.py /app/
COPY testbeds/api/main.py /app/
COPY docker/entrypoint.sh /app/entrypoint.sh

RUN mkdir -p /var/log && touch /var/log/testbed-api.log && chmod 666 /var/log/testbed-api.log
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.testbed
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ COPY requirements.txt /app/

RUN pip install --no-cache-dir -r requirements.txt

COPY testbed /app/testbed/
COPY testbeds /app/testbeds/

COPY docker/entrypoint_testbed.sh /app/entrypoint.sh

Expand Down
2 changes: 1 addition & 1 deletion docker/entrypoint_testbed.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
set -e

exec gunicorn --bind 0.0.0.0:8000 --workers 4 --timeout 30 --log-level info --capture-output --enable-stdio-inheritance "testbed.testbed.server:create_app()"
exec gunicorn --bind 0.0.0.0:8000 --workers 4 --timeout 30 --log-level info --capture-output --enable-stdio-inheritance "testbeds.testbed.server:create_app()"
2 changes: 2 additions & 0 deletions k8s/api-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: ENABLE_EXEC
value: "${ENABLE_EXEC}"
volumes:
- name: api-keys
secret:
Expand Down
24 changes: 24 additions & 0 deletions k8s/k8s-ingress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Ingress that routes HTTP(S) traffic for testbeds.moatless.ai to the
# testbed-api-service Service on port 80.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
# Names the cert-manager ClusterIssuer to use; presumably cert-manager issues
# the certificate stored in the TLS secret below — TODO confirm the issuer exists.
cert-manager.io/cluster-issuer: letsencrypt-prod
name: moatless-testbeds
# NOTE(review): the namespace is fixed to "testbed-dev" here, whereas the install
# scripts default KUBERNETES_NAMESPACE to "testbeds" — confirm this manifest is
# only meant for a separate dev environment.
namespace: testbed-dev
spec:
# NOTE(review): looks like the Azure application-routing ingress class, which
# would make this manifest cluster-specific — verify before reusing elsewhere.
ingressClassName: webapprouting.kubernetes.azure.com
rules:
- host: testbeds.moatless.ai
http:
paths:
- backend:
service:
name: testbed-api-service
port:
number: 80
# Prefix match on "/" sends every request path for this host to the backend.
path: /
pathType: Prefix
tls:
- hosts:
- testbeds.moatless.ai
# Secret that holds the TLS certificate and key for the host listed above.
secretName: moatless-testbeds-tls
17 changes: 17 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Poetry packaging metadata for the "moatless-testbeds" distribution.
[tool.poetry]
name = "moatless-testbeds"
version = "0.0.2"
description = "Run testbeds as isolated pods in a Kubernetes cluster"
authors = ["Albert Örwall <[email protected]>"]
readme = "README.md"
# The distribution ships the top-level "testbeds" package directory, so the
# import path is `testbeds` (e.g. `from testbeds.sdk import TestbedSDK`).
packages = [{include = "testbeds"}]

# Runtime dependencies. Caret constraints accept compatible releases up to,
# but not including, the next major version.
[tool.poetry.dependencies]
python = "^3.9"
requests = "^2.32.3"
pydantic = "^2.8.2"
datasets = "^3.0.2"

# Build backend declaration: the package is built with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
5 changes: 4 additions & 1 deletion scripts/build_api.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
DOCKER_REGISTRY=${DOCKER_REGISTRY:-aorwall}
API_DOCKER_IMAGE=${DOCKER_REGISTRY}/moatless-testbed-api:latest
IMAGE_TAG=${IMAGE_TAG:-latest}
API_DOCKER_IMAGE=${DOCKER_REGISTRY}/moatless-testbed-api:${IMAGE_TAG}

echo "Building API Docker image: ${API_DOCKER_IMAGE}"

set -e

Expand Down
19 changes: 6 additions & 13 deletions scripts/deploy_api.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,14 @@
#!/bin/bash

DOCKER_IMAGE=${DOCKER_REGISTRY:-aorwall/moatless-testbed-api}
NAMESPACE=${NAMESPACE:-default}
export NAMESPACE=${KUBERNETES_NAMESPACE:-testbeds}
export DOCKER_REGISTRY=${DOCKER_REGISTRY:-aorwall}
export IMAGE_TAG=${IMAGE_TAG:-$(git rev-parse --short HEAD)}

docker build -t ${DOCKER_IMAGE}:latest -f Dockerfile.api .
docker push ${DOCKER_IMAGE}:latest
echo "Deploying API to namespace: $NAMESPACE with image tag: $IMAGE_TAG"

# Apply combined RBAC resources for API
kubectl apply -f <(envsubst < infra/testbed-rbac.yaml)
scripts/build_api.sh

# Apply testbed sidecar service account and roles
kubectl apply -f <(envsubst < infra/testbed-sa.yaml)
kubectl apply -f <(envsubst < infra/testbed-role.yaml)
kubectl apply -f <(envsubst < infra/testbed-rolebinding.yaml)

kubectl apply -f <(envsubst < k8s/api-keys-secret.yaml)
kubectl apply -f <(envsubst < k8s/api-deployment.yaml)
kubectl apply -f <(envsubst < k8s/api-service.yaml)

echo "Deployment completed in namespace: $NAMESPACE"
echo "Deployment completed in namespace: $NAMESPACE with image tag: $IMAGE_TAG"
20 changes: 8 additions & 12 deletions scripts/install.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/bin/bash

# Export the variables so envsubst can use them
export NAMESPACE=${KUBERNETES_NAMESPACE:-testbeds}
export TESTBED_NAMESPACE=${KUBERNETES_NAMESPACE:-testbeds}
export DOCKER_REGISTRY=${DOCKER_REGISTRY:-aorwall}
export IMAGE_TAG=${IMAGE_TAG:-latest}
export ENABLE_EXEC=${ENABLE_EXEC:-false}

set -e

Expand All @@ -14,10 +15,11 @@ if [ -z "$TESTBED_API_KEY" ]; then
fi

echo "Installing with configuration:"
echo " Namespace: $NAMESPACE"
echo " Namespace: $TESTBED_NAMESPACE"
echo " Docker Registry: $DOCKER_REGISTRY"
echo " Image Tag: $IMAGE_TAG"
echo " API Key: $TESTBED_API_KEY"
echo " Enable Exec: $ENABLE_EXEC"
echo "---"

kubectl apply -f <(envsubst < k8s/api-keys-secret.yaml)
Expand All @@ -29,13 +31,14 @@ kubectl apply -f <(envsubst < k8s/api-service.yaml)
echo "---"
echo "Waiting for external IP (this might take a few minutes)..."
while true; do
export TESTBED_API_IP=$(kubectl get service testbed-api-service -n $NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null)
export TESTBED_API_IP=$(kubectl get service testbed-api-service -n $TESTBED_NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null)
if [ -n "$TESTBED_API_IP" ]; then
echo "API is available at: http://$TESTBED_API_IP"

# Save IP to a file for later use
echo "export TESTBED_API_IP=$TESTBED_API_IP" > .env.testbed
echo "export NAMESPACE=$NAMESPACE" >> .env.testbed
echo "export TESTBED_HOSTNAME=http://$TESTBED_API_IP" >> .env.testbed
echo "export TESTBED_NAMESPACE=$TESTBED_NAMESPACE" >> .env.testbed
echo "export TESTBED_API_KEY=$TESTBED_API_KEY" >> .env.testbed

break
Expand All @@ -44,15 +47,8 @@ while true; do
sleep 5
done

echo "Installation complete!"

echo "---"
echo "Verifying installation..."

echo "Checking health endpoint http://$TESTBED_API_IP/health (this might take a few minutes)..."
curl "http://$TESTBED_API_IP/health"

echo "Verifying testbed instance django__django-11133..."
python scripts/verify.py --instance-id django__django-11133

echo "Verification complete!"
echo "Installation complete!"
27 changes: 15 additions & 12 deletions scripts/run_evaluation.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import argparse

from dotenv import load_dotenv
from testbed.sdk import TestbedSDK
from testbeds.sdk import TestbedSDK

logging.basicConfig(
level=logging.INFO,
Expand All @@ -16,24 +16,27 @@
logger = logging.getLogger(__name__)


def run_evaluation(instance_id="django__django-11133"):
namespace = os.getenv("NAMESPACE")
ip = os.getenv("TESTBED_API_IP")
api_key = os.getenv("TESTBED_API_KEY")

if not all([namespace, ip, api_key]):
logger.error("Missing required environment variables")
def run_evaluation(instance_id: str):
if not os.getenv("TESTBED_HOSTNAME"):
logger.error("TESTBED_HOSTNAME is not set")
return False
if not os.getenv("TESTBED_API_KEY"):
logger.error("TESTBED_API_KEY is not set")
return False

hostname = os.getenv("TESTBED_HOSTNAME")
api_key = os.getenv("TESTBED_API_KEY")

logger.info(f"Starting evaluation for instance: {instance_id}")

try:
sdk = TestbedSDK(
base_url=f"http://{ip}",
base_url=hostname,
api_key=api_key
)

logger.info("Creating evaluation instance...")
logger.info("Creating testbed instance...")
testbed = sdk.create_client(instance_id=instance_id)
logger.info(f"Created Testbed ID: {testbed.testbed_id}")

Expand All @@ -43,7 +46,7 @@ def run_evaluation(instance_id="django__django-11133"):
logger.info("Running evaluation script...")
result = testbed.run_evaluation()

logger.info("Cleaning up evaluation instance...")
logger.info("Cleaning up testbed instance...")
sdk.delete_testbed(testbed_id=testbed.testbed_id)

if result.resolved:
Expand All @@ -62,7 +65,7 @@ def run_evaluation(instance_id="django__django-11133"):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run evaluation on Testbed API')
parser.add_argument('--instance-id', type=str,
help='Instance ID to use for evaluation (default: django__django-11133)')
help='Instance ID to use for evaluation (e.g., django__django-11133)')

args = parser.parse_args()
success = run_evaluation(args.instance_id)
Expand Down
37 changes: 22 additions & 15 deletions scripts/run_tests.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import argparse

from dotenv import load_dotenv
from testbed.sdk import TestbedSDK
from testbeds.sdk import TestbedSDK

logging.basicConfig(
level=logging.INFO,
Expand All @@ -16,36 +16,41 @@
logger = logging.getLogger(__name__)


def run_evaluation(instance_id="django__django-11133"):
namespace = os.getenv("NAMESPACE")
ip = os.getenv("TESTBED_API_IP")
api_key = os.getenv("TESTBED_API_KEY")

if not all([namespace, ip, api_key]):
logger.error("Missing required environment variables")
def run_tests(instance_id: str, test_files: list[str] = None):
if not os.getenv("TESTBED_HOSTNAME"):
logger.error("TESTBED_HOSTNAME is not set")
return False
if not os.getenv("TESTBED_API_KEY"):
logger.error("TESTBED_API_KEY is not set")
return False

hostname = os.getenv("TESTBED_HOSTNAME")
api_key = os.getenv("TESTBED_API_KEY")

logger.info(f"Starting evaluation for instance: {instance_id}")

try:
sdk = TestbedSDK(
base_url=f"http://{ip}",
base_url=hostname,
api_key=api_key
)

logger.info("Creating evaluation instance...")
logger.info("Creating testbed instance...")
testbed = sdk.create_client(instance_id=instance_id)
logger.info(f"Created Testbed ID: {testbed.testbed_id}")

logger.info(f"Waiting for testbed to be ready...")
testbed.wait_until_ready()

test_files = testbed.test_spec.get_test_patch_files()
# Use provided test files or fall back to test_patch files
if test_files is None:
test_files = testbed.test_spec.get_test_patch_files()

logger.info("Running tests...")
result = testbed.run_tests(test_files)

logger.info("Cleaning up evaluation instance...")
logger.info("Cleaning up testbed instance...")
sdk.delete_testbed(testbed_id=testbed.testbed_id)

logger.info(f"Test results:\n{result.model_dump_json(indent=2)}")
Expand All @@ -58,10 +63,12 @@ def run_evaluation(instance_id="django__django-11133"):


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run evaluation on Testbed API')
parser = argparse.ArgumentParser(description='Run tests on Testbed API')
parser.add_argument('--instance-id', type=str,
help='Instance ID to use for evaluation (default: django__django-11133)')
help='Instance ID to test (e.g., django__django-11133)')
parser.add_argument('--test-files', nargs='+',
help='List of test files to run (optional)')

args = parser.parse_args()
success = run_evaluation(args.instance_id)
success = run_tests(args.instance_id, args.test_files)
sys.exit(0 if success else 1)
Loading

0 comments on commit e6cc79a

Please sign in to comment.