diff --git a/.gitea/workflows/ci.yaml b/.gitea/workflows/ci.yaml
index 2dc42d0..584e808 100644
--- a/.gitea/workflows/ci.yaml
+++ b/.gitea/workflows/ci.yaml
@@ -5,21 +5,42 @@ on:
     branches: [main]
   pull_request:
     branches: [main]
+  workflow_dispatch:
+    inputs:
+      run_datagen:
+        description: "Run dataset generation"
+        required: false
+        type: boolean
+        default: false
+      num_assemblies:
+        description: "Number of assemblies to generate"
+        required: false
+        type: string
+        default: "100000"
+      num_workers:
+        description: "Parallel workers for datagen"
+        required: false
+        type: string
+        default: "4"
+
+# Shared by all jobs: pip download cache directory and the CPU-only PyTorch wheel index.
+env:
+  PIP_CACHE_DIR: /tmp/pip-cache-solver
+  TORCH_INDEX: https://download.pytorch.org/whl/cpu
 
 jobs:
+  # ---------------------------------------------------------------------------
+  # Lint — fast, no torch required
+  # ---------------------------------------------------------------------------
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: https://github.com/actions/checkout@v4
 
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
+      - name: Install lint tools
         run: |
-          pip install ruff mypy
-          pip install -e ".[dev]" || pip install ruff mypy numpy
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" ruff
 
       - name: Ruff check
         run: ruff check solver/ freecad/ tests/ scripts/
@@ -27,39 +48,126 @@ jobs:
       - name: Ruff format check
         run: ruff format --check solver/ freecad/ tests/ scripts/
 
+  # ---------------------------------------------------------------------------
+  # Type check
+  # ---------------------------------------------------------------------------
   type-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
+      - name: Checkout
+        uses: https://github.com/actions/checkout@v4
 
       - name: Install dependencies
         run: |
-          pip install mypy numpy
-          pip install torch --index-url https://download.pytorch.org/whl/cpu
-          pip install torch-geometric
-          pip install -e ".[dev]"
+          # --index-url replaces PyPI for the whole command, so torch is installed on its own.
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" mypy numpy scipy
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" \
+            torch --index-url "$TORCH_INDEX"
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" \
+            torch-geometric
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" -e ".[dev]"
 
       - name: Mypy
         run: mypy solver/ freecad/
 
+  # ---------------------------------------------------------------------------
+  # Tests
+  # ---------------------------------------------------------------------------
   test:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
+      - name: Checkout
+        uses: https://github.com/actions/checkout@v4
 
       - name: Install dependencies
         run: |
-          pip install torch --index-url https://download.pytorch.org/whl/cpu
-          pip install torch-geometric
-          pip install -e ".[train,dev]"
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" \
+            torch --index-url "$TORCH_INDEX"
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" \
+            torch-geometric
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" -e ".[train,dev]"
 
       - name: Run tests
         run: pytest tests/ freecad/tests/ -v --tb=short
+
+  # ---------------------------------------------------------------------------
+  # Dataset generation — manual trigger or on main push
+  # ---------------------------------------------------------------------------
+  datagen:
+    runs-on: ubuntu-latest
+    if: >-
+      (github.event_name == 'workflow_dispatch' && inputs.run_datagen == true) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main')
+    needs: [test]
+    steps:
+      - name: Checkout
+        uses: https://github.com/actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" \
+            torch --index-url "$TORCH_INDEX"
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" \
+            torch-geometric
+          python3 -m pip install --cache-dir "$PIP_CACHE_DIR" -e ".[train]"
+
+      # Falls back to any earlier datagen-* cache when none exists for this commit.
+      - name: Restore datagen checkpoint
+        id: datagen-cache
+        uses: https://github.com/actions/cache/restore@v4
+        with:
+          path: data/synthetic
+          key: datagen-${{ github.sha }}
+          restore-keys: |
+            datagen-
+
+      # The inputs are empty on push events, so the shell defaults below apply.
+      - name: Generate dataset
+        run: |
+          NUM=${INPUTS_NUM_ASSEMBLIES:-100000}
+          WORKERS=${INPUTS_NUM_WORKERS:-4}
+          echo "Generating ${NUM} assemblies with ${WORKERS} workers"
+          python3 scripts/generate_synthetic.py \
+            --num-assemblies "${NUM}" \
+            --num-workers "${WORKERS}" \
+            --output-dir data/synthetic
+        env:
+          INPUTS_NUM_ASSEMBLIES: ${{ inputs.num_assemblies }}
+          INPUTS_NUM_WORKERS: ${{ inputs.num_workers }}
+
+      # Runs even when generation fails so that partial output is still cached.
+      - name: Save datagen checkpoint
+        if: always()
+        uses: https://github.com/actions/cache/save@v4
+        with:
+          path: data/synthetic
+          key: datagen-${{ github.sha }}
+
+      - name: Upload dataset
+        uses: https://github.com/actions/upload-artifact@v3
+        with:
+          name: synthetic-dataset
+          path: |
+            data/synthetic/index.json
+            data/synthetic/stats.json
+            data/synthetic/shards/
+          retention-days: 90
+
+      - name: Print summary
+        if: always()
+        run: |
+          echo "=== Dataset Generation Results ==="
+          if [ -f data/synthetic/stats.json ]; then
+            python3 -c "
+          import json
+          with open('data/synthetic/stats.json') as f:
+              s = json.load(f)
+          print(f'Total examples: {s[\"total_examples\"]}')
+          print(f'Classification: {json.dumps(s[\"classification_distribution\"], indent=2)}')
+          print(f'Rigid: {s[\"rigidity\"][\"rigid_fraction\"]*100:.1f}%')
+          print(f'Degeneracy: {s[\"geometric_degeneracy\"][\"fraction_with_degeneracy\"]*100:.1f}%')
+          "
+          else
+            echo "stats.json not found — generation may have failed"
+            ls -la data/synthetic/ 2>/dev/null || echo "output dir missing"
+          fi