From e18b8bdf1526f3b9904a2ee1ddd4cc594658a288 Mon Sep 17 00:00:00 2001 From: Jacob Oursland Date: Mon, 16 Jun 2025 22:39:08 -0700 Subject: [PATCH] CI: determine modified lines in a clang-tidy compatible way. --- .github/workflows/sub_prepare.yml | 33 +++++- tools/lint/changed_lines.py | 169 ++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+), 3 deletions(-) create mode 100644 tools/lint/changed_lines.py diff --git a/.github/workflows/sub_prepare.yml b/.github/workflows/sub_prepare.yml index cec57f0c17..f7ea5e68a3 100644 --- a/.github/workflows/sub_prepare.yml +++ b/.github/workflows/sub_prepare.yml @@ -46,10 +46,16 @@ on: value: ${{ jobs.Prepare.outputs.reportFile }} changedFiles: value: ${{ jobs.Prepare.outputs.changedFiles }} + changedLines: + value: ${{ jobs.Prepare.outputs.changedLines }} changedPythonFiles: value: ${{ jobs.Prepare.outputs.changedPythonFiles }} + changedPythonLines: + value: ${{ jobs.Prepare.outputs.changedPythonLines }} changedCppFiles: value: ${{ jobs.Prepare.outputs.changedCppFiles }} + changedCppLines: + value: ${{ jobs.Prepare.outputs.changedCppLines }} jobs: @@ -67,8 +73,11 @@ jobs: outputs: reportFile: ${{ steps.Init.outputs.reportFile }} changedFiles: ${{ steps.Output.outputs.changedFiles }} + changedLines: ${{ steps.Output.outputs.changedLines }} changedPythonFiles: ${{ steps.Output.outputs.changedPythonFiles }} + changedPythonLines: ${{ steps.Output.outputs.changedPythonLines }} changedCppFiles: ${{ steps.Output.outputs.changedCppFiles }} + changedCppLines: ${{ steps.Output.outputs.changedCppLines }} steps: - name: Harden the runner (Audit all outbound calls) @@ -84,6 +93,10 @@ jobs: commitCnt=0 touch ${{ env.logdir }}changedFiles.lst ${{ env.logdir }}changedCppFiles.lst ${{ env.logdir }}changedPythonFiles.lst echo "reportFile=${{ env.reportfilename }}" >> $GITHUB_OUTPUT + - name: Check out code + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true - name: Determine base 
and head SHA in case of PR if: env.isPR == 'true' run: | @@ -134,10 +147,21 @@ jobs: echo "Changeset is composed of $commitCnt commit(s)" | tee -a ${{env.reportdir}}${{ env.reportfilename }} - name: Get files modified in changeset #TODO check what happens with deleted file in the subsequent process if: env.isPR == 'true' || env.isPush == 'true' + env: + API_URL: ${{ github.api_url }} + TOKEN: ${{ github.token }} + REPO: ${{ github.repository }} + REF: ${{ github.ref_name }} + PR: ${{ github.event.number }} run: | - jq '.files[] | if .status != "removed" then .filename else empty end' ${{ env.logdir }}compare.json > ${{ env.logdir }}changedFiles.lst - grep -E '\.(py|py3)"' ${{ env.logdir }}changedFiles.lst > ${{ env.logdir }}changedPythonFiles.lst || true - grep -E '\.(c|c\+\+|cc|cpp|cu|cuh|cxx|h|h\+\+|hh|hpp|hxx)"' ${{ env.logdir }}changedFiles.lst > ${{ env.logdir }}changedCppFiles.lst || true + # could reduce this to a single + python3 tools/lint/changed_lines.py --api-url ${API_URL} --token ${TOKEN} --repo ${REPO} --ref=${REF} --pr=${PR} > ${{ env.logdir }}changedLines.lst + cat ${{ env.logdir }}changedLines.lst | jq '.[].name' > ${{ env.logdir }}changedFiles.lst + python3 tools/lint/changed_lines.py --api-url ${API_URL} --token ${TOKEN} --repo ${REPO} --ref=${REF} --pr=${PR} --file-filter '.py, .pyi' > ${{ env.logdir }}changedPythonLines.lst + cat ${{ env.logdir }}changedPythonLines.lst | jq '.[].name' > ${{ env.logdir }}changedPythonFiles.lst + python3 tools/lint/changed_lines.py --api-url ${API_URL} --token ${TOKEN} --repo ${REPO} --ref=${REF} --pr=${PR} --file-filter '.c, .cc, .cu, .cuh, .c++, .cpp, .cxx, .h, .hh, .h++, .hpp, .hxx' > ${{ env.logdir }}changedCppLines.lst + cat ${{ env.logdir }}changedCppLines.lst | jq '.[].name' > ${{ env.logdir }}changedCppFiles.lst + # Write the report echo "::group::Modified files in changeset (removed files are ignored) :" ; cat ${{ env.logdir }}changedFiles.lst ; echo "::endgroup::" echo "
Modified files (removed files are ignored):" >> ${{env.reportdir}}${{ env.reportfilename }} @@ -148,8 +172,11 @@ jobs: id: Output run: | echo "changedFiles=$(cat ${{ env.logdir }}changedFiles.lst | tr '\n' ' ')" >> $GITHUB_OUTPUT + echo "changedLines=$(cat ${{ env.logdir }}changedLines.lst | tr '\n' ' ')" >> $GITHUB_OUTPUT echo "changedPythonFiles=$(cat ${{ env.logdir }}changedPythonFiles.lst | tr '\n' ' ')" >> $GITHUB_OUTPUT + echo "changedPythonLines=$(cat ${{ env.logdir }}changedPythonLines.lst | tr '\n' ' ')" >> $GITHUB_OUTPUT echo "changedCppFiles=$(cat ${{ env.logdir }}changedCppFiles.lst | tr '\n' ' ')" >> $GITHUB_OUTPUT + echo "changedCppLines=$(cat ${{ env.logdir }}changedCppLines.lst | tr '\n' ' ')" >> $GITHUB_OUTPUT echo "" >> $GITHUB_OUTPUT - name: Upload logs if: always() diff --git a/tools/lint/changed_lines.py b/tools/lint/changed_lines.py new file mode 100644 index 0000000000..f292e0e46b --- /dev/null +++ b/tools/lint/changed_lines.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +# Modified to generate output compatible with `clang-tidy`'s `--line-filter` option +# +# Based on https://github.com/hestonhoffman/changed-lines/blob/main/main.py +# +# Original License +# +# The MIT License (MIT) +# +# Copyright (c) 2023 Heston Hoffman +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
Uses GitHub API to grab patch data for a PR and calculate changed lines
"""

import argparse
import json
import os
import re

# Page size requested from the "list pull request files" endpoint.
_PER_PAGE = 100
# Seconds to wait for the API before giving up instead of hanging the job.
_REQUEST_TIMEOUT = 30
# Matches the tail of a hunk header (" @@ optional context") so that only the
# "start[,count]" part of the new-file range is kept.
_HUNK_TAIL = re.compile(r"\s@@(.*)")


class MissingPatchData(Exception):
    """Raised when the patch data is missing"""


def fetch_patch(args):
    """Fetch the per-file patch entries of a pull request from the GitHub API.

    Args:
        args: Parsed command-line namespace; ``token``, ``api_url``, ``repo``
            and ``pr`` are read.

    Returns:
        A list with one decoded JSON object per file, accumulated over all
        result pages.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        MissingPatchData: If a successful response is not a JSON list.
    """
    # Imported here so the pure parsing helpers in this module stay importable
    # when the third-party ``requests`` package is not installed.
    import requests

    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    if args.token:
        headers["Authorization"] = f"Bearer {args.token}"

    url = f"{args.api_url}/repos/{args.repo}/pulls/{args.pr}/files"
    entries = []
    with requests.Session() as session:
        session.headers.update(headers)
        page = 1
        while True:
            response = session.get(
                url,
                params={"per_page": _PER_PAGE, "page": page},
                timeout=_REQUEST_TIMEOUT,
            )
            # Fail here with the real HTTP error rather than later with an
            # unrelated TypeError while iterating an error payload.
            response.raise_for_status()
            batch = response.json()
            if not isinstance(batch, list):
                raise MissingPatchData(
                    f"Unexpected response for {url}: expected a list of files"
                )
            entries.extend(batch)
            # A short (or empty) page means there is nothing left to fetch.
            if len(batch) < _PER_PAGE:
                break
            page += 1
    return entries


def parse_patch_file(entry):
    """Split one file's patch into hunks.

    Args:
        entry: File object from the API response; its ``"patch"`` value is
            read.

    Returns:
        A list of hunks. Each hunk is a list whose first element is the
        new-file range taken from the hunk header (``"start,count"`` or
        ``"start"``), followed by the hunk's context and added lines with
        their leading diff marker intact. Removed lines and the
        "No newline at end of file" marker are dropped.
    """
    hunks = []
    current = []

    # Empty strings are discarded, matching how the patch was cleaned before.
    for item in (line for line in entry["patch"].split("\n") if line):
        if item.startswith("@@ -"):
            # A new header closes the hunk collected so far.
            if current:
                hunks.append(current)
            current = [_HUNK_TAIL.sub("", item.split("+")[1])]
        elif item.startswith("-") or item == "\\ No newline at end of file":
            # Removed lines do not exist in the new file; the marker is not a
            # source line at all.
            continue
        else:
            current.append(item)
    if current:
        hunks.append(current)
    return hunks


def parse_patch_data(patch_data):
    """Map each changed file to its parsed hunks.

    Args:
        patch_data: Iterable of file objects as returned by ``fetch_patch``.

    Returns:
        A dict ``{filename: hunks}`` (see ``parse_patch_file``) for every
        entry that is not removed, has at least one addition and carries a
        ``"patch"`` key.
    """
    final_dict = {}
    for entry in patch_data:
        # We don't need removed files
        if entry["status"] == "removed":
            continue

        # We can only operate on files with additions and a patch key
        # Some really big files don't have a patch key because GitHub
        # returns a message in the PR that the file is too large to display
        if entry["additions"] != 0 and "patch" in entry:
            final_dict[entry["filename"]] = parse_patch_file(entry)
    return final_dict


def get_lines(line_dict):
    """Convert parsed hunks into ``[start, end]`` line ranges per file.

    One range is produced per hunk, running from the first to the last added
    line of that hunk (1-based, inclusive, new-file numbering). Context lines
    lying between two added lines of the same hunk are therefore covered too.

    Args:
        line_dict: Dict ``{filename: hunks}`` as built by
            ``parse_patch_data``.

    Returns:
        A dict ``{filename: [[start, end], ...]}``. Hunks without any added
        line contribute no range, and files left without ranges are omitted.
    """
    final_dict = {}
    for file_name, hunks in line_dict.items():
        ranges = []
        for hunk in hunks:
            header, *body = hunk
            # The header is "start,count" or just "start"; the first body
            # line is line ``start`` of the new file.
            line_number = int(header.split(",")[0])

            start = end = None
            for line in body:
                if line.startswith("+"):
                    if start is None:
                        start = line_number
                    end = line_number
                line_number += 1

            # A pure-deletion hunk has no added lines; emitting a placeholder
            # range for it would put an invalid entry into the line filter.
            if start is not None:
                ranges.append([start, end])

        # Remove deleted/renamed files (which appear as empty arrays)
        if ranges:
            final_dict[file_name] = ranges
    return final_dict


def main():
    """Print the changed files and line ranges of a PR as a JSON array.

    Each element has the form ``{"name": <path>, "lines": [[start, end], ...]}``.
    When ``--file-filter`` is given (comma-separated extensions such as
    ``".py, .pyi"``), only files whose extension is listed are reported.
    """
    parser = argparse.ArgumentParser(
        prog="changed_lines.py",
        description="Identifies the changed files and lines in a GitHub PR.",
    )
    parser.add_argument("--token")
    parser.add_argument("--api-url", default="https://api.github.com")
    parser.add_argument("--repo", default="FreeCAD/FreeCAD")
    # Still required so existing invocations keep working; the value itself is
    # not read anywhere in this script.
    parser.add_argument("--ref", required=True)
    parser.add_argument("--pr", required=True)
    parser.add_argument("--file-filter", default="")
    args = parser.parse_args()

    added_lines = get_lines(parse_patch_data(fetch_patch(args)))

    # Blank items (e.g. from a trailing comma) are ignored so they cannot
    # accidentally select files that have no extension.
    extensions = {
        ext for ext in args.file_filter.replace(" ", "").split(",") if ext
    }

    line_filter = [
        {"name": filename, "lines": ranges}
        for filename, ranges in added_lines.items()
        if not extensions or os.path.splitext(filename)[1] in extensions
    ]

    print(json.dumps(line_filter))


if __name__ == "__main__":
    main()