diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c61f10 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +gitleaks-report.json +gitleaks-report-detailed.json +temp.json diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 0000000..a42fff6 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,25 @@ +# Title for the gitleaks configuration file. +title = "Custom Gitleaks Config" + +[extend] +# useDefault will extend the base configuration with the default gitleaks config: +# https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml +useDefault = true + +# This is a global allowlist which has a higher order of precedence than rule-specific allowlists. +# If a commit listed in the `commits` field below is encountered then that commit will be skipped and no +# secrets will be detected for said commit. The same logic applies for regexes and paths. +[allowlist] +description = "Global allowlisted paths, regexes and stopwords" +paths = [ + '''\.gitleaks\.toml''', + '''\.gitleaks\.toml''', + '''gitleaks-report-detailed.json''', + '''gitleaks-report.json''' +] + +regexTarget = "match" + +regexes = [] + +stopwords = [] diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d38f6cb --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +All notable changes to this project will be documented in this file. + + +## [1.0.0] - 2024-07-05 + +[1.0.0]: https://github.com/abdullahkhawer/find-and-report-secrets-in-code/releases/tag/v1.0.0 + +### Features + +- Develop a Python script and a Shell script to find secrets in a git repository using Gitleaks and to add the fingerprints and descriptions of the found secrets on an Atlassian Confluence page. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f49a4e1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5f9bc39 --- /dev/null +++ b/README.md @@ -0,0 +1,175 @@ +# Find and Report Secrets in Code + +- Founder: Abdullah Khawer (LinkedIn: https://www.linkedin.com/in/abdullah-khawer/) + +# Introduction + +A security solution that finds secrets in a git repository using Gitleaks, generates a JSON report based on the findings from Gitleaks by extracting only the relevant information, finds the commit id and commit author for each finding, updates an Atlassian Confluence page with the secrets found based on that generated report and finally sends an alert on Slack. + +❓ Where I can run this? + +👉🏻 This solution can be executed on any macOS or Linux system either locally or on a remote server. 
It can also be executed on a CI/CD pipeline. + +Below you can find an example of the JSON report generated: + +```json +[ + { + "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", + "File": "./code/main.py", + "Line No.": "11", + "Secret Type": "hashicorp-tf-password", + "Commit": "__REDACTED__", + "Author": "__REDACTED__" + }, + { + "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", + "File": "./code/main.conf", + "Line No.": "30", + "Secret Type": "hashicorp-tf-password", + "Commit": "__REDACTED__", + "Author": "__REDACTED__" + } +] +``` + +Note: In the actual execution, you will see the actual values instead of `__REDACTED__` values. + +Below you can find an example of the Slack notification: + +![image](https://github.com/abdullahkhawer/find-and-report-secrets-in-code/assets/27900716/fc798318-7373-4437-a205-4d71065fb2f7) + +# Usage Notes + +## Manually on a Local or Remote Server + +### Prerequisites + +Following are the prerequisites to be met once before you begin: + +- Following packages are installed on your system: + - In case of Linux, install the following packages using either `./installation/linux_install_packages.sh` script or manually: + - `git` + - `jq` + - `bash` + - `make` + - `wget` + - `python3` + - `py3-pip` + - `golang` + - `gitleaks` + - `atlassian-python-api` + - Using `pip` + - `pytz` + - Using `pip` + - `requests` + - Using `pip` + - In case of macOS, install the following packages using either `./installation/macos_install_packages.sh` script or manually: + - `git` + - `jq` + - `bash` + - `python` + - `python@3` + - `gitleaks` + - `atlassian-python-api` + - Using `pip` + - `pytz` + - Using `pip` + - `requests` + - Using `pip` + +### Execution Instructions + +Once all the prerequisites are met, set the following environment variables: + - `PATH_TO_GIT_REPO` + - 
Description: Path to the local git repository to be scanned for secrets. + - Example: `/Users/Abdullah.Khawer/Desktop/myrepo` + - Requirement: REQUIRED + - `CONFLUENCE_ENABLED` + - Description: Whether to enable reporting on Atlassian Confluence or not. + - Example: `1` + - Requirement: REQUIRED + - Possible Values: `1` or `0` + - `CONFLUENCE_SITE` + - Description: Atlassian Confluence host link. + - Example: `https://mydomain.atlassian.net` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_USER_EMAIL_ID` + - Description: Atlassian Confluence user email ID. + - Example: `myname@mydomain.com` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_USER_TOKEN` + - Description: Atlassian Confluence user token. + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_PAGE_TITLE` + - Description: Atlassian Confluence page title. + - Example: `Secrets Detected in the Git Repositories` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_PAGE_SPACE` + - Description: Atlassian Confluence page space. + - Example: `docs` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `SLACK_ENABLED` + - Description: Whether to enable notifications on Slack or not. + - Example: `1` + - Requirement: REQUIRED + - Possible Values: `1` or `0` + - `SLACK_WEBHOOK_URL` + - Description: Slack Webhook URL. 
+ - Example: `https://hooks.slack.com/services/__REDACTED__/__REDACTED__/__REDACTED__` + - Requirement: REQUIRED (if `SLACK_ENABLED` is set to `1`) + +And then simply run the following 2 commands in the given order: +- `bash gitleaks.sh` +- `python3 main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]` + - Example: `python3 main.py Europe/Amsterdam myproj/myrepo master` + - Note: Details about supported time zones and their constant names can be found here: [pypi.org > project > pytz > Helpers](https://pypi.org/project/pytz/#:~:text=through%20multiple%20timezones.-,Helpers,-There%20are%20two) + +## Automatically via CI/CD Pipeline + +### Setup Instructions + +In order to run it on any GitLab repository, add the following in the `.gitlab-ci.yml` file that is in the repository: + +``` +include: + - remote: 'https://raw.githubusercontent.com/abdullahkhawer/find-and-report-secrets-in-code/master/ci/.gitlab-ci.yml' + +stages: + - scan + +secrets_detection: + stage: scan + extends: + - .find-secrets:scan + variables: + CONFLUENCE_ENABLED: "1" + CONFLUENCE_SITE: $CONFLUENCE_SITE + CONFLUENCE_USER_EMAIL_ID: $CONFLUENCE_USER_EMAIL_ID + CONFLUENCE_USER_TOKEN: $CONFLUENCE_USER_TOKEN + CONFLUENCE_PAGE_TITLE: $CONFLUENCE_PAGE_TITLE + CONFLUENCE_PAGE_SPACE: $CONFLUENCE_PAGE_SPACE + SLACK_ENABLED: "1" + SLACK_WEBHOOK_URL: $SLACK_WEBHOOK_URL + retry: + max: 2 + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_REF_NAME == "master" + when: always + allow_failure: false +``` + +In the `rules` section, you specify rules for execution as `if` conditions. In the above example, the job is only allowed to execute if it is a scheduled job for the `master` branch. + +The variables referenced using `$` are supposed to be created on the repository under `CI/CD Settings` page. 
+ +The image used in this GitLab CI job is built using the Dockerfile that is present in this repository here: https://github.com/abdullahkhawer/find-and-report-secrets-in-code/tree/master/docker + +The image used is publicly available here: https://hub.docker.com/r/abdullahkhawer/find-and-report-secrets-in-code/ + +## Notes + +- A sample Gitleaks configuration file can be found here if interested in using it: `.gitleaks.toml` +- The Atlassian user should have access to the Confluence app, the `View` and `Add` permissions in the space on it and the `Can edit` permission on the page in that space. Also, you need to create an API token as the password won't work. + +#### Any contributions, improvements and suggestions will be highly appreciated. 😊 diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..0ec25f7 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +v1.0.0 diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml new file mode 100644 index 0000000..2198517 --- /dev/null +++ b/ci/.gitlab-ci.yml @@ -0,0 +1,75 @@ +# This job finds secrets in a git repository using Gitleaks, generates a JSON report based on the findings from Gitleaks +# by extracting only the relevant information, finds the commit id and commit author for each finding, updates an Atlassian +# Confluence page with the secrets found based on that generated report and finally sends an alert on Slack. 
+ +.find-secrets:variables: + variables: + # Whether to enable reporting on Atlassian Confluence or not + CONFLUENCE_ENABLED: "0" + # Atlassian Confluence host link (e.g., https://mydomain.atlassian.net) + CONFLUENCE_SITE: "" + # Atlassian Confluence user email ID (e.g., myname@mydomain.com) + CONFLUENCE_USER_EMAIL_ID: "" + # Atlassian Confluence user token + CONFLUENCE_USER_TOKEN: "" + # Atlassian Confluence page title (e.g., "Secrets Detected in the Git Repositories") + CONFLUENCE_PAGE_TITLE: "" + # Atlassian Confluence page space (e.g., docs) + CONFLUENCE_PAGE_SPACE: "" + # Whether to enable notifications on Slack or not + SLACK_ENABLED: "0" + # Slack Webhook URL (e.g., https://hooks.slack.com/services/__REDACTED__/__REDACTED__/__REDACTED__) + SLACK_WEBHOOK_URL: "" + +.find-secrets:scan: + stage: scan + extends: + - .find-secrets:variables + image: abdullahkhawer/find-and-report-secrets-in-code:1.0.0 + before_script: + - | + if [ -n "$CONFLUENCE_ENABLED" ] && [ "$CONFLUENCE_ENABLED" -eq 1 ]; then + if [ -z "$CONFLUENCE_SITE" ]; then + echo "CONFLUENCE_SITE is empty or null. Exiting with error." + exit 1 + fi + + if [ -z "$CONFLUENCE_USER_EMAIL_ID" ]; then + echo "CONFLUENCE_USER_EMAIL_ID is empty or null. Exiting with error." + exit 1 + fi + + if [ -z "$CONFLUENCE_USER_TOKEN" ]; then + echo "CONFLUENCE_USER_TOKEN is empty or null. Exiting with error." + exit 1 + fi + + if [ -z "$CONFLUENCE_PAGE_TITLE" ]; then + echo "CONFLUENCE_PAGE_TITLE is empty or null. Exiting with error." + exit 1 + fi + + if [ -z "$CONFLUENCE_PAGE_SPACE" ]; then + echo "CONFLUENCE_PAGE_SPACE is empty or null. Exiting with error." + exit 1 + fi + fi + + if [ -n "$SLACK_ENABLED" ] && [ "$SLACK_ENABLED" -eq 1 ]; then + if [ -z "$SLACK_WEBHOOK_URL" ]; then + echo "SLACK_WEBHOOK_URL is empty or null. Exiting with error." 
+ exit 1 + fi + fi + - git fetch origin $CI_COMMIT_BRANCH + script: + - export PATH=$PATH:/usr/local/gitleaks + - export PATH_TO_GIT_REPO=$(pwd) + - export REPO_NAME=$(echo "$CI_PROJECT_DIR" | sed 's|/builds/||') + - cd /find-and-report-secrets-in-code/ + - bash ./gitleaks.sh + - python3 main.py "Europe/Amsterdam" $REPO_NAME $CI_COMMIT_BRANCH $CI_JOB_URL/artifacts/raw/gitleaks-report.json + - cp ./gitleaks-report.json $PATH_TO_GIT_REPO/gitleaks-report.json + artifacts: + paths: + - gitleaks-report.json diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..651f367 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,5 @@ +FROM --platform=linux/amd64 alpine:3.20.0 + +COPY installation/linux_install_packages.sh .gitleaks.toml gitleaks.sh main.py ./find-and-report-secrets-in-code/ + +RUN sh ./find-and-report-secrets-in-code/linux_install_packages.sh diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..2dd8985 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,144 @@ +# Find and Report Secrets in Code + +- Founder: Abdullah Khawer (LinkedIn: https://www.linkedin.com/in/abdullah-khawer/) + +# Introduction + +This repository has a Docker image that finds secrets in a git repository using Gitleaks, generates a JSON report based on the findings from Gitleaks by extracting only the relevant information, finds the commit id and commit author for each finding, updates an Atlassian Confluence page with the secrets found based on that generated report and finally sends an alert on Slack. + +❓ Where I can run this? + +👉🏻 This Docker image can be executed on any Windows, macOS or Linux system either locally or on a remote server. It can also be executed on a CI/CD pipeline. 
+ +Below you can find an example of the JSON report generated: + +```json +[ + { + "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", + "File": "./code/main.py", + "Line No.": "11", + "Secret Type": "hashicorp-tf-password", + "Commit": "__REDACTED__", + "Author": "__REDACTED__" + }, + { + "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", + "File": "./code/main.conf", + "Line No.": "30", + "Secret Type": "hashicorp-tf-password", + "Commit": "__REDACTED__", + "Author": "__REDACTED__" + } +] +``` + +Note: In the actual execution, you will see the actual values instead of `__REDACTED__` values. + +Below you can find an example of the Slack notification: + +![image](https://github.com/abdullahkhawer/find-and-report-secrets-in-code/assets/27900716/fc798318-7373-4437-a205-4d71065fb2f7) + +# Usage Notes + +## Manually on a Local or Remote Server + +### Execution Instructions + +Set the following environment variables: + - `PATH_TO_GIT_REPO` + - Description: Path to the local git repository to be scanned for secrets. + - Example: `/Users/Abdullah.Khawer/Desktop/myrepo` + - Requirement: REQUIRED + - `CONFLUENCE_ENABLED` + - Description: Whether to enable reporting on Atlassian Confluence or not. + - Example: `1` + - Requirement: REQUIRED + - Possible Values: `1` or `0` + - `CONFLUENCE_SITE` + - Description: Atlassian Confluence host link. + - Example: `https://mydomain.atlassian.net` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_USER_EMAIL_ID` + - Description: Atlassian Confluence user email ID. + - Example: `myname@mydomain.com` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_USER_TOKEN` + - Description: Atlassian Confluence user token. 
+ - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_PAGE_TITLE` + - Description: Atlassian Confluence page title. + - Example: `Secrets Detected in the Git Repositories` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `CONFLUENCE_PAGE_SPACE` + - Description: Atlassian Confluence page space. + - Example: `docs` + - Requirement: REQUIRED (if `CONFLUENCE_ENABLED` is set to `1`) + - `SLACK_ENABLED` + - Description: Whether to enable notifications on Slack or not. + - Example: `1` + - Requirement: REQUIRED + - Possible Values: `1` or `0` + - `SLACK_WEBHOOK_URL` + - Description: Slack Webhook URL. + - Example: `https://hooks.slack.com/services/__REDACTED__/__REDACTED__/__REDACTED__` + - Requirement: REQUIRED (if `SLACK_ENABLED` is set to `1`) + +And then simply run the following 4 commands: +- `docker run --platform linux/amd64 -it -e PATH_TO_GIT_REPO=/git_repo -e CONFLUENCE_ENABLED=1 -e CONFLUENCE_SITE=$CONFLUENCE_SITE -e CONFLUENCE_USER_EMAIL_ID=$CONFLUENCE_USER_EMAIL_ID -e CONFLUENCE_USER_TOKEN=$CONFLUENCE_USER_TOKEN -e CONFLUENCE_PAGE_TITLE=$CONFLUENCE_PAGE_TITLE -e CONFLUENCE_PAGE_SPACE=$CONFLUENCE_PAGE_SPACE -e SLACK_ENABLED=1 -e SLACK_WEBHOOK_URL=$SLACK_WEBHOOK_URL -v $PATH_TO_GIT_REPO:/git_repo abdullahkhawer/find-and-report-secrets-in-code:latest` +- `export PATH=$PATH:/usr/local/gitleaks` +- `bash /find-and-report-secrets-in-code/gitleaks.sh` +- `python3 /find-and-report-secrets-in-code/main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]` + - Example: `python3 /find-and-report-secrets-in-code/main.py Europe/Amsterdam myproj/myrepo master` + - Note: Details about supported time zones and their constant names can be found here: [pypi.org > project > pytz > Helpers](https://pypi.org/project/pytz/#:~:text=through%20multiple%20timezones.-,Helpers,-There%20are%20two) + +## Automatically via CI/CD Pipeline + +### Setup Instructions + +In order to run it on any GitLab 
repository, add the following in the `.gitlab-ci.yml` file that is in the repository: + +``` +include: + - remote: 'https://raw.githubusercontent.com/abdullahkhawer/find-and-report-secrets-in-code/master/ci/.gitlab-ci.yml' + +stages: + - scan + +secrets_detection: + stage: scan + extends: + - .find-secrets:scan + variables: + CONFLUENCE_ENABLED: "1" + CONFLUENCE_SITE: $CONFLUENCE_SITE + CONFLUENCE_USER_EMAIL_ID: $CONFLUENCE_USER_EMAIL_ID + CONFLUENCE_USER_TOKEN: $CONFLUENCE_USER_TOKEN + CONFLUENCE_PAGE_TITLE: $CONFLUENCE_PAGE_TITLE + CONFLUENCE_PAGE_SPACE: $CONFLUENCE_PAGE_SPACE + SLACK_ENABLED: "1" + SLACK_WEBHOOK_URL: $SLACK_WEBHOOK_URL + retry: + max: 2 + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_REF_NAME == "master" + when: always + allow_failure: false +``` + +In the `rules` section, you specify rules for execution as `if` conditions. In the above example, the job is only allowed to execute if it is a scheduled job for the `master` branch. + +The variables referenced using `$` are supposed to be created on the repository under `CI/CD Settings` page. + +The image used in this GitLab CI job is built using the Dockerfile that is present in the GitHub repository here: `https://github.com/abdullahkhawer/find-and-report-secrets-in-code/blob/master/docker/Dockerfile` + +The image used is publicly available here: https://hub.docker.com/r/abdullahkhawer/find-and-report-secrets-in-code/ + +## Notes + +- A sample Gitleaks configuration file can be found here if interested in using it: `https://github.com/abdullahkhawer/find-and-report-secrets-in-code/blob/master/.gitleaks.toml` +- The Atlassian user should have access to the Confluence app, the `View` and `Add` permissions in the space on it and the `Can edit` permission on the page in that space. Also, you need to create an API token as the password won't work. 
+ +For more details, check the following repository on GitHub: https://github.com/abdullahkhawer/find-and-report-secrets-in-code/ + +#### Any contributions, improvements and suggestions will be highly appreciated. 😊 diff --git a/gitleaks.sh b/gitleaks.sh new file mode 100644 index 0000000..254028b --- /dev/null +++ b/gitleaks.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +echo "Script Execution Started!" + +# remove Gitleaks reports if they exist already +echo "Removing Gitleaks reports if they exist already..." +rm -rf ${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json +rm -rf ./gitleaks-report.json + +# run Gitleaks to find secrets and generate a detailed report in JSON for the secrets found +echo "Running Gitleaks to find secrets and generate a detailed report in JSON for the secrets found..." +gitleaks detect -r ${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json -f json -s ${PATH_TO_GIT_REPO} --redact --no-git + +# create a final report in JSON using the detailed report having relevant information only +echo "Creating a final report in JSON using the detailed report having relevant information only..." 
# Build the final report (./gitleaks-report.json) from the detailed Gitleaks
# report: one JSON object per finding holding only the relevant fields, plus the
# commit id and author obtained from 'git blame'.
#
# The objects are streamed into 'jq -s', which collects them into one
# pretty-printed JSON array. This yields a valid "[]" when there are no
# findings, whereas assembling the brackets and commas by hand left a lone "]"
# in the report in that case.
detailed_report="${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json"

# Without the detailed report there is nothing to summarise; fail loudly rather
# than publish an empty report that would read as "no secrets found".
if [ ! -f "$detailed_report" ]; then
    echo "ERROR: Detailed Gitleaks report not found: ${detailed_report}" >&2
    exit 1
fi

jq -c '.[]' "$detailed_report" | while IFS= read -r line; do
    description=$(jq -r '.Description' <<< "$line")
    start_line=$(jq -r '.StartLine' <<< "$line")
    file=$(jq -r '.File' <<< "$line")
    secret_type=$(jq -r '.RuleID' <<< "$line")

    # Make the path relative to the repository root. The prefix is stripped
    # literally (not as a regular expression), so special characters in
    # PATH_TO_GIT_REPO cannot change what gets removed.
    if [[ "$file" == "$PATH_TO_GIT_REPO"* ]]; then
        file=".${file#"$PATH_TO_GIT_REPO"}"
    fi

    # use 'git blame' to find the commit id and author for each finding
    # (porcelain output: line 1 starts with the commit id, line 2 is "author <name>")
    blame=$(cd "${PATH_TO_GIT_REPO}" && git blame -L "$start_line","$start_line" --porcelain -- "$file")
    commit_id=$(awk 'NR==1 {print $1}' <<< "$blame")
    author=$(sed -n '2s/^author //p' <<< "$blame")

    # emit the final JSON object for this finding
    jq -n -c \
        --arg desc "$description" \
        --arg file "$file" \
        --arg line_no "$start_line" \
        --arg type "$secret_type" \
        --arg commit "$commit_id" \
        --arg author "$author" \
        '{"Description": $desc, "File": $file, "Line No.": $line_no, "Secret Type": $type, "Commit": $commit, "Author": $author}'
done | jq -s '.' > ./gitleaks-report.json

echo "Script Execution Completed!"
diff --git a/installation/linux_install_packages.sh b/installation/linux_install_packages.sh new file mode 100755 index 0000000..710f4f5 --- /dev/null +++ b/installation/linux_install_packages.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -ex + +# update package lists +apk update + +# install Git, jq, Bash, Make, wget, Python3, pip, Go (Golang) +apk add git jq bash make wget python3 py3-pip go +git --version && jq --version && bash --version && make --version && wget --version && python3 --version && pip --version && go version + +# install "Python Atlassian REST API Wrapper", "World timezone definitions, modern and historical" and "Requests" Python libraries +pip install atlassian-python-api pytz requests --break-system-packages + +# install Gitleaks +rm -rf /usr/local/gitleaks && git clone https://github.com/gitleaks/gitleaks.git /usr/local/gitleaks +cd /usr/local/gitleaks +make build +cd / +export PATH=$PATH:/usr/local/gitleaks +echo -n "gitleaks " && gitleaks version + +echo "Installation completed successfully." diff --git a/installation/macos_install_packages.sh b/installation/macos_install_packages.sh new file mode 100755 index 0000000..1aa6675 --- /dev/null +++ b/installation/macos_install_packages.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -ex + +# install Git, jq, Bash, pip, Python3, Gitleaks +brew install git jq bash python python@3 gitleaks +git --version && jq --version && bash --version && pip --version && python3 --version && echo -n "gitleaks " && gitleaks version + +# install "Python Atlassian REST API Wrapper", "World timezone definitions, modern and historical" and "Requests" Python libraries +pip install atlassian-python-api pytz requests || pip install atlassian-python-api pytz requests --break-system-packages + +echo "Installation completed successfully." 
# File: main.py (new file)
"""Report the findings of a Gitleaks scan to Confluence and/or Slack.

Usage: python main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]
Example: python main.py Europe/Amsterdam myproj/myrepo master

The script reads ``./gitleaks-report.json`` (a list of objects with the keys
``Description``, ``File``, ``Line No.``, ``Secret Type``, ``Commit`` and
``Author``) and, depending on the environment:

* ``CONFLUENCE_ENABLED=1``: inserts or replaces the section for this
  repository/branch on the configured Confluence page.
* ``SLACK_ENABLED=1``: posts a summary to the configured Slack webhook.

Third-party packages (``pytz``, ``atlassian``, ``requests``) are imported in
the functions that use them, so a disabled integration does not need its
package and the pure helpers stay importable on their own.
"""
import html
import json
import os
import re
import sys
from datetime import datetime
from urllib.parse import quote_plus

REPORT_PATH = "./gitleaks-report.json"

# Upper bound for the Slack webhook call so a stalled connection cannot hang the job.
HTTP_TIMEOUT_SECONDS = 30

# NOTE(review): the markup of both templates was not visible in the reviewed
# chunk (only the text and the "{}" placeholders survived), so the tags below
# are a minimal reconstruction. Confirm them against the original templates
# before merging. Placeholders, in order: repository, branch, scan time,
# number of findings, table rows.
SECTION_TEMPLATE = """
<p><strong>Repository: {} - Branch: {}</strong></p>
<p><strong>Last Scan Time: {}</strong></p>
<p><strong>Secrets Found: {}</strong></p>
<table>
<tbody>
<tr>
<th></th>
<th><p><strong>Description</strong></p></th>
<th><p><strong>File</strong></p></th>
<th><p><strong>Line No.</strong></p></th>
<th><p><strong>Secret Type</strong></p></th>
<th><p><strong>Commit ID</strong></p></th>
<th><p><strong>Commit Author</strong></p></th>
</tr>
{}
</tbody>
</table>
"""

# Placeholders, in order: row number, description, file, line number,
# secret type, commit ID, commit author.
ROW_TEMPLATE = """
<tr>
<td>{}</td>
<td><p>{}</p></td>
<td><p>{}</p></td>
<td><p>{}</p></td>
<td><p>{}</p></td>
<td><p>{}</p></td>
<td><p>{}</p></td>
</tr>
"""


def fail(message):
    """Print ``message`` as an error and terminate with exit status 1."""
    print(f"ERROR: {message}")
    sys.exit(1)


def require_env(name):
    """Return the value of environment variable ``name`` or exit if it is unset."""
    value = os.getenv(name)
    if value is None:
        fail(f"{name} environment variable is not set.")
    return value


def parse_args(argv):
    """Split the command-line arguments (without the program name).

    Returns ``(time_zone, repo_name, branch_name, json_report_url)``; the URL
    is ``""`` when the optional fourth argument is absent. Exits with status 1
    and a usage message on a wrong argument count.
    """
    if not 3 <= len(argv) <= 4:
        print("ERROR: Invalid arguments passed.")
        print("Usage: python main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]")
        print("Example: python main.py Europe/Amsterdam myproj/myrepo master")
        sys.exit(1)
    time_zone, repo_name, branch_name = argv[:3]
    json_report_url = argv[3] if len(argv) == 4 else ""
    return time_zone, repo_name, branch_name, json_report_url


def format_scan_time(time_zone):
    """Return the current time in ``time_zone`` as ``YYYY-MM-DD HH:MM:SS TZ``."""
    # pytz ships its own time zone database, so this keeps working on images
    # that do not have system tzdata installed.
    import pytz

    try:
        target_timezone = pytz.timezone(time_zone)
    except pytz.UnknownTimeZoneError:
        fail(f"Unknown time zone '{time_zone}'.")
    return datetime.now(target_timezone).strftime('%Y-%m-%d %H:%M:%S %Z')


def load_confluence_settings():
    """Return the Confluence settings, or ``None`` when the integration is disabled.

    ``CONFLUENCE_ENABLED`` must be set; the other variables are only required
    when its value is ``"1"``.
    """
    if require_env("CONFLUENCE_ENABLED") != "1":
        return None
    return {
        "site": require_env("CONFLUENCE_SITE"),
        "user": require_env("CONFLUENCE_USER_EMAIL_ID"),
        "token": require_env("CONFLUENCE_USER_TOKEN"),
        "title": require_env("CONFLUENCE_PAGE_TITLE"),
        "space": require_env("CONFLUENCE_PAGE_SPACE"),
    }


def load_slack_webhook_url():
    """Return the Slack webhook URL, or ``None`` when the integration is disabled.

    ``SLACK_ENABLED`` must be set; ``SLACK_WEBHOOK_URL`` is only required when
    its value is ``"1"``.
    """
    if require_env("SLACK_ENABLED") != "1":
        return None
    return require_env("SLACK_WEBHOOK_URL")


def _escape(value):
    """Escape ``&``, ``<`` and ``>`` so ``value`` is safe as markup text content."""
    return html.escape(str(value), quote=False)


def render_rows(findings):
    """Return one table row per finding, numbered from 1.

    Every value is escaped: descriptions, file paths and author names come
    from the scanned repository and may contain markup characters.
    """
    return "".join(
        ROW_TEMPLATE.format(
            number,
            _escape(finding["Description"]),
            _escape(finding["File"]),
            _escape(finding["Line No."]),
            _escape(finding["Secret Type"]),
            _escape(finding["Commit"]),
            _escape(finding["Author"]),
        )
        for number, finding in enumerate(findings, start=1)
    )


def render_section(repo_name, branch_name, scan_time, findings):
    """Return the complete page section for one repository/branch."""
    return SECTION_TEMPLATE.format(
        _escape(repo_name),
        _escape(branch_name),
        _escape(scan_time),
        len(findings),
        render_rows(findings),
    )


def merge_section(page_content, section, repo_name, branch_name):
    """Return ``page_content`` with the section of this repository/branch updated.

    An existing section (from its "Repository: ... - Branch: ..." paragraph up
    to the end of the following table) is replaced; otherwise ``section`` is
    appended to the end of the page.
    """
    # The names are escaped twice on purpose: html.escape mirrors how they are
    # written to the page, re.escape keeps characters such as "." or "+" from
    # acting as regex operators. Requiring a tag right after the branch name
    # stops "main" from also matching the section of "main-2". Inline tags
    # between "<p>" and the text are tolerated because the stored markup may
    # differ from the template.
    pattern = re.compile(
        r"<p[^>]*>(?:\s*<[^>]+>)*\s*Repository: {} - Branch: {}\s*<.*?</table>".format(
            re.escape(_escape(repo_name)),
            re.escape(_escape(branch_name)),
        ),
        flags=re.DOTALL,
    )
    if pattern.search(page_content):
        # A callable replacement is inserted verbatim; a replacement *string*
        # would have its backslashes and group references interpreted, which
        # corrupts or rejects sections containing e.g. Windows paths.
        replacement = section.strip()
        return pattern.sub(lambda _match: replacement, page_content)
    return page_content + "\n" + section


def publish_to_confluence(settings, section, repo_name, branch_name):
    """Write ``section`` to the configured Confluence page and return the page ID.

    Exits with status 1 when the page cannot be found or updated.
    """
    from atlassian import Confluence

    confluence = Confluence(url=settings["site"], username=settings["user"], password=settings["token"])

    page_id = confluence.get_page_id(settings["space"], settings["title"])
    if page_id is None:
        fail(f"Confluence page '{settings['title']}' was not found in space '{settings['space']}'.")

    page = confluence.get_page_by_id(page_id, expand='body.storage')
    page_content = page['body']['storage']['value']
    new_page_content = merge_section(page_content, section, repo_name, branch_name)

    try:
        confluence.update_page(page_id, settings["title"], new_page_content, type='page', representation='storage', minor_edit=False, full_width=True)
    except Exception as err:  # boundary: report any client/API failure and stop
        print("ERROR: Failed to update Confluence page.")
        print(f'ERROR: {err}')
        sys.exit(1)
    print("Confluence page updated successfully.")
    return page_id


def build_slack_message(repo_name, branch_name, scan_time, secrets_found, authors,
                        confluence_link="", json_report_url=""):
    """Return the Slack ``mrkdwn`` text of the notification.

    ``authors`` is an iterable of names, listed one bullet per name in the
    given order. The two link lines are only added for non-empty URLs.
    """
    # Built outside the f-string: reusing the quote character or a backslash
    # inside a replacement field is a syntax error before Python 3.12.
    author_lines = "".join(f"\n>• *{name}*" for name in authors) or "`N/A`"

    message = "*Secrets Detection Notification*"
    message += f'\n>:file_folder: *Repository:* `{repo_name}`'
    message += f'\n>:git: *Branch:* `{branch_name}`'
    message += f'\n>:clock1: *Last Scan Time:* `{scan_time}`'
    message += f'\n>:warning: *Secrets Found:* `{secrets_found}`'
    message += f'\n>:technologist: *Commit Authors:* {author_lines}'
    if confluence_link:
        message += f'\n:link: More details can be found here: <{confluence_link}|Confluence Page>'
    if json_report_url:
        message += f'\n:link: JSON report can be found here: <{json_report_url}|JSON Report>'
    return message


def send_slack_notification(webhook_url, message):
    """Post ``message`` to the Slack webhook; exit with status 1 on failure."""
    import requests

    slack_data = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": message
                }
            }
        ]
    }
    headers = {'Content-Type': "application/json"}
    try:
        response = requests.post(webhook_url, data=json.dumps(slack_data), headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as err:
        print("ERROR: Failed to send notification to Slack.")
        print(f'ERROR: {err}')
        sys.exit(1)
    print("Notification sent to Slack successfully.")


def main(argv=None):
    """Run the report: parse inputs, update Confluence, notify Slack."""
    time_zone, repo_name, branch_name, json_report_url = parse_args(
        sys.argv[1:] if argv is None else argv
    )
    scan_time = format_scan_time(time_zone)
    confluence_settings = load_confluence_settings()
    slack_webhook_url = load_slack_webhook_url()

    # read JSON from file
    with open(REPORT_PATH, "r", encoding="utf-8") as report_file:
        findings = json.load(report_file)

    confluence_link = ""
    if confluence_settings is not None:
        section = render_section(repo_name, branch_name, scan_time, findings)
        page_id = publish_to_confluence(confluence_settings, section, repo_name, branch_name)
        # The title is URL-encoded because a raw space would end the URL part
        # of Slack's <url|label> link syntax.
        confluence_link = "{}/wiki/spaces/{}/pages/{}/{}".format(
            confluence_settings["site"].rstrip("/"),
            confluence_settings["space"],
            page_id,
            quote_plus(confluence_settings["title"]),
        )

    if slack_webhook_url is not None:
        # unique commit authors, sorted for a stable message
        authors = sorted({finding["Author"] for finding in findings})
        message = build_slack_message(
            repo_name, branch_name, scan_time, len(findings), authors,
            confluence_link=confluence_link, json_report_url=json_report_url,
        )
        send_slack_notification(slack_webhook_url, message)


if __name__ == "__main__":
    main()