From 65d8187595a31602c40b35fa50a1ccf15c0daf14 Mon Sep 17 00:00:00 2001
From: krcroft
Date: Tue, 28 Jan 2020 11:40:53 -0800
Subject: [PATCH] Test more criteria during PVS analysis

This commit:

- Adds a separate analysis run against the MISRA (Motor Industry
  Software Reliability Association) criteria, which is extremely
  thorough. This tally is not summarized or considered fatal to the
  workflow. It runs virtually instantly, and the results are very
  interesting; however, they are too numerous to include in our
  general analysis (i.e. over 13,000 issues).

- Changes the PVS summary script output to a tally per file instead
  of trying to summarize the nature of each issue, which was mostly
  unhelpful without the full text. Example output is shown below.

- Adds the full list of suppressible issues to the report directory,
  so if further suppressions are needed they will be easy to find
  and use.

- Adds one dr_flac suppression per the resolution here:
  mackron/dr_libs#117
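For illustration, the reworked summary prints an aligned per-file
tally sorted in descending order, along the following lines (the
filenames and counts shown here are hypothetical):

    Sorted by issue count:

     src/hardware/sblaster.cpp : 14
     src/dos/dos_programs.cpp  : 9
     src/gui/sdlmain.cpp       : 2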
---
 .github/workflows/analysis.yml | 27 ++++++++++++++++---------
 .pvs-suppress                  |  8 ++++++++
 scripts/count-pvs-bugs.py      | 36 +++++++++++++++-------------------
 3 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
index 162cb498..1f062a07 100644
--- a/.github/workflows/analysis.yml
+++ b/.github/workflows/analysis.yml
@@ -103,22 +103,31 @@ jobs:
       - name: Analyze
         run: |
           set -xeu
-          pvs-studio-analyzer analyze -s .pvs-suppress -o pvs-analysis.log -j "$(nproc)"
-          criteria="GA:1,2;64:1;OP:1,2,3;CS:1;MISRA:1,2"
-          plog-converter -a "${criteria}" -d V1042 -t csv -o pvs-report.csv pvs-analysis.log
-          mkdir -p pvs-analysis-report
+          log="pvs-analysis.log"
+          misra_criteria="MISRA:1,2"
+          general_criteria="GA:1,2;64:1;OP:1,2,3;CS:1"
           stamp="$(date +'%Y-%m-%d_T%H%M')-${GITHUB_SHA:0:8}"
-          plog-converter -a "${criteria}" -d V1042 -t fullhtml -p dosbox-staging \
-            -v "${GITHUB_SHA:0:8}" -o "pvs-analysis-report/pvs-analysis-report-${stamp}" \
-            pvs-analysis.log
+          reportdir="pvs-report/pvs-report-${stamp}"
+          mkdir -p "${reportdir}"
+          pvs-studio-analyzer analyze -a 63 -s .pvs-suppress -o "${log}" -j "$(nproc)"
+          plog-converter -a "${general_criteria}" -d V1042 -t fullhtml -p dosbox-staging \
+            -v "${GITHUB_SHA:0:8}" -o "${reportdir}" "${log}"
+          mv "${reportdir}/fullhtml" "${reportdir}/general"
+          plog-converter -a "${misra_criteria}" -d V1042 -t fullhtml -p dosbox-staging \
+            -v "${GITHUB_SHA:0:8}" -o "${reportdir}" "${log}"
+          mv "${reportdir}/fullhtml" "${reportdir}/misra"
+          plog-converter -a "${general_criteria}" -d V1042 -t csv -o pvs-report.csv "${log}"
+          cp -l pvs-report.csv "${reportdir}/general/"
+          pvs-studio-analyzer suppress -a "${general_criteria}" \
+            -o "${reportdir}/general/suppressible-list.json" "${log}"
       - name: Upload report
         uses: actions/upload-artifact@master
         with:
           name: pvs-analysis-report
-          path: pvs-analysis-report
+          path: pvs-report
       - name: Summarize report
         env:
-          MAX_BUGS: 304
+          MAX_BUGS: 627
         run: |
           echo "Full report is included in build Artifacts"
           echo
diff --git a/.pvs-suppress b/.pvs-suppress
index 78e1196c..92f28451 100644
--- a/.pvs-suppress
+++ b/.pvs-suppress
@@ -33,6 +33,14 @@
     "FileName": "xxhash.c",
     "Message": "A call of the 'memcpy' function will lead to underflow of the buffer '& state'."
   },
+  {
+    "CodeCurrent": 3039254062,
+    "CodeNext": 3404253786,
+    "CodePrev": 3282303307,
+    "ErrorCode": "V560",
+    "FileName": "dr_flac.h",
+    "Message": "A part of conditional expression is always true: blockSize >= _."
+  },
   {
     "CodeCurrent": 2009695132,
     "CodeNext": 17733,
diff --git a/scripts/count-pvs-bugs.py b/scripts/count-pvs-bugs.py
index f8afe5d5..e41164e4 100755
--- a/scripts/count-pvs-bugs.py
+++ b/scripts/count-pvs-bugs.py
@@ -25,20 +25,20 @@ import sys
 
 
 def parse_issues(filename):
     """
-    Returns a dict of int keys and a list of string values, where the:
-     - keys are V### PVS-Studio error codes
-     - values are the message of the issue as found in a specific file
+    Returns a dict of source filename keys having occurrence-count values
     """
-    issues = collections.defaultdict(list)
+    cwd = os.getcwd()
+    issues = collections.defaultdict(int)
     with open(filename) as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
-            full = row['ErrorCode'] # extract the full code as an URL string
-            code = full[full.rfind('V'):full.rfind('"')] # get the trailing "V###" code
-            if code.startswith('V'):
-                # Convert the V### string into an integer for easy sorting
-                issues[int(code[1:])].append(row['Message'])
+            sourcefile = os.path.realpath(row['FilePath'])
+            # Skip non-file lines
+            if not sourcefile.startswith('/'):
+                continue
+            sourcefile = os.path.relpath(sourcefile, cwd)
+            issues[sourcefile] += 1
     return issues
 
 
@@ -48,20 +48,16 @@ def main(argv):
 
     # Get the issues and the total tally
     issues = parse_issues(argv[1])
-    tally = sum(len(messages) for messages in issues.values())
+    tally = sum(issues.values())
 
     if tally > 0:
-        # Step through the codes and summarize
-        print("Issues are tallied and sorted by code:\n")
-        print(" code | issue-string in common to all instances | tally")
-        print(" ----- --------------------------------------------- -----")
+        # find the longest source filename
+        longest_name = max(len(sourcefile) for sourcefile in issues.keys())
+        # Print the source filenames and their issue counts
+        print("Sorted by issue count:\n")
 
-        for code in sorted(issues.keys()):
-            messages = issues[code]
-            in_common = os.path.commonprefix(messages)[:45]
-            if len(in_common.split(' ')) < 4:
-                in_common = 'N/A (too little in-common between issues)'
-            print(f' [{code:4}] {in_common:45} : {len(messages)}')
+        for sourcefile in sorted(issues, key=issues.get, reverse=True):
+            print(f' {sourcefile:{longest_name}} : {issues[sourcefile]}')
 
     # Print the tally against the desired maximum
     if len(sys.argv) == 3:
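
P.S. A quick sketch of exercising the reworked summary locally,
assuming the same conversion step the workflow runs above (the
trailing maximum-bug argument is optional; 627 mirrors the
workflow's MAX_BUGS value):

    plog-converter -a "GA:1,2;64:1;OP:1,2,3;CS:1" -d V1042 -t csv \
        -o pvs-report.csv pvs-analysis.log
    ./scripts/count-pvs-bugs.py pvs-report.csv 627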