Add clang-tidy yaml report converter

Currently clang-tidy reports are parsed and converted to plist files from the clang-tidy standard outputs. This change adds a new report converter that uses clang-tidy's yaml outputs as inputs for parsing and creating plist files.
Ericsson · Sep 19, 2024 · c4e8a95 · c4e8a95
1 parent b049040
commit c4e8a95
Show file tree

Hide file tree

Showing 22 changed files with 1,447 additions and 15 deletions.
diff --git a/analyzer/codechecker_analyzer/analyzers/clangtidy/result_handler.py b/analyzer/codechecker_analyzer/analyzers/clangtidy/result_handler.py
@@ -9,11 +9,10 @@
 Result handler for Clang Tidy.
 """
 
+import os
 from typing import Optional
 
-from codechecker_report_converter.analyzers.clang_tidy.analyzer_result import \
-    AnalyzerResult
-from codechecker_report_converter.analyzers.clang_tidy.parser import Parser
+
 from codechecker_report_converter.report.parser.base import AnalyzerInfo
 from codechecker_report_converter.report import report_file
 from codechecker_report_converter.report.hash import get_report_hash, HashType
@@ -24,6 +23,23 @@
 
 from ..result_handler_base import ResultHandler
 
+# For testing purposes only - change bool to change parser
+# Selects which clang-tidy report parser this module imports:
+#   True  -> 'clang_tidy_yaml' (parses the YAML file written by clang-tidy)
+#   False -> 'clang_tidy' (parses the standard output of clang-tidy)
+# Both branches bind the same names (AnalyzerResult, Parser), so the rest of
+# the module uses them without knowing which implementation was picked.
+# NOTE(review): the flag is named 'yaml', which would shadow the PyYAML
+# module if this file ever imported it - consider renaming before release.
+yaml = True
+
+if not yaml:
+    from codechecker_report_converter.analyzers.clang_tidy.analyzer_result \
+        import (AnalyzerResult)
+    from codechecker_report_converter.analyzers.clang_tidy.parser import (
+        Parser)
+    # NOTE(review): this runs at import time and writes to stdout of every
+    # process importing this module - looks like a debugging aid.
+    print('------Tidy STDOUT------')
+else:
+    from codechecker_report_converter.analyzers.clang_tidy_yaml.analyzer_result \
+        import (AnalyzerResult)
+    from codechecker_report_converter.analyzers.clang_tidy_yaml.parser \
+        import (Parser)
+    # NOTE(review): same import-time debugging output as in the branch above.
+    print('------Tidy YAML------')
+
+
 LOG = get_logger('report')
 
 
@@ -47,10 +63,21 @@ def postprocess_result(
         into the database.
         """
         LOG.debug_analyzer(self.analyzer_stdout)
-        tidy_stdout = self.analyzer_stdout.splitlines()
 
-        reports = Parser().get_reports_from_iter(tidy_stdout)
-        reports = [r for r in reports if not r.skip(skip_handlers)]
+        # For testing purposes only
+
+        if not yaml:
+            tidy_stdout = self.analyzer_stdout.splitlines()
+
+            reports = Parser().get_reports_from_iter(tidy_stdout)
+            reports = [r for r in reports if not r.skip(skip_handlers)]
+
+        else:
+            '''if not os.path.exists(self.fixit_file):
+                print("no clang-tidy findings")
+                with open(self.fixit_file, 'w') as f:
+                    f.write("")'''
+            reports = Parser().get_reports(self.fixit_file)
 
         # In the earlier versions of CodeChecker Clang Tidy never used context
         # free hash even if we enabled it with '--report-hash context-free'

diff --git a/tools/report-converter/codechecker_report_converter/analyzers/clang_tidy_yaml/__init__.py b/tools/report-converter/codechecker_report_converter/analyzers/clang_tidy_yaml/__init__.py
@@ -0,0 +1,7 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
diff --git a/...eport-converter/codechecker_report_converter/analyzers/clang_tidy_yaml/analyzer_result.py b/...eport-converter/codechecker_report_converter/analyzers/clang_tidy_yaml/analyzer_result.py
@@ -0,0 +1,26 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+from typing import List
+
+from codechecker_report_converter.report import Report
+
+from ..analyzer_result import AnalyzerResultBase
+from .parser import Parser
+
+
+class AnalyzerResult(AnalyzerResultBase):
+    """ Turn the YAML output of Clang Tidy into CodeChecker reports. """
+
+    TOOL_NAME = 'clang-tidy-yaml'
+    NAME = 'Clang Tidy'
+    URL = 'https://clang.llvm.org/extra/clang-tidy'
+
+    def get_reports(self, file_path: str) -> List[Report]:
+        """ Parse the given analyzer result file and return its reports. """
+        yaml_parser = Parser()
+        return yaml_parser.get_reports(file_path)
diff --git a/tools/report-converter/codechecker_report_converter/analyzers/clang_tidy_yaml/parser.py b/tools/report-converter/codechecker_report_converter/analyzers/clang_tidy_yaml/parser.py
@@ -0,0 +1,141 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+import os
+import yaml
+import logging
+from typing import Iterator, List, Tuple, Optional
+
+from codechecker_report_converter.report import BugPathEvent, \
+    get_or_create_file, Report
+
+from ..parser import BaseParser
+
+LOG = logging.getLogger('report-converter')
+
+
+def get_location_by_offset(filename, offset):
+    """
+    Return the (line, column) position, both 1-based, of the byte which is
+    located at the given 0-based offset in the given file.
+
+    clang-tidy reports offsets in bytes, so the file is read in binary mode:
+    decoding it as text would count a multi-byte character as one unit and
+    would collapse '\\r\\n' line endings to a single character, shifting every
+    location after them. The returned column is therefore counted in bytes.
+
+    An offset which equals to the size of the file is reported as the
+    position right after the last byte of the last line. None is returned
+    when the offset is greater than the size of the file or the file is
+    empty.
+    """
+    last_row, last_length = 0, 0
+
+    with open(filename, 'rb') as f:
+        for last_row, line in enumerate(f, 1):
+            last_length = len(line)
+            # Offsets are 0-based: an offset equal to the length of the line
+            # already belongs to the first byte of the next line.
+            if offset < last_length:
+                return last_row, offset + 1
+            offset -= last_length
+
+    # The offset points exactly to the end of the file (e.g. an insertion
+    # at the end of the file), which still has to be a valid location.
+    if last_row and offset == 0:
+        return last_row, last_length + 1
+
+    return None
+
+
+class Parser(BaseParser):
+    """
+    Parser for clang-tidy YAML output.
+
+    The expected input is a YAML document with a top-level 'Diagnostics'
+    list (the format written by the '--export-fixes' option of clang-tidy).
+    """
+
+    def get_reports(self, file_path: str) -> List[Report]:
+        """
+        Parse Clang-Tidy's YAML output file.
+
+        One report is returned for every entry of the 'Diagnostics' list
+        whose location can be resolved. An empty list is returned when the
+        document is empty, is not a mapping or contains no diagnostics.
+        """
+        with open(file_path, 'r', encoding='utf-8') as file:
+            data = yaml.safe_load(file)
+
+        # An empty file is loaded as None and a file with plain text is
+        # loaded as a scalar: none of these can contain diagnostics.
+        if not isinstance(data, dict):
+            return []
+
+        reports = []
+        # The key may be missing or may be set to null.
+        for diagnostic in data.get('Diagnostics') or []:
+            report = self._parse_diagnostic(diagnostic)
+            if report is not None:
+                reports.append(report)
+        return reports
+
+    @staticmethod
+    def _get_location(
+        file_path: str,
+        offset: int
+    ) -> Optional[Tuple[int, int]]:
+        """
+        Get the (line, column) position of the given offset in the file.
+
+        None is returned (and a warning is logged) when the file cannot be
+        read or the offset does not fit into the file, so one bad location
+        does not abort the conversion of the whole result file.
+        """
+        try:
+            location = get_location_by_offset(file_path, offset)
+        except OSError as err:
+            LOG.warning("Failed to read source file '%s': %s", file_path, err)
+            return None
+
+        if location is None:
+            LOG.warning("Offset %s is out of range in file '%s'.",
+                        offset, file_path)
+
+        return location
+
+    def _parse_diagnostic(self, diagnostic: dict) -> Optional[Report]:
+        """
+        Parse a Clang-Tidy diagnostic.
+
+        The report is placed at the location of 'DiagnosticMessage'. The
+        replacements (fixits) and the notes without a file path are stored
+        in the notes of the report, the notes with a file path become bug
+        path events.
+
+        None is returned when the location of the diagnostic cannot be
+        resolved. Replacements and notes with an unresolvable location are
+        left out of the report.
+        """
+        checker_name = diagnostic['DiagnosticName']
+        diagnostic_message = diagnostic['DiagnosticMessage']
+        file_path = os.path.abspath(diagnostic_message['FilePath'])
+
+        location = self._get_location(
+            file_path, diagnostic_message['FileOffset'])
+        if location is None:
+            return None
+
+        line, col = location
+        file_obj = get_or_create_file(file_path, self._file_cache)
+
+        report = Report(
+            file=file_obj,
+            line=line,
+            column=col,
+            message=diagnostic_message['Message'].strip(),
+            checker_name=checker_name,
+            category=self._get_category(checker_name),
+            bug_path_events=[]
+        )
+
+        # Parse replacements (fixits) (if any)
+        for replacement in diagnostic_message.get('Replacements') or []:
+            replacement_path = os.path.abspath(replacement['FilePath'])
+            fixit_location = self._get_location(
+                replacement_path, replacement['Offset'])
+            if fixit_location is None:
+                continue
+
+            fixit_line, fixit_col = fixit_location
+            report.notes.append(
+                BugPathEvent(
+                    f"{replacement['ReplacementText']} (fixit)",
+                    get_or_create_file(replacement_path, self._file_cache),
+                    fixit_line,
+                    fixit_col
+                )
+            )
+
+        # Parse notes (if any)
+        for note in diagnostic.get('Notes') or []:
+            note_message = note['Message'].strip()
+
+            if not note.get('FilePath'):
+                # A note without own location is attached to the location
+                # of the diagnostic itself.
+                report.notes.append(
+                    BugPathEvent(note_message, file_obj, line, col))
+                continue
+
+            note_path = os.path.abspath(note['FilePath'])
+            note_location = self._get_location(note_path, note['FileOffset'])
+            if note_location is None:
+                continue
+
+            note_line, note_col = note_location
+            report.bug_path_events.append(
+                BugPathEvent(
+                    note_message,
+                    get_or_create_file(note_path, self._file_cache),
+                    note_line,
+                    note_col
+                )
+            )
+
+        # A report needs at least one bug path event: fall back to the
+        # location of the diagnostic when no note provided any.
+        if not report.bug_path_events:
+            report.bug_path_events.append(BugPathEvent(
+                report.message,
+                report.file,
+                report.line,
+                report.column))
+
+        report.bug_path_events = report.bug_path_events[::-1]
+
+        return report
+
+    def _get_category(self, checker_name: str) -> str:
+        """
+        Get category for Clang-Tidy checker.
+
+        The category is the part of the checker name before the first '-'.
+        'unknown' is returned when this part is empty.
+        """
+        # str.split() never returns an empty list, so the emptiness of the
+        # first part has to be checked instead of the emptiness of the list.
+        return checker_name.split('-', 1)[0] or 'unknown'
+
+    def _parse_line(self, it: Iterator[str], line: str) -> Tuple[
+            List[Report], str]:
+        # FIXME: This method is a placeholder to allow instantiation of the
+        #  Parser class.
+        # The _parse_line method is required because Parser is an abstract
+        # class that expects this method to be implemented in subclasses.
+        return [], ""
diff --git a/tools/report-converter/requirements_py/dev/requirements.txt b/tools/report-converter/requirements_py/dev/requirements.txt
@@ -4,3 +4,6 @@ pycodestyle==2.12.0
 pylint==3.2.4
 portalocker==2.2.1
 mypy==1.7.1
+setuptools==70.2.0
+PyYAML==6.0.1
+types-PyYAML==6.0.12
diff --git a/tools/report-converter/tests/unit/analyzers/test_clang_tidy_yaml_parser.py b/tools/report-converter/tests/unit/analyzers/test_clang_tidy_yaml_parser.py
@@ -0,0 +1,115 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+"""
+This module tests the correctness of the clang-tidy YAML AnalyzerResult, which
+transforms a Clang Tidy YAML output file to a plist file.
+"""
+
+
+import os
+import plistlib
+import shutil
+import tempfile
+import unittest
+
+from codechecker_report_converter.analyzers.clang_tidy_yaml import (
+    analyzer_result)
+from codechecker_report_converter.report.parser import plist
+
+
+OLD_PWD = None
+
+
+def setup_module():
+    """Save the working directory and enter the test data directory."""
+    global OLD_PWD
+    OLD_PWD = os.getcwd()
+
+    # The tests refer to their input files with relative paths.
+    test_files_dir = os.path.join(os.path.dirname(__file__),
+                                  'tidy_yaml_output_test_files')
+    os.chdir(test_files_dir)
+
+
+def teardown_module():
+    """Go back to the working directory saved in OLD_PWD."""
+    # Reading a module-level name does not need a 'global' declaration.
+    os.chdir(OLD_PWD)
+
+
+class ClangTidyYamlAnalyzerResultTestCase(unittest.TestCase):
+    """ Test the output of the ClangTidyYamlAnalyzerResult. """
+
+    def setUp(self):
+        """ Setup the test. """
+        self.analyzer_result = analyzer_result.AnalyzerResult()
+        self.cc_result_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """ Clean temporary directory. """
+        shutil.rmtree(self.cc_result_dir)
+
+    def __check_analyzer_result(self, analyzer_result, analyzer_result_plist,
+                                source_files, expected_plist):
+        """
+        Check the result of the analyzer transformation.
+
+        Transforms the 'analyzer_result' file into the temporary result
+        directory and compares the generated 'analyzer_result_plist' file
+        to the 'expected_plist' file. The file list and the version of the
+        generated plist are normalized before the comparison.
+        """
+        self.analyzer_result.transform(
+            [analyzer_result], self.cc_result_dir, plist.EXTENSION,
+            file_name="{source_file}_{analyzer}")
+
+        plist_file = os.path.join(self.cc_result_dir, analyzer_result_plist)
+        with open(plist_file, mode='rb') as pfile:
+            res = plistlib.load(pfile)
+
+        with open(expected_plist, mode='rb') as pfile:
+            exp = plistlib.load(pfile)
+
+        # Use relative path for this test.
+        res['files'] = source_files
+
+        # The version depends on the environment: only its presence is
+        # checked, then it is replaced by the placeholder of the expected
+        # file.
+        self.assertTrue(res['metadata']['generated_by']['version'])
+        res['metadata']['generated_by']['version'] = "x.y.z"
+
+        self.assertEqual(res, exp)
+
+    def test_empty1(self):
+        """ Test for empty Messages. """
+        ret = self.analyzer_result.transform(
+            ['empty1.out'], self.cc_result_dir, plist.EXTENSION)
+        self.assertFalse(ret)
+
+    def test_empty2(self):
+        """ Test for empty Messages with multiple line. """
+        ret = self.analyzer_result.transform(
+            ['empty2.out'], self.cc_result_dir, plist.EXTENSION)
+        self.assertFalse(ret)
+
+    def test_tidy1(self):
+        """ Test for the tidy1.plist file. """
+        self.__check_analyzer_result('tidy1.out',
+                                     'test.cpp_clang-tidy-yaml.plist',
+                                     ['files/test.cpp'], 'tidy1.plist')
+
+    def test_tidy2(self):
+        """ Test for the tidy2.plist file. """
+        self.__check_analyzer_result('tidy2.out',
+                                     'test2.cpp_clang-tidy-yaml.plist',
+                                     ['files/test2.cpp'], 'tidy2.plist')
+
+    def test_tidy3(self):
+        """ Test for the tidy3_cpp.plist and the tidy3_hh.plist files. """
+        self.__check_analyzer_result('tidy3.out',
+                                     'test3.cpp_clang-tidy-yaml.plist',
+                                     ['files/test3.cpp'],
+                                     'tidy3_cpp.plist')
+
+        self.__check_analyzer_result('tidy3.out',
+                                     'test3.hh_clang-tidy-yaml.plist',
+                                     ['files/test3.cpp', 'files/test3.hh'],
+                                     'tidy3_hh.plist')
diff --git a/tools/report-converter/tests/unit/analyzers/tidy_output_test_files/tidy3_cpp.plist b/tools/report-converter/tests/unit/analyzers/tidy_output_test_files/tidy3_cpp.plist
@@ -25,14 +25,14 @@
 			<key>notes</key>
 			<array>
 				<dict>
-				<key>location</key>
-				<dict>
-					<key>col</key>
-					<integer>12</integer>
-					<key>file</key>
-					<integer>0</integer>
-					<key>line</key>
-					<integer>4</integer>
+					<key>location</key>
+					<dict>
+						<key>col</key>
+						<integer>12</integer>
+						<key>file</key>
+						<integer>0</integer>
+						<key>line</key>
+						<integer>4</integer>
 					</dict>
 					<key>message</key>
 					<string>nullptr (fixit)</string>
@@ -82,4 +82,4 @@
 		</dict>
 	</dict>
 </dict>
-</plist>
+</plist>