BaseTools/Plugin/CodeQL/analyze/analyze_filter.py - edk2 - Git at Google

 # @file analyze_filter.py
 #
 # Filters results in a SARIF file.
 #
 #            Apache License
 #      Version 2.0, January 2004
 #   http://www.apache.org/licenses/
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # This file has been altered from its original form. Based on code in:
 #   https://github.com/advanced-security/filter-sarif
 #
 # It primarily contains modifications made to integrate with the CodeQL plugin.
 #
 # Specifically:
 #   https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
 #
 # View the full and complete license as provided by that repository here:
 #   https://github.com/advanced-security/filter-sarif/blob/main/LICENSE
 #
 # SPDX-License-Identifier: Apache-2.0
 ##

 import json
 import logging
 import re
 from os import PathLike
 from typing import Iterable, List, Tuple

 from analyze.globber import match


 def _match_path_and_rule(
     path: str,
     rule: str,
     patterns: Iterable[Tuple[bool, str, str]]) -> bool:
     """Decides whether a (path, rule) pair is kept by the filter patterns.

     Every pattern is consulted in order. The sign of the last pattern whose
     rule pattern matches `rule` and whose file pattern matches `path` is
     returned, so later patterns override earlier ones.

     Args:
         path (str): A file path string.
         rule (str): A rule ID string.
         patterns (Iterable[Tuple[bool, str, str]]): Parsed patterns, each a
             (sign, file pattern, rule pattern) tuple.

     Returns:
         bool: The sign of the last matching pattern. True if no pattern
               matches (including when `patterns` is empty).
     """
     result = True
     for sign, file_pattern, rule_pattern in patterns:
         # Do not stop at the first hit: a later pattern may flip the result.
         if match(rule_pattern, rule) and match(file_pattern, path):
             result = sign
     return result


 def _parse_pattern(line: str) -> Tuple[bool, str, str]:
     """Parses a given pattern line.

     A line has the form ``[+|-]<file pattern>[:<rule pattern>]``. A leading
     '-' makes the pattern negative; a leading '+' (or no prefix) makes it
     positive. A backslash escapes a following '+', '-', '\\' or ':' so that
     the character is taken literally; a backslash in front of any other
     character is kept as-is.

     Args:
         line (str): The line string that contains the rule.

     Returns:
         Tuple[bool, str, str]: The parsed sign, file pattern, and rule
                                pattern from the line. The rule pattern
                                defaults to '**' when the line has none.

     Raises:
         ValueError: If the line contains more than one unescaped separator.
     """
     sep_char = ':'
     esc_char = '\\'
     escapable = ('+', '-', esc_char, sep_char)

     # inclusion or exclusion pattern?
     sign = not line.startswith('-')
     u_line = line[1:] if line.startswith(('+', '-')) else line

     # Characters are collected in lists and joined once at the end.
     file_chars = []
     rule_chars = []
     target = file_chars
     seen_separator = False

     i = 0
     while i < len(u_line):
         c = u_line[i]
         i += 1
         if c == sep_char:
             if seen_separator:
                 raise ValueError(
                     'Invalid pattern: "' + line + '" Contains more than one '
                     'separator!')
             seen_separator = True
             # Everything after the separator belongs to the rule pattern.
             target = rule_chars
             continue
         if c == esc_char and i < len(u_line) and u_line[i] in escapable:
             # Drop the backslash and take the escaped character literally.
             c = u_line[i]
             i += 1
         target.append(c)

     file_pattern = ''.join(file_chars)
     rule_pattern = ''.join(rule_chars) or '**'

     return sign, file_pattern, rule_pattern


 def filter_sarif(input_sarif: PathLike,
                  output_sarif: PathLike,
                  patterns: List[str],
                  split_lines: bool) -> None:
     """Filters a SARIF file with a given set of filter patterns.

     Each location of each result is checked against the patterns. Locations
     that are filtered out are removed, and a result is dropped once none of
     its locations remain. Results that cannot be matched (no locations, or
     no rule ID available) and locations without a URI are kept unchanged.

     Errors raised while parsing a pattern or while reading/writing the files
     are not handled here and propagate to the caller.

     Args:
         input_sarif (PathLike): Input SARIF file path.
         output_sarif (PathLike): Output SARIF file path.
         patterns (List[str]): List of filter pattern strings.
         split_lines (bool): Whether to split each pattern string on line
                             breaks into individual patterns.
     """
     if split_lines:
         # One input string may carry several newline-separated patterns.
         patterns = [
             line for p in patterns for line in re.split('\r?\n', p)]

     # Empty strings (e.g. blank lines) are skipped.
     parsed_patterns = [_parse_pattern(p) for p in patterns if p]

     logging.debug('Given patterns:')
     for sign, file_pattern, rule_pattern in parsed_patterns:
         logging.debug(
             'files: %s    rules: %s (%s)',
             file_pattern,
             rule_pattern,
             'positive' if sign else 'negative')

     # Read as UTF-8 explicitly instead of relying on the locale default.
     with open(input_sarif, 'r', encoding='utf-8') as f:
         sarif = json.load(f)

     for run in sarif.get('runs', []):
         results = run.get('results', [])
         if not results:
             continue

         kept_results = []
         for result in results:
             locations = result.get('locations', [])

             # The ruleId field is optional; fall back to the id of the
             # result's "rule" reference when it is absent.
             # TODO: If neither is present, the rule could still be resolved
             #       from the rule metadata via the ruleIndex field.
             # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#rule-metadata)
             rule_id = result.get('ruleId')
             if rule_id is None:
                 rule_id = (result.get('rule') or {}).get('id')

             if not locations or rule_id is None:
                 # Nothing to match on (no locations, or no rule ID), so the
                 # result is included in the output untouched.
                 kept_results.append(result)
                 continue

             kept_locations = []
             for location in locations:
                 # TODO: The uri field is optional. We might have to
                 #       fetch the actual uri from "artifacts" via
                 #       "index"
                 # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#-linking-results-to-artifacts)
                 uri = location.get(
                     'physicalLocation', {}).get(
                         'artifactLocation', {}).get(
                             'uri', None)

                 # A location without a URI cannot be matched and is kept.
                 if (uri is None or
                         _match_path_and_rule(uri, rule_id, parsed_patterns)):
                     kept_locations.append(location)

             result['locations'] = kept_locations
             # A result with no remaining locations is dropped entirely.
             if kept_locations:
                 kept_results.append(result)

         run['results'] = kept_results

     with open(output_sarif, 'w', encoding='utf-8') as f:
         json.dump(sarif, f, indent=2)
	# @file analyze_filter.py
	#
	# Filters results in a SARIF file.
	#
	# Apache License
	# Version 2.0, January 2004
	# http://www.apache.org/licenses/
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	# This file has been altered from its original form. Based on code in:
	# https://github.com/advanced-security/filter-sarif
	#
	# It primarily contains modifications made to integrate with the CodeQL plugin.
	#
	# Specifically:
	# https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
	#
	# View the full and complete license as provided by that repository here:
	# https://github.com/advanced-security/filter-sarif/blob/main/LICENSE
	#
	# SPDX-License-Identifier: Apache-2.0
	##

	import json
	import logging
	import re
	from os import PathLike
	from typing import Iterable, List, Tuple

	from analyze.globber import match


	def _match_path_and_rule(
	    path: str,
	    rule: str,
	    patterns: Iterable[Tuple[bool, str, str]]) -> bool:
	    """Decides whether a (path, rule) pair is kept by the filter patterns.

	    Every pattern is consulted in order. The sign of the last pattern whose
	    rule pattern matches `rule` and whose file pattern matches `path` is
	    returned, so later patterns override earlier ones.

	    Args:
	        path (str): A file path string.
	        rule (str): A rule ID string.
	        patterns (Iterable[Tuple[bool, str, str]]): Parsed patterns, each a
	            (sign, file pattern, rule pattern) tuple.

	    Returns:
	        bool: The sign of the last matching pattern. True if no pattern
	              matches (including when `patterns` is empty).
	    """
	    result = True
	    for sign, file_pattern, rule_pattern in patterns:
	        # Do not stop at the first hit: a later pattern may flip the result.
	        if match(rule_pattern, rule) and match(file_pattern, path):
	            result = sign
	    return result


	def _parse_pattern(line: str) -> Tuple[bool, str, str]:
	    """Parses a given pattern line.

	    A line has the form ``[+|-]<file pattern>[:<rule pattern>]``. A leading
	    '-' makes the pattern negative; a leading '+' (or no prefix) makes it
	    positive. A backslash escapes a following '+', '-', '\\' or ':' so that
	    the character is taken literally; a backslash in front of any other
	    character is kept as-is.

	    Args:
	        line (str): The line string that contains the rule.

	    Returns:
	        Tuple[bool, str, str]: The parsed sign, file pattern, and rule
	                               pattern from the line. The rule pattern
	                               defaults to '**' when the line has none.

	    Raises:
	        ValueError: If the line contains more than one unescaped separator.
	    """
	    sep_char = ':'
	    esc_char = '\\'
	    escapable = ('+', '-', esc_char, sep_char)

	    # inclusion or exclusion pattern?
	    sign = not line.startswith('-')
	    u_line = line[1:] if line.startswith(('+', '-')) else line

	    # Characters are collected in lists and joined once at the end.
	    file_chars = []
	    rule_chars = []
	    target = file_chars
	    seen_separator = False

	    i = 0
	    while i < len(u_line):
	        c = u_line[i]
	        i += 1
	        if c == sep_char:
	            if seen_separator:
	                raise ValueError(
	                    'Invalid pattern: "' + line + '" Contains more than one '
	                    'separator!')
	            seen_separator = True
	            # Everything after the separator belongs to the rule pattern.
	            target = rule_chars
	            continue
	        if c == esc_char and i < len(u_line) and u_line[i] in escapable:
	            # Drop the backslash and take the escaped character literally.
	            c = u_line[i]
	            i += 1
	        target.append(c)

	    file_pattern = ''.join(file_chars)
	    rule_pattern = ''.join(rule_chars) or '**'

	    return sign, file_pattern, rule_pattern


	def filter_sarif(input_sarif: PathLike,
	                 output_sarif: PathLike,
	                 patterns: List[str],
	                 split_lines: bool) -> None:
	    """Filters a SARIF file with a given set of filter patterns.

	    Each location of each result is checked against the patterns. Locations
	    that are filtered out are removed, and a result is dropped once none of
	    its locations remain. Results that cannot be matched (no locations, or
	    no rule ID available) and locations without a URI are kept unchanged.

	    Errors raised while parsing a pattern or while reading/writing the files
	    are not handled here and propagate to the caller.

	    Args:
	        input_sarif (PathLike): Input SARIF file path.
	        output_sarif (PathLike): Output SARIF file path.
	        patterns (List[str]): List of filter pattern strings.
	        split_lines (bool): Whether to split each pattern string on line
	                            breaks into individual patterns.
	    """
	    if split_lines:
	        # One input string may carry several newline-separated patterns.
	        patterns = [
	            line for p in patterns for line in re.split('\r?\n', p)]

	    # Empty strings (e.g. blank lines) are skipped.
	    parsed_patterns = [_parse_pattern(p) for p in patterns if p]

	    logging.debug('Given patterns:')
	    for sign, file_pattern, rule_pattern in parsed_patterns:
	        logging.debug(
	            'files: %s    rules: %s (%s)',
	            file_pattern,
	            rule_pattern,
	            'positive' if sign else 'negative')

	    # Read as UTF-8 explicitly instead of relying on the locale default.
	    with open(input_sarif, 'r', encoding='utf-8') as f:
	        sarif = json.load(f)

	    for run in sarif.get('runs', []):
	        results = run.get('results', [])
	        if not results:
	            continue

	        kept_results = []
	        for result in results:
	            locations = result.get('locations', [])

	            # The ruleId field is optional; fall back to the id of the
	            # result's "rule" reference when it is absent.
	            # TODO: If neither is present, the rule could still be resolved
	            #       from the rule metadata via the ruleIndex field.
	            # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#rule-metadata)
	            rule_id = result.get('ruleId')
	            if rule_id is None:
	                rule_id = (result.get('rule') or {}).get('id')

	            if not locations or rule_id is None:
	                # Nothing to match on (no locations, or no rule ID), so the
	                # result is included in the output untouched.
	                kept_results.append(result)
	                continue

	            kept_locations = []
	            for location in locations:
	                # TODO: The uri field is optional. We might have to
	                #       fetch the actual uri from "artifacts" via
	                #       "index"
	                # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#-linking-results-to-artifacts)
	                uri = location.get(
	                    'physicalLocation', {}).get(
	                        'artifactLocation', {}).get(
	                            'uri', None)

	                # A location without a URI cannot be matched and is kept.
	                if (uri is None or
	                        _match_path_and_rule(uri, rule_id, parsed_patterns)):
	                    kept_locations.append(location)

	            result['locations'] = kept_locations
	            # A result with no remaining locations is dropped entirely.
	            if kept_locations:
	                kept_results.append(result)

	        run['results'] = kept_results

	    with open(output_sarif, 'w', encoding='utf-8') as f:
	        json.dump(sarif, f, indent=2)