manodeep · May 19, 2025 03:59
diff --git a/valgrind_filter.py b/valgrind_filter.py
 #!/usr/bin/env python
 # Written by Manodeep Sinha, May 2025 to filter out valgrind error logs. 
 # Regex idea extended from https://stackoverflow.com/a/34407168

 # Untested. Use at your own risk. - Manodeep Sinha, May 19, 2025

 import fileinput
 import re


 def main(discard_string_list=None, always_print_if_main_is_present=False):
     """Print the valgrind error chunks that survive a substring filter.

     Input is read with ``fileinput.input()``, i.e. from the files named on
     the command line, or from standard input when none are given. Each
     surviving chunk is written to standard output with ``print``, so it is
     followed by one extra newline.

     An error chunk is defined as follows:

     1. It starts with a line matching the START regex (one of the valgrind
        error headings listed in the pattern below).
     2. It ends at the next line matching the STOP regex, which is a line
        holding just ``==<pid>== `` (i.e. otherwise empty). The STOP line is
        not part of the chunk. A chunk that is still open when the input
        ends is treated as complete rather than being dropped.
     3. It is only considered for printing when the GOOD regex (a source
        file extension followed by ``:<line number>``) matches more than
        twice inside the chunk.

     A chunk that passes (3) is discarded when any entry of
     ``discard_string_list`` occurs as a substring of its last two lines.

     Valgrind suppression files would be the proper tool for this, but they
     require re-running valgrind every time the suppression file changes.
     This filter works on the *same* log, so the application runs only once.

     Known limitation: the filter only inspects the last two lines of a
     chunk, so a real error that originates in user code but executes in
     library code can be filtered out. To reduce that risk,
     ``always_print_if_main_is_present`` rescues every otherwise-discarded
     chunk that contains the substring "main" anywhere in its text.

     Parameters
     ----------
     discard_string_list : list of str, optional
         Substrings that mark a chunk as irrelevant. When omitted (or empty),
         a built-in list specific to gadi and the ESM1.6 PI standalone setup
         (May 2025) is used.
     always_print_if_main_is_present : bool, optional
         When true, a chunk that would be discarded is still printed if the
         substring "main" occurs anywhere in it.

     Returns
     -------
     None
     """

     START = re.compile(r"(Conditional jump or move depends on uninitialised value|Use of uninitialised value|Syscall param|Invalid free|Invalid write|Invalid read)")
     STOP = re.compile(r"^==\d+== $")
     GOOD = re.compile(r"\.(f90|c|cpp|cxx|F90):\d+", re.M)

     # These defaults are specific to gadi and to the ESM1.6 PI standalone
     # setup used in May 2025.
     if not discard_string_list:
         discard_string_list = ["ucp_worker_iface_open", "uct_rc_iface_verbs_init_rx", "PMPI_Init", "MPI_COMM_SPLIT",
                                "mpi_comm_split", "ompi_comm_split_with_info", "opal_hwloc_base_get_topology",
                                "libuct_xpmem.so.0.0.0", "librdmacm.so.1.3.56.0", "libibverbs.so.1.14.56.0",
                                "libmlx5.so.1.25.56.0", "/usr/lib64/libc-2.28.so"]

     def emit(chunk_lines):
         # Decide whether one completed chunk is printed.
         chunk_text = "".join(chunk_lines)
         # NOTE(review): the earlier description said "at least one" source
         # match, but the code has always required more than two; the
         # threshold is kept as-is -- confirm which one is intended.
         if len(GOOD.findall(chunk_text)) <= 2:
             return
         tail = "".join(chunk_lines[-2:])
         keep = not any(s in tail for s in discard_string_list)
         if not keep and always_print_if_main_is_present:
             # Plain substring test: any occurrence of "main" rescues the chunk.
             keep = "main" in chunk_text
         if keep:
             print(chunk_text)

     # A non-empty list means we are currently inside an error chunk; the
     # START line is always appended, so the list doubles as the state flag.
     error_chunk = []
     with fileinput.input() as log_lines:
         for line in log_lines:
             if error_chunk:
                 if STOP.search(line):
                     emit(error_chunk)
                     error_chunk = []
                 else:
                     error_chunk.append(line)
             elif START.search(line):
                 error_chunk.append(line)

     # The log may end (e.g. be truncated) before the closing STOP line;
     # flush the pending chunk so that error is not silently lost.
     if error_chunk:
         emit(error_chunk)

 # Script entry point: run the filter with its default settings only when this
 # file is executed directly, not when it is imported as a module.
 if __name__ == "__main__":
  main()
	#!/usr/bin/env python
	# Written by Manodeep Sinha, May 2025 to filter out valgrind error logs.
	# Regex idea extended from https://stackoverflow.com/a/34407168

	# Untested. Use at your own risk. - Manodeep Sinha, May 19, 2025

	import fileinput
	import re


	def main(discard_string_list=None, always_print_if_main_is_present=False):
	    """Print the valgrind error chunks that survive a substring filter.

	    Input is read with ``fileinput.input()``, i.e. from the files named on
	    the command line, or from standard input when none are given. Each
	    surviving chunk is written to standard output with ``print``, so it is
	    followed by one extra newline.

	    An error chunk is defined as follows:

	    1. It starts with a line matching the START regex (one of the valgrind
	       error headings listed in the pattern below).
	    2. It ends at the next line matching the STOP regex, which is a line
	       holding just ``==<pid>== `` (i.e. otherwise empty). The STOP line is
	       not part of the chunk. A chunk that is still open when the input
	       ends is treated as complete rather than being dropped.
	    3. It is only considered for printing when the GOOD regex (a source
	       file extension followed by ``:<line number>``) matches more than
	       twice inside the chunk.

	    A chunk that passes (3) is discarded when any entry of
	    ``discard_string_list`` occurs as a substring of its last two lines.

	    Valgrind suppression files would be the proper tool for this, but they
	    require re-running valgrind every time the suppression file changes.
	    This filter works on the same log, so the application runs only once.

	    Known limitation: the filter only inspects the last two lines of a
	    chunk, so a real error that originates in user code but executes in
	    library code can be filtered out. To reduce that risk,
	    ``always_print_if_main_is_present`` rescues every otherwise-discarded
	    chunk that contains the substring "main" anywhere in its text.

	    Parameters
	    ----------
	    discard_string_list : list of str, optional
	        Substrings that mark a chunk as irrelevant. When omitted (or empty),
	        a built-in list specific to gadi and the ESM1.6 PI standalone setup
	        (May 2025) is used.
	    always_print_if_main_is_present : bool, optional
	        When true, a chunk that would be discarded is still printed if the
	        substring "main" occurs anywhere in it.

	    Returns
	    -------
	    None
	    """

	    # The alternation bars must be plain "|": an escaped "\|" would match a
	    # literal pipe character and the patterns would never match a log line.
	    START = re.compile(r"(Conditional jump or move depends on uninitialised value|Use of uninitialised value|Syscall param|Invalid free|Invalid write|Invalid read)")
	    STOP = re.compile(r"^==\d+== $")
	    GOOD = re.compile(r"\.(f90|c|cpp|cxx|F90):\d+", re.M)

	    # These defaults are specific to gadi and to the ESM1.6 PI standalone
	    # setup used in May 2025.
	    if not discard_string_list:
	        discard_string_list = ["ucp_worker_iface_open", "uct_rc_iface_verbs_init_rx", "PMPI_Init", "MPI_COMM_SPLIT",
	                               "mpi_comm_split", "ompi_comm_split_with_info", "opal_hwloc_base_get_topology",
	                               "libuct_xpmem.so.0.0.0", "librdmacm.so.1.3.56.0", "libibverbs.so.1.14.56.0",
	                               "libmlx5.so.1.25.56.0", "/usr/lib64/libc-2.28.so"]

	    def emit(chunk_lines):
	        # Decide whether one completed chunk is printed.
	        chunk_text = "".join(chunk_lines)
	        # NOTE(review): the earlier description said "at least one" source
	        # match, but the code has always required more than two; the
	        # threshold is kept as-is -- confirm which one is intended.
	        if len(GOOD.findall(chunk_text)) <= 2:
	            return
	        tail = "".join(chunk_lines[-2:])
	        keep = not any(s in tail for s in discard_string_list)
	        if not keep and always_print_if_main_is_present:
	            # Plain substring test: any occurrence of "main" rescues the chunk.
	            keep = "main" in chunk_text
	        if keep:
	            print(chunk_text)

	    # A non-empty list means we are currently inside an error chunk; the
	    # START line is always appended, so the list doubles as the state flag.
	    error_chunk = []
	    with fileinput.input() as log_lines:
	        for line in log_lines:
	            if error_chunk:
	                if STOP.search(line):
	                    emit(error_chunk)
	                    error_chunk = []
	                else:
	                    error_chunk.append(line)
	            elif START.search(line):
	                error_chunk.append(line)

	    # The log may end (e.g. be truncated) before the closing STOP line;
	    # flush the pending chunk so that error is not silently lost.
	    if error_chunk:
	        emit(error_chunk)

	# Script entry point: run the filter with its default settings only when this
	# file is executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()