v8/tools/locs.py
Sigurd Schneider 40633b4f48 [tools] Also count input bytes in tools/locs.py
As Marja suggested, this CL changes tools/locs.py in such a way that
it also counts the bytes in the input file and the output file.

Example output now looks similar to this:

Processed 1,526 files in 24.58 sec.
gen         (   31 files):    94,507 LoC (  4,972 kB) to    2,839,311 LoC (183,777 kB) (   30x)
src         (  630 files):   371,499 LoC ( 14,743 kB) to   53,707,841 LoC (  3,155 MB) (  145x)
test        (  381 files):   492,861 LoC ( 25,372 kB) to   36,885,988 LoC (  2,194 MB) (   75x)
third_party (  433 files):   239,155 LoC (  8,683 kB) to    9,713,872 LoC (412,829 kB) (   41x)
total       ( 1526 files): 1,212,675 LoC ( 54,242 kB) to  104,133,982 LoC (  5,973 MB) (   86x)


Change-Id: I1ff5e752ee3a96d388a4393c2592aec68f834000
Notry: true
Reviewed-on: https://chromium-review.googlesource.com/c/1450113
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59322}
2019-02-04 09:34:28 +00:00

390 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright 2018 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
""" locs.py - Count lines of code before and after preprocessor expansion
Consult --help for more information.
"""
import argparse
import json
import os
import re
import subprocess
import sys
import tempfile
import time
from pathlib import Path
# Command-line parser for the script. The epilog is a raw string because the
# example regular expressions contain backslashes ('\.'), which are invalid
# escape sequences in a normal string literal.
ARGPARSE = argparse.ArgumentParser(
    description=("A script that computes LoC for a build dir or from a "
                 "compile_commands.json file"),
    epilog=r"""Examples:
Count with default settings for build in out/Default:
locs.py --build-dir out/Default
Count with default settings according to given compile_commands file:
locs.py --compile-commands compile_commands.json
Count only a custom group of files settings for build in out/Default:
tools/locs.py --build-dir out/Default
--group src-compiler '\.\./\.\./src/compiler'
--only src-compiler
Report the 10 files with the worst expansion:
tools/locs.py --build-dir out/Default --worst 10
Report the 10 files with the worst expansion in src/compiler:
tools/locs.py --build-dir out/Default --worst 10
--group src-compiler '\.\./\.\./src/compiler'
--only src-compiler
Report the 10 largest files after preprocessing:
tools/locs.py --build-dir out/Default --largest 10
Report the 10 smallest input files:
tools/locs.py --build-dir out/Default --smallest 10""",
    formatter_class=argparse.RawTextHelpFormatter
)
# --- Output mode and input selection ---------------------------------------
ARGPARSE.add_argument(
    "--json", action="store_true",
    help="output json instead of short summary")
ARGPARSE.add_argument(
    "--build-dir", type=str, default="",
    help="Use specified build dir and generate necessary files")
ARGPARSE.add_argument(
    "--echocmd", action="store_true",
    help="output command used to compute LoC")
ARGPARSE.add_argument(
    "--compile-commands", type=str, default="compile_commands.json",
    help="Use specified compile_commands.json file")

# --- Report group selection -------------------------------------------------
ARGPARSE.add_argument(
    "--only", action="append", default=[],
    help="Restrict counting to report group (can be passed multiple times)")
ARGPARSE.add_argument(
    "--not", action="append", default=[],
    help="Exclude specific group (can be passed multiple times)")
ARGPARSE.add_argument(
    "--list-groups", action="store_true",
    help="List groups and associated regular expressions")
ARGPARSE.add_argument(
    "--group", nargs=2, action="append", default=[],
    help="Add a report group (can be passed multiple times)")

# --- Per-file listings -------------------------------------------------------
# Each of these takes an optional integer n: 0 when the flag is absent and 3
# when the flag is given without a value.
for _flag, _help_text in (
        ("--largest", "Output the n largest files after preprocessing"),
        ("--worst", "Output the n files with worst expansion by preprocessing"),
        ("--smallest", "Output the n smallest input files"),
        ("--files", "Output results for each file separately"),
):
    ARGPARSE.add_argument(
        _flag, type=int, nargs="?", default=0, const=3, help=_help_text)

# Parsed command line as a plain dict keyed by option destination
# (e.g. 'build_dir', 'list_groups', 'not').
ARGS = vars(ARGPARSE.parse_args())
def MaxWidth(strings):
    """Return the length of the longest item in `strings` (0 if empty)."""
    return max((len(item) for item in strings), default=0)
def GenerateCompileCommandsAndBuild(build_dir, compile_commands_file, out):
    """Generate compile_commands.json for a build dir and build generated files.

    Runs `ninja -t compdb cxx cc` for `build_dir`, storing its output in
    `<build_dir>/compile_commands.json`, then builds the
    `v8_generated_cc_files` target with `autoninja`.

    Args:
      build_dir: Path of the ninja build directory.
      compile_commands_file: Ignored; the result is always written to
        `<build_dir>/compile_commands.json`. Kept for interface
        compatibility.
      out: File object receiving progress messages and autoninja's stdout.

    Returns:
      The path of the generated compile_commands.json file.

    Exits the process with status 1 (after printing a message to stderr) if
    `build_dir` is not a directory or either tool fails.
    """
    if not os.path.isdir(build_dir):
        print("Error: Specified build dir {} is not a directory.".format(
            build_dir), file=sys.stderr)
        sys.exit(1)

    compile_commands_file = "{}/compile_commands.json".format(build_dir)
    print("Generating compile commands in {}.".format(
        compile_commands_file), file=out)

    # Commands are passed as argument lists (no shell) so that build dirs
    # containing spaces or shell metacharacters work. A tool that cannot be
    # started at all is reported the same way as a tool that fails.
    try:
        with open(compile_commands_file, "w") as compdb:
            ninja_status = subprocess.call(
                ["ninja", "-C", build_dir, "-t", "compdb", "cxx", "cc"],
                stdout=compdb)
    except OSError:
        ninja_status = 1
    if ninja_status != 0:
        print("Error: Could not generate {} for {}.".format(
            compile_commands_file, build_dir), file=sys.stderr)
        sys.exit(1)

    try:
        autoninja_status = subprocess.call(
            ["autoninja", "-C", build_dir, "v8_generated_cc_files"],
            stdout=out)
    except OSError:
        autoninja_status = 1
    if autoninja_status != 0:
        print("Error: Building target 'v8_generated_cc_files'"
              " failed for {}.".format(build_dir), file=sys.stderr)
        sys.exit(1)

    return compile_commands_file
def fmt_bytes(bytes):
    """Scale a byte count for display and return `(value, unit)`.

    The unit only changes once the count exceeds the *next* larger unit:
    counts up to 1 MiB are reported in " B", counts up to 1 GiB in "kB"
    and anything larger in "MB". The value is truncated to an int.
    """
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024
    if bytes <= mib:
        return int(bytes), " B"
    if bytes <= gib:
        return int(bytes / kib), "kB"
    return int(bytes / mib), "MB"
class CompilationData:
    """Line and byte counts of source code before and after preprocessing."""

    def __init__(self, loc, in_bytes, expanded, expanded_bytes):
        # Counts for the input as written ...
        self.loc, self.in_bytes = loc, in_bytes
        # ... and for the preprocessor output.
        self.expanded, self.expanded_bytes = expanded, expanded_bytes

    def ratio(self):
        """Return expanded lines per input line.

        The denominator is `loc + 1`, so an input with zero counted lines
        does not divide by zero.
        """
        return self.expanded / (self.loc + 1)

    def to_string(self):
        """Return the fixed-width summary "<in> LoC (..) to <out> LoC (..) (Nx)"."""
        template = "{:>9,} LoC ({:>7,} {}) to {:>12,} LoC ({:>7,} {}) ({:>5.0f}x)"
        out_amount, out_unit = fmt_bytes(self.expanded_bytes)
        in_amount, in_unit = fmt_bytes(self.in_bytes)
        return template.format(
            self.loc, in_amount, in_unit,
            self.expanded, out_amount, out_unit,
            self.ratio())
class File(CompilationData):
    """Counts for a single input file, identified by its file name."""

    def __init__(self, file, loc, in_bytes, expanded, expanded_bytes):
        super().__init__(loc, in_bytes, expanded, expanded_bytes)
        # Name under which this file is matched against report groups and
        # shown in listings.
        self.file = file

    def to_string(self):
        """Return the base summary followed by the file name."""
        summary = super().to_string()
        return "{} {}".format(summary, self.file)
class Group(CompilationData):
    """Accumulated counts of all files whose name matches a regular expression."""

    def __init__(self, name, regexp_string):
        super().__init__(0, 0, 0, 0)
        self.name = name
        # Number of files accounted to this group so far.
        self.count = 0
        self.regexp = re.compile(regexp_string)

    def account(self, unit):
        """Add `unit`'s counts to this group if its file name matches.

        Matching uses `re.match`, i.e. the pattern is anchored at the start
        of the file name.
        """
        if not self.regexp.match(unit.file):
            return
        self.loc += unit.loc
        self.in_bytes += unit.in_bytes
        self.expanded += unit.expanded
        self.expanded_bytes += unit.expanded_bytes
        self.count += 1

    def to_string(self, name_width):
        """Return a summary line with the name left-aligned to `name_width`."""
        totals = super().to_string()
        return "{:<{}} ({:>5} files): {}".format(
            self.name, name_width, self.count, totals)
def SetupReportGroups():
    """Build the report groups selected on the command line.

    Starts from the default groups, adds or overrides groups given via
    `--group NAME REGEXP`, keeps only those named by `--only` (if any) and
    drops those named by `--not`. With `--list-groups`, the resulting group
    names and regular expressions are printed to stdout.

    Returns:
      A dict mapping group name to a `Group` instance.

    Exits the process with status 1 if `--only` names an undefined group.
    """
    report_groups = {
        "total": r'.*',
        "src": r'\.\./\.\./src',
        "test": r'\.\./\.\./test',
        "third_party": r'\.\./\.\./third_party',
        "gen": r'gen',
    }
    # ARGS['group'] is a list of [name, regexp] pairs; user-defined groups
    # override defaults of the same name.
    report_groups.update(ARGS['group'])

    only = ARGS['only']
    if only:
        for only_arg in only:
            if only_arg not in report_groups:
                # Name the offending group rather than the whole list.
                print("Error: specified report group '{}' is not defined."
                      .format(only_arg), file=sys.stderr)
                sys.exit(1)
        report_groups = {
            k: v for (k, v) in report_groups.items() if k in only}

    excluded = ARGS['not']
    if excluded:
        report_groups = {
            k: v for (k, v) in report_groups.items() if k not in excluded}

    if ARGS['list_groups']:
        print_cat_max_width = MaxWidth(list(report_groups.keys()) + ["Category"])
        print(" {:<{}} {}".format("Category",
                                  print_cat_max_width, "Regular expression"))
        for cat, regexp_string in report_groups.items():
            print(" {:<{}}: {}".format(
                cat, print_cat_max_width, regexp_string))

    return {k: Group(k, v) for (k, v) in report_groups.items()}
class Results:
    """Collects per-file counts and the report groups they are accounted to."""

    def __init__(self):
        self.groups = SetupReportGroups()
        # Maps file name -> File.
        self.units = {}

    def track(self, filename):
        """Return True if `filename` matches at least one report group."""
        return any(
            group.regexp.match(filename) for group in self.groups.values())

    def recordFile(self, filename, loc, in_bytes, expanded, expanded_bytes):
        """Store the counts for `filename` and offer them to every group."""
        self.units[filename] = unit = File(
            filename, loc, in_bytes, expanded, expanded_bytes)
        for group in self.groups.values():
            group.account(unit)

    def maxGroupWidth(self):
        """Return the length of the longest group name."""
        return MaxWidth([group.name for group in self.groups.values()])

    def printGroupResults(self, file):
        """Print one summary line per group, ordered by group key, to `file`."""
        name_width = self.maxGroupWidth()
        for group_key in sorted(self.groups):
            print(self.groups[group_key].to_string(name_width), file=file)

    def printSorted(self, key, count, reverse, out):
        """Print the first `count` files, ordered by `key`, to `out`."""
        ranked = sorted(self.units.values(), key=key, reverse=reverse)
        for unit in ranked[:count]:
            print(unit.to_string(), file=out)
class LocsEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize File, Group and Results."""

    def default(self, o):
        """Return a JSON-serializable dict for the script's own types."""
        if isinstance(o, File):
            encoded = {"file": o.file}
        elif isinstance(o, Group):
            encoded = {"name": o.name}
        elif isinstance(o, Results):
            return {"groups": o.groups, "units": o.units}
        else:
            # Unknown type: defer to the base class.
            return super().default(o)
        # File and Group share the same four count fields.
        encoded.update({"loc": o.loc, "in_bytes": o.in_bytes,
                        "expanded": o.expanded,
                        "expanded_bytes": o.expanded_bytes})
        return encoded
class StatusLine:
    """Prints status messages padded to the widest message printed so far.

    The padding makes a shorter message fully overwrite a longer one when
    messages are terminated with a carriage return.
    """

    def __init__(self):
        self.max_width = 0

    def print(self, statusline, end="\r", file=sys.stdout):
        """Write `statusline`, space-padded on the right, to `file` and flush."""
        if len(statusline) > self.max_width:
            self.max_width = len(statusline)
        padded = statusline.ljust(self.max_width)
        print(padded, end=end, file=file, flush=True)
class CommandSplitter:
    """Splits a clang compile command into compiler invocation and file names."""

    def __init__(self):
        # Optional leading word (e.g. a wrapper), then everything from the
        # word containing "clang" up to " -c <infile> -o <outfile>".
        self.cmd_pattern = re.compile(
            r"([^\s]*\s+)?(?P<clangcmd>[^\s]*clang.*)"
            r" -c (?P<infile>.*) -o (?P<outfile>.*)")

    def process(self, compilation_unit, temp_file_name):
        """Split the command of one compile_commands.json entry.

        Args:
          compilation_unit: Dict with at least the keys 'command' and
            'directory'.
          temp_file_name: Directory under which the output path is placed.

        Returns:
          A list `[clangcmd, infilename, infile, outfile]` where `clangcmd`
          is the compiler invocation without the `-c`/`-o` part,
          `infilename` is the input file as written in the command, `infile`
          is that name joined onto the entry's directory and `outfile` is
          the output name plus ".cc" joined onto `temp_file_name` (both as
          `Path` objects).

        Raises:
          ValueError: If the command is not a clang invocation of the form
            `... -c <infile> -o <outfile>`.
        """
        command = compilation_unit['command']
        cmd = self.cmd_pattern.match(command)
        if cmd is None:
            # Fail with a readable message instead of an AttributeError on
            # None further down.
            raise ValueError(
                "Cannot parse compile command (expected a clang invocation "
                "with '-c <infile> -o <outfile>'): {}".format(command))
        outfilename = cmd.group('outfile') + ".cc"
        infilename = cmd.group('infile')
        infile = Path(compilation_unit['directory']).joinpath(infilename)
        outfile = Path(str(temp_file_name)).joinpath(outfilename)
        return [cmd.group('clangcmd'), infilename, infile, outfile]
def Main():
    """Count lines and bytes before and after preprocessing and report them.

    Reads the compile commands (generating them first when --build-dir is
    given), runs the preprocessor and a comment/blank-line-stripping line
    count for every tracked file in parallel shell pipelines, then prints
    the group summary and any requested per-file listings.

    With --json, the JSON document is the only thing written to stdout;
    all human-readable output goes to stderr.

    Returns:
      0 on success. Exits the process with status 1 if the compile commands
      file cannot be found.
    """
    compile_commands_file = ARGS['compile_commands']
    out = sys.stderr if ARGS['json'] else sys.stdout
    if ARGS['build_dir']:
        compile_commands_file = GenerateCompileCommandsAndBuild(
            ARGS['build_dir'], compile_commands_file, out)

    try:
        with open(compile_commands_file) as file:
            data = json.load(file)
    except FileNotFoundError:
        print("Error: Cannot read '{}'. Consult --help to get started.".format(
            compile_commands_file), file=sys.stderr)
        sys.exit(1)

    result = Results()
    status = StatusLine()

    with tempfile.TemporaryDirectory(dir='/tmp/', prefix="locs.") as temp:
        processes = []
        start = time.time()
        cmd_splitter = CommandSplitter()

        # Phase 1: start one shell pipeline per tracked input file.
        for i, key in enumerate(data):
            if not result.track(key['file']):
                continue
            status.print(
                "[{}/{}] Counting LoCs of {}".format(i, len(data), key['file']),
                file=out)
            clangcmd, infilename, infile, outfile = cmd_splitter.process(
                key, temp)
            outfile.parent.mkdir(parents=True, exist_ok=True)
            if not infile.is_file():
                continue
            # First pipeline: preprocess, drop blank lines, count lines and
            # bytes. Second pipeline: count the input file itself after
            # dropping comment-looking and blank lines.
            clangcmd = clangcmd + " -E -P " + \
                str(infile) + " -o /dev/stdout | sed '/^\\s*$/d' | wc -lc"
            loccmd = ("cat {} | sed '\\;^\\s*//;d' | sed '\\;^/\\*;d'"
                      " | sed '/^\\*/d' | sed '/^\\s*$/d' | wc -lc").format(
                          infile)
            runcmd = " {} ; {}".format(clangcmd, loccmd)
            if ARGS['echocmd']:
                # Goes to `out` so that --json output on stdout stays valid.
                print(runcmd, file=out)
            p = subprocess.Popen(
                runcmd, shell=True, cwd=key['directory'],
                stdout=subprocess.PIPE)
            processes.append({'process': p, 'infile': infilename})

        # Phase 2: collect the four numbers printed by each pipeline pair.
        for i, p in enumerate(processes):
            status.print("[{}/{}] Summing up {}".format(
                i, len(processes), p['infile']), file=out)
            output, err = p['process'].communicate()
            expanded, expanded_bytes, loc, in_bytes = list(
                map(int, output.split()))
            result.recordFile(
                p['infile'], loc, in_bytes, expanded, expanded_bytes)

    end = time.time()
    if ARGS['json']:
        print(json.dumps(result, ensure_ascii=False, cls=LocsEncoder))
    status.print("Processed {:,} files in {:,.2f} sec.".format(
        len(processes), end-start), end="\n", file=out)
    result.printGroupResults(file=out)

    # (option, heading, sort key, descending?) for each optional listing.
    listings = (
        ('largest', "Largest {} files after expansion:",
         lambda v: v.expanded, True),
        ('worst', "Worst expansion ({} files):",
         lambda v: v.ratio(), True),
        ('smallest', "Smallest {} input files:",
         lambda v: v.loc, False),
        ('files', "List of input files:",
         lambda v: v.file, False),
    )
    for option, heading, sort_key, descending in listings:
        count = ARGS[option]
        if count:
            # Headings follow the listing to `out` instead of always going
            # to stdout.
            print(heading.format(count), file=out)
            result.printSorted(sort_key, count, reverse=descending, out=out)

    return 0
# Script entry point: run Main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(Main())