[tools] Also count input bytes in tools/locs.py

As Marja suggested, this CL changes tools/locs.py so that it also counts
the bytes of the input file and of the preprocessed output, in addition to
the lines of code.

Example output now looks similar to this:

Processed 1,526 files in 24.58 sec.
gen         (   31 files):    94,507 LoC (  4,972 kB) to    2,839,311 LoC (183,777 kB) (   30x)
src         (  630 files):   371,499 LoC ( 14,743 kB) to   53,707,841 LoC (  3,155 MB) (  145x)
test        (  381 files):   492,861 LoC ( 25,372 kB) to   36,885,988 LoC (  2,194 MB) (   75x)
third_party (  433 files):   239,155 LoC (  8,683 kB) to    9,713,872 LoC (412,829 kB) (   41x)
total       ( 1526 files): 1,212,675 LoC ( 54,242 kB) to  104,133,982 LoC (  5,973 MB) (   86x)
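
For reviewers skimming the change, here is a minimal, self-contained sketch
(not part of the CL) of how the numbers flow: each compile unit now runs
`wc -lc` twice, once on the preprocessed output and once on the raw source,
so the combined shell output carries four integers that Main() unpacks as
expanded lines, expanded bytes, input lines, and input bytes. The sample
string and numbers below are invented purely for illustration.

  # Hypothetical combined output of the two `wc -lc` calls
  # (preprocessed counts first, then the raw input counts).
  sample = "2839311 188184576 94507 5091328"

  expanded, expanded_bytes, loc, in_bytes = map(int, sample.split())

  # Same thresholds as the fmt_bytes() helper introduced by this CL.
  def fmt_bytes(n):
    if n > 1024 * 1024 * 1024:
      return int(n / (1024 * 1024)), "MB"
    elif n > 1024 * 1024:
      return int(n / 1024), "kB"
    return int(n), " B"

  in_b, in_unit = fmt_bytes(in_bytes)
  exp_b, exp_unit = fmt_bytes(expanded_bytes)
  print("{:>9,} LoC ({:>7,} {}) to {:>12,} LoC ({:>7,} {})".format(
      loc, in_b, in_unit, expanded, exp_b, exp_unit))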


Change-Id: I1ff5e752ee3a96d388a4393c2592aec68f834000
Notry: true
Reviewed-on: https://chromium-review.googlesource.com/c/1450113
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59322}

--- a/tools/locs.py
+++ b/tools/locs.py
@@ -147,23 +147,33 @@ def GenerateCompileCommandsAndBuild(build_dir, compile_commands_file, out):
   return compile_commands_file
 
+def fmt_bytes(bytes):
+  if bytes > 1024*1024*1024:
+    return int(bytes / (1024*1024)), "MB"
+  elif bytes > 1024*1024:
+    return int(bytes / (1024)), "kB"
+  return int(bytes), " B"
+
 class CompilationData:
-  def __init__(self, loc, expanded):
+  def __init__(self, loc, in_bytes, expanded, expanded_bytes):
     self.loc = loc
+    self.in_bytes = in_bytes
     self.expanded = expanded
+    self.expanded_bytes = expanded_bytes
 
   def ratio(self):
     return self.expanded / (self.loc+1)
 
   def to_string(self):
-    return "{:>9,} to {:>12,} ({:>5.0f}x)".format(
-        self.loc, self.expanded, self.ratio())
+    exp_bytes, exp_unit = fmt_bytes(self.expanded_bytes)
+    in_bytes, in_unit = fmt_bytes(self.in_bytes)
+    return "{:>9,} LoC ({:>7,} {}) to {:>12,} LoC ({:>7,} {}) ({:>5.0f}x)".format(
+        self.loc, in_bytes, in_unit, self.expanded, exp_bytes, exp_unit, self.ratio())
 
 class File(CompilationData):
-  def __init__(self, file, loc, expanded):
-    super().__init__(loc, expanded)
+  def __init__(self, file, loc, in_bytes, expanded, expanded_bytes):
+    super().__init__(loc, in_bytes, expanded, expanded_bytes)
     self.file = file
 
   def to_string(self):
@@ -172,7 +182,7 @@ class File(CompilationData):
 class Group(CompilationData):
   def __init__(self, name, regexp_string):
-    super().__init__(0, 0)
+    super().__init__(0, 0, 0, 0)
     self.name = name
     self.count = 0
     self.regexp = re.compile(regexp_string)
@@ -180,7 +190,9 @@ class Group(CompilationData):
   def account(self, unit):
     if (self.regexp.match(unit.file)):
       self.loc += unit.loc
+      self.in_bytes += unit.in_bytes
       self.expanded += unit.expanded
+      self.expanded_bytes += unit.expanded_bytes
       self.count += 1
 
   def to_string(self, name_width):
@@ -236,8 +248,8 @@ class Results:
         is_tracked = True
     return is_tracked
 
-  def recordFile(self, filename, loc, expanded):
-    unit = File(filename, loc, expanded)
+  def recordFile(self, filename, loc, in_bytes, expanded, expanded_bytes):
+    unit = File(filename, loc, in_bytes, expanded, expanded_bytes)
     self.units[filename] = unit
     for group in self.groups.values():
       group.account(unit)
@@ -257,9 +269,11 @@ class Results:
 class LocsEncoder(json.JSONEncoder):
   def default(self, o):
     if isinstance(o, File):
-      return {"file": o.file, "loc": o.loc, "expanded": o.expanded}
+      return {"file": o.file, "loc": o.loc, "in_bytes": o.in_bytes,
+              "expanded": o.expanded, "expanded_bytes": o.expanded_bytes}
     if isinstance(o, Group):
-      return {"name": o.name, "loc": o.loc, "expanded": o.expanded}
+      return {"name": o.name, "loc": o.loc, "in_bytes": o.in_bytes,
+              "expanded": o.expanded, "expanded_bytes": o.expanded_bytes}
     if isinstance(o, Results):
       return {"groups": o.groups, "units": o.units}
     return json.JSONEncoder.default(self, o)
@@ -317,16 +331,15 @@ def Main():
     for i, key in enumerate(data):
      if not result.track(key['file']):
        continue
-      if not ARGS['json']:
-        status.print(
-            "[{}/{}] Counting LoCs of {}".format(i, len(data), key['file']))
+      status.print("[{}/{}] Counting LoCs of {}".format(i, len(data), key['file']),
+                   file=out)
      clangcmd, infilename, infile, outfile = cmd_splitter.process(key, temp)
      outfile.parent.mkdir(parents=True, exist_ok=True)
      if infile.is_file():
        clangcmd = clangcmd + " -E -P " + \
-            str(infile) + " -o /dev/stdout | sed '/^\\s*$/d' | wc -l"
+            str(infile) + " -o /dev/stdout | sed '/^\\s*$/d' | wc -lc"
        loccmd = ("cat {} | sed '\\;^\\s*//;d' | sed '\\;^/\\*;d'"
-                  " | sed '/^\\*/d' | sed '/^\\s*$/d' | wc -l").format(
+                  " | sed '/^\\*/d' | sed '/^\\s*$/d' | wc -lc").format(
                      infile)
        runcmd = " {} ; {}".format(clangcmd, loccmd)
        if ARGS['echocmd']:
@@ -339,8 +352,8 @@ def Main():
      status.print("[{}/{}] Summing up {}".format(
          i, len(processes), p['infile']), file=out)
      output, err = p['process'].communicate()
-      expanded, loc = list(map(int, output.split()))
-      result.recordFile(p['infile'], loc, expanded)
+      expanded, expanded_bytes, loc, in_bytes = list(map(int, output.split()))
+      result.recordFile(p['infile'], loc, in_bytes, expanded, expanded_bytes)
    end = time.time()
    if ARGS['json']: