etc/ci/performance/download_buildbot_timings.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import argparse
import csv
from datetime import datetime, date
import json
from math import floor
import os
from urllib.request import urlopen, HTTPError

SCRIPT_PATH = os.path.split(__file__)[0]


def _fetch_json(url):
    """Download *url* and return its body parsed as UTF-8 encoded JSON."""
    with urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))


def _load_build(args, builder, build_number):
    """Return the JSON data of one build, from the cache or the network.

    Returns ``None`` when the downloaded build reports a ``currentStep``
    (it is still running), in which case nothing is cached. A 404 response
    is turned into an empty dict and cached like any other result, so the
    same missing build is not requested again on the next run. Any other
    HTTP error propagates to the caller.
    """
    # Rather annoyingly, we can't just use the Python http cache,
    # because it doesn't cache 404 responses. So we roll our own.
    cache_json_name = args.cache_name.format(builder, build_number)
    cache_json = os.path.join(args.cache_dir, cache_json_name)
    if os.path.isfile(cache_json):
        try:
            with open(cache_json, encoding='utf-8') as f:
                return json.load(f)
        except ValueError:
            # The cache entry is truncated or otherwise not valid JSON
            # (json.JSONDecodeError is a ValueError). Fall through and
            # download it again instead of failing on every run.
            pass

    # Get the build data
    build_url = args.build_url.format(builder, build_number)
    if args.verbose:
        print("Downloading build {}.".format(build_url))
    try:
        build = _fetch_json(build_url)
    except HTTPError as e:
        if e.code != 404:
            raise
        build = {}

    # Don't cache current builds.
    if build.get('currentStep'):
        return None

    # Write to a temporary name first and rename afterwards, so an
    # interrupted run never leaves a half-written file in the cache.
    tmp_json = cache_json + '.tmp'
    with open(tmp_json, 'w', encoding='utf-8') as f:
        json.dump(build, f)
    os.replace(tmp_json, cache_json)
    return build


def _write_csv(output, builds):
    """Write one CSV row per finished step of every build in *builds*."""
    # newline='' is required by the csv module: the writer emits its own
    # '\r\n' terminators, and newline translation would corrupt them on
    # platforms where '\n' is rewritten.
    with open(output, 'w', newline='', encoding='utf-8') as output_file:
        output_csv = csv.writer(output_file)

        # The CSV column names
        output_csv.writerow([
            'builder',
            'buildNumber',
            'buildTimestamp',
            'stepName',
            'stepText',
            'stepNumber',
            'stepStart',
            'stepFinish'
        ])

        for build in builds:
            builder = build["builderName"]
            build_number = build["number"]
            build_timestamp = datetime.fromtimestamp(build["times"][0]).replace(microsecond=0)

            # Write out the timing data for each step
            for step in build["steps"]:
                if not step["isFinished"]:
                    continue
                output_csv.writerow([
                    builder,
                    build_number,
                    build_timestamp,
                    step["name"],
                    ' '.join(step["text"]),
                    step["step_number"],
                    floor(step["times"][0]),
                    floor(step["times"][1])
                ])


def main(argv=None):
    """Download buildbot build metadata and save step timings as CSV files.

    The builder names are read from the index URL. For every builder, each
    build numbered below the most recent one is fetched (or read from the
    on-disk cache). Builds that carry a ``times`` entry are grouped by the
    local-time year and month of their start and written to one CSV file
    per month in the output directory.

    Args:
        argv: command-line arguments to parse, without the program name.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.
    """
    default_output_dir = os.path.join(SCRIPT_PATH, 'output')
    default_cache_dir = os.path.join(SCRIPT_PATH, '.cache')

    parser = argparse.ArgumentParser(
        description="Download buildbot metadata"
    )
    parser.add_argument("--index-url",
                        type=str,
                        default='https://build.servo.org/json',
                        help="the URL to get the JSON index data index from. "
                        "Default: https://build.servo.org/json")
    parser.add_argument("--build-url",
                        type=str,
                        default='https://build.servo.org/json/builders/{}/builds/{}',
                        help="the URL to get the JSON build data from. "
                        "Default: https://build.servo.org/json/builders/{}/builds/{}")
    parser.add_argument("--cache-dir",
                        type=str,
                        default=default_cache_dir,
                        help="the directory to cache JSON files in. Default: " + default_cache_dir)
    parser.add_argument("--cache-name",
                        type=str,
                        default='build-{}-{}.json',
                        help="the filename to cache JSON data in. "
                        "Default: build-{}-{}.json")
    parser.add_argument("--output-dir",
                        type=str,
                        default=default_output_dir,
                        help="the directory to save the CSV data to. Default: " + default_output_dir)
    parser.add_argument("--output-name",
                        type=str,
                        default='builds-{}-{}.csv',
                        help="the filename to save the CSV data to. "
                        "Default: builds-{}-{}.csv")
    parser.add_argument("--verbose", "-v",
                        action='store_true',
                        help="print every HTTP request")
    args = parser.parse_args(argv)

    os.makedirs(args.cache_dir, exist_ok=True)
    os.makedirs(args.output_dir, exist_ok=True)

    # Get the index to find out the list of builder names
    # Note: this isn't cached
    if args.verbose:
        print("Downloading index {}.".format(args.index_url))
    index = _fetch_json(args.index_url)

    timed_builds = []

    for builder in sorted(index["builders"]):
        # The most recent build is at offset -1
        # Fetch it to find out the build number
        # Note: this isn't cached
        recent_build_url = args.build_url.format(builder, -1)
        if args.verbose:
            print("Downloading recent build {}.".format(recent_build_url))
        recent_build_number = _fetch_json(recent_build_url)["number"]

        # Download each build below the most recent one. range() stops
        # before recent_build_number, so the most recent build itself is
        # not included.
        # NOTE(review): presumably because it may still be running - confirm.
        for build_number in range(recent_build_number):
            build = _load_build(args, builder, build_number)
            # Running builds come back as None; cached 404s are empty dicts.
            if build is not None and 'times' in build:
                timed_builds.append(build)

    # Group the builds by the year and month in which they started.
    years = {}
    for build in timed_builds:
        build_date = date.fromtimestamp(build['times'][0])
        years.setdefault(build_date.year, {}).setdefault(build_date.month, []).append(build)

    for year, months in years.items():
        for month, month_builds in months.items():
            output_name = args.output_name.format(year, month)
            output = os.path.join(args.output_dir, output_name)

            # Create the CSV file.
            if args.verbose:
                print('Creating file {}.'.format(output))
            _write_csv(output, month_builds)


# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()