aboutsummaryrefslogtreecommitdiff
path: root/rumba/storyboard.py
blob: 8c73422c59cce8d53894ec110d862090c7f511bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#
# A library to manage ARCFIRE experiments
#
#    Copyright (C) 2017 Nextworks S.r.l.
#    Copyright (C) 2017 imec
#
#    Sander Vrijders   <sander.vrijders@ugent.be>
#    Vincenzo Maffione <v.maffione@nextworks.it>
#    Marco Capitani    <m.capitani@nextworks.it>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., http://www.fsf.org/about/contact/.
#

# Base class for client programs
#
# @ap: Application Process binary
# @nodes: Node (or list of Nodes) on which the client may be started
# @options: Options to pass to the binary
#
import random
import time

import rumba.model as model
import rumba.ssh_support as ssh_support
import rumba.log as log

logger = log.get_logger(__name__)

# Select the random-variate generators used by Server: numpy's when it
# can be imported, otherwise fallbacks exposing the same two names
# (poisson, exponential).
try:
    from numpy.random import poisson
    from numpy.random import exponential
    logger.debug("Using numpy for faster and better random variables.")
except ImportError:
    # numpy is unavailable: take poisson from rumba.recpoisson instead.
    from rumba.recpoisson import poisson

    def exponential(mean_duration):
        """Return one exponentially distributed sample with the given mean."""
        # random.expovariate expects the rate (1 / mean), not the mean.
        # NOTE(review): a mean_duration of 0 raises ZeroDivisionError here.
        return random.expovariate(1.0 / mean_duration)

    logger.debug("Falling back to simple implementations.")
    # PROBLEM! These logs are emitted while the module is being imported,
    # so they will almost never be printed... But we might not care.


class Client(object):
    """A client program and the nodes it may be started on.

    ap: Application Process binary
    nodes: a single Node, a list of Nodes, or None for "no nodes yet"
    options: Options to pass to the binary
    """

    def __init__(self, ap, nodes=None, options=None):
        self.ap = ap
        self.options = options
        # Normalise nodes to a list: wrap a lone Node, turn None into [].
        if isinstance(nodes, model.Node):
            self.nodes = [nodes]
        elif nodes is None:
            self.nodes = []
        else:
            self.nodes = nodes

    def add_node(self, node):
        """Append node to this client's nodes; reject anything but a Node."""
        if isinstance(node, model.Node):
            self.nodes.append(node)
            return
        raise Exception("A Node is required.")

    def process(self, duration):
        """Build a ClientProcess for this program lasting duration seconds.

        The process is bound to a node picked at random from self.nodes,
        or to None when the client has no nodes.
        """
        if len(self.nodes) > 0:
            target = random.choice(self.nodes)
        else:
            target = None
        return ClientProcess(self.ap, duration, target, self.options)


# Base class for client processes
#
# @ap: Application Process binary
# @duration: The time (in seconds) this process should run
# @node: The node on which this process should run
# @options: Options to pass to the binary
#
# The start_time attribute holds the time at which this process is started.
#
class ClientProcess(Client):
    """One run of a client program on a single node.

    ap: Application Process binary
    duration: The time (in seconds) this process should run
    node: The node to run on (may also be supplied later to run())
    options: Options to pass to the binary

    Attributes set while running:
      start_time -- time.time() at the moment run() was called, else None
      running    -- True only between a successful run() and stop()
      pid        -- value returned by node.execute_command() on launch
    """

    def __init__(self, ap, duration, node=None, options=None):
        super(ClientProcess, self).__init__(ap, node, options=options)
        self.duration = duration
        self.start_time = None
        self.running = False
        self.node = node
        self.pid = None

    def run(self, node=None):
        """Start the client through ./startup.sh on a node.

        node: node to start on. When omitted, the node passed to the
              constructor is kept (it is no longer overwritten with None).

        Raises Exception when no node is known. An SSHException during
        launch is logged and leaves the process marked as not running.
        """
        if node is not None:
            self.node = node
        if self.node is None:
            raise Exception('No node specified for client %s' % (self.ap,))
        self.start_time = time.time()

        logger.debug(
            'Starting client app %s on node %s with duration %s.',
            self.ap, self.node.name, self.duration
        )

        opt_str = self.options if self.options is not None else ""
        cmd = "./startup.sh %s %s" % (self.ap, opt_str)
        try:
            self.pid = self.node.execute_command(cmd)
        except ssh_support.SSHException:
            logger.warning('Could not start client %s on node %s.',
                           self.ap, self.node.name)
            # Nothing was launched: do not pretend the process is alive,
            # otherwise check()/stop() would later try to "kill None".
            self.pid = None
            self.running = False
            return
        self.running = True
        logger.debug('Client app %s on node %s got pid %s.',
                     self.ap, self.node.name, self.pid)

    def stop(self):
        """Kill the remote process, if one was started, and mark it stopped.

        A failure to deliver the kill command is logged, not raised.
        """
        if self.node is None or self.pid is None:
            # Never started (or launch failed): there is nothing to kill.
            logger.debug('Client %s has no process to kill.', self.ap)
            self.running = False
            return
        logger.debug(
            'Killing client %s on node %s.',
            self.ap, self.node.name
        )
        try:
            self.node.execute_command("kill %s" % self.pid)
        except ssh_support.SSHException:
            logger.warning('Could not kill client %s on node %s.',
                           self.ap, self.node.name)
        self.running = False

    def check(self):
        """Check if the process should keep running, stop it if not,
        and return true if and only if it is still running."""
        if not self.running:
            return False
        if time.time() - self.start_time >= self.duration:
            self.stop()
            self.running = False
            return False
        return True


# Base class for server programs
#
# @ap: Application Process binary
# @arrival_rate: Average requests/s to be received by this server
# @mean_duration: Average duration of a client connection (in seconds)
# @options: Options to pass to the binary
# @max_clients: Maximum number of new clients to start per polling interval
# @clients: Client binaries that will use this server
# @nodes: Specific nodes to start this server on
#
class Server(object):
    """A server program, the nodes it runs on and the clients that use it.

    ap: Application Process binary
    arrival_rate: Average requests/s to be received by this server
    mean_duration: Average duration of a client connection (in seconds)
    options: Options to pass to the binary (None is stored as "")
    max_clients: Upper bound on the clients returned by one
                 get_new_clients() call
    clients: Client objects that will use this server (None -> empty list)
    nodes: Nodes to start this server on (None -> empty list)
    """

    def __init__(self, ap, arrival_rate, mean_duration,
                 options=None, max_clients=float('inf'),
                 clients=None, nodes=None):
        self.ap = ap
        self.options = options if options is not None else ""
        self.max_clients = max_clients
        self.clients = clients if clients is not None else []
        # A missing node list must still be a list: add_node(), del_node(),
        # run() and the storyboard's validation all iterate or mutate it.
        self.nodes = nodes if nodes is not None else []
        self.arrival_rate = arrival_rate  # mean requests/s
        self.mean_duration = mean_duration  # in seconds
        self.pids = {}  # node -> value returned by execute_commands()

    def add_client(self, client):
        """Register a client program for this server."""
        self.clients.append(client)

    def del_client(self, client):
        """Remove a client program; ValueError if it is not registered."""
        self.clients.remove(client)

    def add_node(self, node):
        """Add a node this server should be started on."""
        self.nodes.append(node)

    def del_node(self, node):
        """Remove a node; ValueError if it is not registered."""
        self.nodes.remove(node)

    def get_new_clients(self, interval):
        """
        Returns a list of clients of size appropriate to the server's rate.

        The list's size should be a sample from Poisson(arrival_rate) over
        interval seconds.
        Hence, the average size should be interval * arrival_rate.
        The size is capped at max_clients.
        """
        number = poisson(self.arrival_rate * interval)
        number = int(min(number, self.max_clients))
        return [self.make_client_process() for _ in range(number)]

    def make_client_process(self):
        """Returns a client of this server.

        The client program is picked at random and its duration is drawn
        from an exponential distribution with mean mean_duration.
        Raises Exception when no client program is registered.
        """
        if not self.clients:
            raise Exception("Server %s has empty client list." % (self,))
        duration = exponential(self.mean_duration)
        return random.choice(self.clients).process(duration=duration)

    def run(self):
        """Start the server on every node in self.nodes.

        On each node a startup.sh wrapper is (re)written that launches its
        arguments under nohup with output sent to "<ap>_server.log" and
        echoes "$!"; the server is then started through it. Whatever
        execute_commands() returns is remembered per node and later used
        as the pid by stop(). SSH failures are logged, not raised.
        """
        # The commands do not depend on the node, so build them once.
        logfile = "%s_server.log" % self.ap
        script = r'nohup "$@" > %s & echo "$!"' % (logfile,)
        cmds = ["echo '%s' > startup.sh && chmod a+x startup.sh"
                % (script,),
                "./startup.sh %s %s" % (self.ap, self.options)]
        for node in self.nodes:
            logger.debug(
                'Starting server %s on node %s with logfile %s.',
                self.ap, node.name, logfile
            )
            try:
                self.pids[node] = node.execute_commands(cmds)
            except ssh_support.SSHException:
                logger.warning('Could not start server %s on node %s.',
                               self.ap, node.name)

    def stop(self):
        """Send a kill to every server instance recorded by run().

        SSH failures are logged, not raised.
        """
        for node, pid in self.pids.items():
            logger.debug(
                'Killing server %s on node %s.',
                self.ap, node.name
            )
            try:
                node.execute_command("kill %s" % pid)
            except ssh_support.SSHException:
                logger.warning('Could not kill server %s on node %s.',
                               self.ap, node.name)


# Base class for ARCFIRE storyboards
#
# @experiment: Experiment to use as input
# @duration: Duration of the whole storyboard
# @servers: App servers available in the network.
#           A list whose items are either a Server or a (Server, Node) pair
#
class StoryBoard(object):
    """Drives servers and randomly arriving clients for a fixed duration.

    duration: Duration of the whole storyboard (in seconds)
    experiment: Experiment to use as input; required before adding servers
    servers: Optional list whose items are either a Server or a
             (Server, Node) pair
    """

    DEFAULT_INTERVAL = 2.5  # in seconds (may be a float)

    def __init__(self, duration, experiment=None, servers=None):
        self.experiment = experiment
        self.duration = duration
        self.servers = []
        self.active_clients = []
        self.start_time = None
        for s in (servers if servers is not None else []):
            self._validate_and_add_server(s)

    def _validate_and_add_server(self, s):
        """Validate s (a Server or a (Server, Node) pair) and register it.

        Raises ValueError when no experiment is set or when one of the
        server's nodes is not part of the experiment, and TypeError when
        s has the wrong shape. Nothing is modified if validation fails.
        """
        if self.experiment is None:
            raise ValueError("Cannot add a server before "
                             "setting the experiment.")
        if hasattr(s, '__len__') and len(s) == 2:
            server, node = s
            if not isinstance(server, Server) \
                    or not isinstance(node, model.Node):
                raise TypeError('First element must be of "Server" type, '
                                'second must be of "Node" type.')
            extra_nodes = [node]
        elif isinstance(s, Server):
            server, extra_nodes = s, []
        else:
            raise TypeError('Input servers should be either an object of '
                            '"Server" type or a Server-Node couple.')
        # Tolerate a server whose node list was never initialised.
        if server.nodes is None:
            server.nodes = []
        # Check every node first, so that a rejected server leaves neither
        # the server's node list nor self.servers modified.
        for node in list(server.nodes) + extra_nodes:
            if node not in self.experiment.nodes:
                raise ValueError('Cannot run server on node %s, '
                                 'not in experiment.' % (node.name,))
        for node in extra_nodes:
            if node not in server.nodes:
                server.add_node(node)
        # The same server may arrive in several (Server, Node) pairs;
        # registering it twice would start it twice on every node.
        if server not in self.servers:
            self.servers.append(server)

    def set_experiment(self, experiment):
        """Set the experiment; TypeError unless it is a model.Experiment."""
        if not isinstance(experiment, model.Experiment):
            raise TypeError('Experiment instance required.')
        self.experiment = experiment

    def add_server(self, server):
        """Add a Server or a (Server, Node) pair after validating it."""
        self._validate_and_add_server(server)

    def del_server(self, server):
        """Remove a server; ValueError if it is not registered."""
        self.servers.remove(server)

    def start(self):
        """Run the storyboard for self.duration seconds.

        Writes the startup.sh helper on the client nodes, starts all
        servers, then every DEFAULT_INTERVAL seconds starts the newly
        arrived clients and drops the ones that finished. Remaining
        clients and all servers are stopped on the way out, even if an
        error interrupts the loop.
        """
        self.start_time = time.time()
        # Output used to be redirected to "/tmp/", which is a directory:
        # the redirection fails and the client is never executed.
        # Discard the output instead.
        # NOTE(review): Server.run() writes its own startup.sh afterwards;
        # on a node hosting both, the server's version replaces this one.
        script = r'nohup "$@" > /dev/null & echo "$!"'
        logger.debug("Writing utility startup script on client nodes.")
        prepared = set()  # nodes that already received the script
        for server in self.servers:
            for client in server.clients:
                for node in client.nodes:
                    if node in prepared:
                        continue
                    prepared.add(node)
                    node.execute_command(
                        "echo '%s' > startup.sh && chmod a+x startup.sh"
                        % (script,)
                    )
        try:
            for server in self.servers:
                server.run()
            while time.time() - self.start_time < self.duration:
                for server in self.servers:
                    clients = server.get_new_clients(self.DEFAULT_INTERVAL)
                    for new_client in clients:
                        # Pass the node chosen at creation explicitly so
                        # that run() cannot lose it.
                        new_client.run(new_client.node)
                        self.active_clients.append(new_client)
                # check() stops expired clients as a side effect.
                surviving = []
                for active in self.active_clients:
                    if active.check():
                        surviving.append(active)
                self.active_clients = surviving
                time.sleep(self.DEFAULT_INTERVAL)
        finally:
            for client in self.active_clients:
                client.stop()
            self.active_clients = []
            for server in self.servers:
                server.stop()