1/* 2 * Copyright (c) 2008 Princeton University 3 * Copyright (c) 2016 Georgia Institute of Technology 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer; 10 * redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution; 13 * neither the name of the copyright holders nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * Authors: Niket Agarwal 30 * Tushar Krishna 31 */ 32 33 34#include "mem/ruby/network/garnet2.0/SwitchAllocator.hh" 35 36#include "debug/RubyNetwork.hh" 37#include "mem/ruby/network/garnet2.0/GarnetNetwork.hh" 38#include "mem/ruby/network/garnet2.0/InputUnit.hh" 39#include "mem/ruby/network/garnet2.0/OutputUnit.hh" 40#include "mem/ruby/network/garnet2.0/Router.hh" 41 42SwitchAllocator::SwitchAllocator(Router *router) 43 : Consumer(router) 44{ 45 m_router = router; 46 m_num_vcs = m_router->get_num_vcs(); 47 m_vc_per_vnet = m_router->get_vc_per_vnet(); 48 49 m_input_arbiter_activity = 0; 50 m_output_arbiter_activity = 0; 51} 52 53void 54SwitchAllocator::init() 55{ 56 m_input_unit = m_router->get_inputUnit_ref(); 57 m_output_unit = m_router->get_outputUnit_ref(); 58 59 m_num_inports = m_router->get_num_inports(); 60 m_num_outports = m_router->get_num_outports(); 61 m_round_robin_inport.resize(m_num_outports); 62 m_round_robin_invc.resize(m_num_inports); 63 m_port_requests.resize(m_num_outports); 64 m_vc_winners.resize(m_num_outports); 65 66 for (int i = 0; i < m_num_inports; i++) { 67 m_round_robin_invc[i] = 0; 68 } 69 70 for (int i = 0; i < m_num_outports; i++) { 71 m_port_requests[i].resize(m_num_inports); 72 m_vc_winners[i].resize(m_num_inports); 73 74 m_round_robin_inport[i] = 0; 75 76 for (int j = 0; j < m_num_inports; j++) { 77 m_port_requests[i][j] = false; // [outport][inport] 78 } 79 } 80} 81 82/* 83 * The wakeup function of the SwitchAllocator performs a 2-stage 84 * seperable switch allocation. At the end of the 2nd stage, a free 85 * output VC is assigned to the winning flits of each output port. 86 * There is no separate VCAllocator stage like the one in garnet1.0. 87 * At the end of this function, the router is rescheduled to wakeup 88 * next cycle for peforming SA for any flits ready next cycle. 89 */ 90 91void 92SwitchAllocator::wakeup() 93{ 94 arbitrate_inports(); // First stage of allocation 95 arbitrate_outports(); // Second stage of allocation 96 97 clear_request_vector(); 98 check_for_wakeup(); 99} 100 101/* 102 * SA-I (or SA-i) loops through all input VCs at every input port, 103 * and selects one in a round robin manner. 104 * - For HEAD/HEAD_TAIL flits only selects an input VC whose output port 105 * has at least one free output VC. 106 * - For BODY/TAIL flits, only selects an input VC that has credits 107 * in its output VC. 108 * Places a request for the output port from this input VC. 109 */ 110 111void 112SwitchAllocator::arbitrate_inports() 113{ 114 // Select a VC from each input in a round robin manner 115 // Independent arbiter at each input port 116 for (int inport = 0; inport < m_num_inports; inport++) { 117 int invc = m_round_robin_invc[inport]; 118 119 for (int invc_iter = 0; invc_iter < m_num_vcs; invc_iter++) { 120 121 if (m_input_unit[inport]->need_stage(invc, SA_, 122 m_router->curCycle())) { 123 124 // This flit is in SA stage 125 126 int outport = m_input_unit[inport]->get_outport(invc); 127 int outvc = m_input_unit[inport]->get_outvc(invc); 128 129 // check if the flit in this InputVC is allowed to be sent 130 // send_allowed conditions described in that function. 131 bool make_request = 132 send_allowed(inport, invc, outport, outvc); 133 134 if (make_request) { 135 m_input_arbiter_activity++; 136 m_port_requests[outport][inport] = true; 137 m_vc_winners[outport][inport]= invc; 138 139 // Update Round Robin pointer to the next VC 140 m_round_robin_invc[inport] = invc + 1; 141 if (m_round_robin_invc[inport] >= m_num_vcs) 142 m_round_robin_invc[inport] = 0; 143 144 break; // got one vc winner for this port 145 } 146 } 147 148 invc++; 149 if (invc >= m_num_vcs) 150 invc = 0; 151 } 152 } 153} 154 155/* 156 * SA-II (or SA-o) loops through all output ports, 157 * and selects one input VC (that placed a request during SA-I) 158 * as the winner for this output port in a round robin manner. 159 * - For HEAD/HEAD_TAIL flits, performs simplified outvc allocation. 160 * (i.e., select a free VC from the output port). 161 * - For BODY/TAIL flits, decrement a credit in the output vc. 162 * The winning flit is read out from the input VC and sent to the 163 * CrossbarSwitch. 164 * An increment_credit signal is sent from the InputUnit 165 * to the upstream router. For HEAD_TAIL/TAIL flits, is_free_signal in the 166 * credit is set to true. 167 */ 168 169void 170SwitchAllocator::arbitrate_outports() 171{ 172 // Now there are a set of input vc requests for output vcs. 173 // Again do round robin arbitration on these requests 174 // Independent arbiter at each output port 175 for (int outport = 0; outport < m_num_outports; outport++) { 176 int inport = m_round_robin_inport[outport]; 177 178 for (int inport_iter = 0; inport_iter < m_num_inports; 179 inport_iter++) { 180 181 // inport has a request this cycle for outport 182 if (m_port_requests[outport][inport]) { 183 184 // grant this outport to this inport 185 int invc = m_vc_winners[outport][inport]; 186 187 int outvc = m_input_unit[inport]->get_outvc(invc); 188 if (outvc == -1) { 189 // VC Allocation - select any free VC from outport 190 outvc = vc_allocate(outport, inport, invc); 191 } 192 193 // remove flit from Input VC 194 flit *t_flit = m_input_unit[inport]->getTopFlit(invc); 195 196 DPRINTF(RubyNetwork, "SwitchAllocator at Router %d " 197 "granted outvc %d at outport %d " 198 "to invc %d at inport %d to flit %s at " 199 "time: %lld\n", 200 m_router->get_id(), outvc, 201 m_router->getPortDirectionName( 202 m_output_unit[outport]->get_direction()), 203 invc, 204 m_router->getPortDirectionName( 205 m_input_unit[inport]->get_direction()), 206 *t_flit, 207 m_router->curCycle()); 208 209 210 // Update outport field in the flit since this is 211 // used by CrossbarSwitch code to send it out of 212 // correct outport. 213 // Note: post route compute in InputUnit, 214 // outport is updated in VC, but not in flit 215 t_flit->set_outport(outport); 216 217 // set outvc (i.e., invc for next hop) in flit 218 // (This was updated in VC by vc_allocate, but not in flit) 219 t_flit->set_vc(outvc); 220 221 // decrement credit in outvc 222 m_output_unit[outport]->decrement_credit(outvc); 223 224 // flit ready for Switch Traversal 225 t_flit->advance_stage(ST_, m_router->curCycle()); 226 m_router->grant_switch(inport, t_flit); 227 m_output_arbiter_activity++; 228 229 if ((t_flit->get_type() == TAIL_) || 230 t_flit->get_type() == HEAD_TAIL_) { 231 232 // This Input VC should now be empty 233 assert(!(m_input_unit[inport]->isReady(invc, 234 m_router->curCycle()))); 235 236 // Free this VC 237 m_input_unit[inport]->set_vc_idle(invc, 238 m_router->curCycle()); 239 240 // Send a credit back 241 // along with the information that this VC is now idle 242 m_input_unit[inport]->increment_credit(invc, true, 243 m_router->curCycle()); 244 } else { 245 // Send a credit back 246 // but do not indicate that the VC is idle 247 m_input_unit[inport]->increment_credit(invc, false, 248 m_router->curCycle()); 249 } 250 251 // remove this request 252 m_port_requests[outport][inport] = false; 253 254 // Update Round Robin pointer 255 m_round_robin_inport[outport] = inport + 1; 256 if (m_round_robin_inport[outport] >= m_num_inports) 257 m_round_robin_inport[outport] = 0; 258 259 break; // got a input winner for this outport 260 } 261 262 inport++; 263 if (inport >= m_num_inports) 264 inport = 0; 265 } 266 } 267} 268 269/* 270 * A flit can be sent only if 271 * (1) there is at least one free output VC at the 272 * output port (for HEAD/HEAD_TAIL), 273 * or 274 * (2) if there is at least one credit (i.e., buffer slot) 275 * within the VC for BODY/TAIL flits of multi-flit packets. 276 * and 277 * (3) pt-to-pt ordering is not violated in ordered vnets, i.e., 278 * there should be no other flit in this input port 279 * within an ordered vnet 280 * that arrived before this flit and is requesting the same output port. 281 */ 282 283bool 284SwitchAllocator::send_allowed(int inport, int invc, int outport, int outvc) 285{ 286 // Check if outvc needed 287 // Check if credit needed (for multi-flit packet) 288 // Check if ordering violated (in ordered vnet) 289 290 int vnet = get_vnet(invc); 291 bool has_outvc = (outvc != -1); 292 bool has_credit = false; 293 294 if (!has_outvc) { 295 296 // needs outvc 297 // this is only true for HEAD and HEAD_TAIL flits. 298 299 if (m_output_unit[outport]->has_free_vc(vnet)) { 300 301 has_outvc = true; 302 303 // each VC has at least one buffer, 304 // so no need for additional credit check 305 has_credit = true; 306 } 307 } else { 308 has_credit = m_output_unit[outport]->has_credit(outvc); 309 } 310 311 // cannot send if no outvc or no credit. 312 if (!has_outvc || !has_credit) 313 return false; 314 315 316 // protocol ordering check 317 if ((m_router->get_net_ptr())->isVNetOrdered(vnet)) { 318 319 // enqueue time of this flit 320 Cycles t_enqueue_time = m_input_unit[inport]->get_enqueue_time(invc); 321 322 // check if any other flit is ready for SA and for same output port 323 // and was enqueued before this flit 324 int vc_base = vnet*m_vc_per_vnet; 325 for (int vc_offset = 0; vc_offset < m_vc_per_vnet; vc_offset++) { 326 int temp_vc = vc_base + vc_offset; 327 if (m_input_unit[inport]->need_stage(temp_vc, SA_, 328 m_router->curCycle()) && 329 (m_input_unit[inport]->get_outport(temp_vc) == outport) && 330 (m_input_unit[inport]->get_enqueue_time(temp_vc) < 331 t_enqueue_time)) { 332 return false; 333 } 334 } 335 } 336 337 return true; 338} 339 340// Assign a free VC to the winner of the output port. 341int 342SwitchAllocator::vc_allocate(int outport, int inport, int invc) 343{ 344 // Select a free VC from the output port 345 int outvc = m_output_unit[outport]->select_free_vc(get_vnet(invc)); 346 347 // has to get a valid VC since it checked before performing SA 348 assert(outvc != -1); 349 m_input_unit[inport]->grant_outvc(invc, outvc); 350 return outvc; 351} 352 353// Wakeup the router next cycle to perform SA again 354// if there are flits ready. 355void 356SwitchAllocator::check_for_wakeup() 357{ 358 Cycles nextCycle = m_router->curCycle() + Cycles(1); 359 360 for (int i = 0; i < m_num_inports; i++) { 361 for (int j = 0; j < m_num_vcs; j++) { 362 if (m_input_unit[i]->need_stage(j, SA_, nextCycle)) { 363 m_router->schedule_wakeup(Cycles(1)); 364 return; 365 } 366 } 367 } 368} 369 370int 371SwitchAllocator::get_vnet(int invc) 372{ 373 int vnet = invc/m_vc_per_vnet; 374 assert(vnet < m_router->get_num_vnets()); 375 return vnet; 376} 377 378 379// Clear the request vector within the allocator at end of SA-II. 380// Was populated by SA-I. 381void 382SwitchAllocator::clear_request_vector() 383{ 384 for (int i = 0; i < m_num_outports; i++) { 385 for (int j = 0; j < m_num_inports; j++) { 386 m_port_requests[i][j] = false; 387 } 388 } 389} 390 391void 392SwitchAllocator::resetStats() 393{ 394 m_input_arbiter_activity = 0; 395 m_output_arbiter_activity = 0; 396} 397