// SwitchAllocator.cc revision 11666
1/* 2 * Copyright (c) 2008 Princeton University 3 * Copyright (c) 2016 Georgia Institute of Technology 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer; 10 * redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution; 13 * neither the name of the copyright holders nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 * 29 * Authors: Niket Agarwal 30 * Tushar Krishna 31 */ 32 33 34#include "mem/ruby/network/garnet2.0/SwitchAllocator.hh" 35 36#include "debug/RubyNetwork.hh" 37#include "mem/ruby/network/garnet2.0/GarnetNetwork.hh" 38#include "mem/ruby/network/garnet2.0/InputUnit.hh" 39#include "mem/ruby/network/garnet2.0/OutputUnit.hh" 40#include "mem/ruby/network/garnet2.0/Router.hh" 41 42SwitchAllocator::SwitchAllocator(Router *router) 43 : Consumer(router) 44{ 45 m_router = router; 46 m_num_vcs = m_router->get_num_vcs(); 47 m_vc_per_vnet = m_router->get_vc_per_vnet(); 48 49 m_input_arbiter_activity = 0; 50 m_output_arbiter_activity = 0; 51} 52 53void 54SwitchAllocator::init() 55{ 56 m_input_unit = m_router->get_inputUnit_ref(); 57 m_output_unit = m_router->get_outputUnit_ref(); 58 59 m_num_inports = m_router->get_num_inports(); 60 m_num_outports = m_router->get_num_outports(); 61 m_round_robin_inport.resize(m_num_outports); 62 m_round_robin_invc.resize(m_num_inports); 63 m_port_requests.resize(m_num_outports); 64 m_vc_winners.resize(m_num_outports); 65 66 for (int i = 0; i < m_num_inports; i++) { 67 m_round_robin_invc[i] = 0; 68 } 69 70 for (int i = 0; i < m_num_outports; i++) { 71 m_port_requests[i].resize(m_num_inports); 72 m_vc_winners[i].resize(m_num_inports); 73 74 m_round_robin_inport[i] = 0; 75 76 for (int j = 0; j < m_num_inports; j++) { 77 m_port_requests[i][j] = false; // [outport][inport] 78 } 79 } 80} 81 82/* 83 * The wakeup function of the SwitchAllocator performs a 2-stage 84 * seperable switch allocation. At the end of the 2nd stage, a free 85 * output VC is assigned to the winning flits of each output port. 86 * There is no separate VCAllocator stage like the one in garnet1.0. 87 * At the end of this function, the router is rescheduled to wakeup 88 * next cycle for peforming SA for any flits ready next cycle. 
89 */ 90 91void 92SwitchAllocator::wakeup() 93{ 94 arbitrate_inports(); // First stage of allocation 95 arbitrate_outports(); // Second stage of allocation 96 97 clear_request_vector(); 98 check_for_wakeup(); 99} 100 101/* 102 * SA-I (or SA-i) loops through all input VCs at every input port, 103 * and selects one in a round robin manner. 104 * - For HEAD/HEAD_TAIL flits only selects an input VC whose output port 105 * has at least one free output VC. 106 * - For BODY/TAIL flits, only selects an input VC that has credits 107 * in its output VC. 108 * Places a request for the output port from this input VC. 109 */ 110 111void 112SwitchAllocator::arbitrate_inports() 113{ 114 // Select a VC from each input in a round robin manner 115 // Independent arbiter at each input port 116 for (int inport = 0; inport < m_num_inports; inport++) { 117 int invc = m_round_robin_invc[inport]; 118 119 // Select next round robin vc candidate within valid vnet 120 int next_round_robin_invc = invc; 121 next_round_robin_invc++; 122 if (next_round_robin_invc >= m_num_vcs) 123 next_round_robin_invc = 0; 124 m_round_robin_invc[inport] = next_round_robin_invc; 125 126 for (int invc_iter = 0; invc_iter < m_num_vcs; invc_iter++) { 127 128 if (m_input_unit[inport]->need_stage(invc, SA_, 129 m_router->curCycle())) { 130 131 // This flit is in SA stage 132 133 int outport = m_input_unit[inport]->get_outport(invc); 134 int outvc = m_input_unit[inport]->get_outvc(invc); 135 136 // check if the flit in this InputVC is allowed to be sent 137 // send_allowed conditions described in that function. 
138 bool make_request = 139 send_allowed(inport, invc, outport, outvc); 140 141 if (make_request) { 142 m_input_arbiter_activity++; 143 m_port_requests[outport][inport] = true; 144 m_vc_winners[outport][inport]= invc; 145 break; // got one vc winner for this port 146 } 147 } 148 149 invc++; 150 if (invc >= m_num_vcs) 151 invc = 0; 152 } 153 } 154} 155 156/* 157 * SA-II (or SA-o) loops through all output ports, 158 * and selects one input VC (that placed a request during SA-I) 159 * as the winner for this output port in a round robin manner. 160 * - For HEAD/HEAD_TAIL flits, performs simplified outvc allocation. 161 * (i.e., select a free VC from the output port). 162 * - For BODY/TAIL flits, decrement a credit in the output vc. 163 * The winning flit is read out from the input VC and sent to the 164 * CrossbarSwitch. 165 * An increment_credit signal is sent from the InputUnit 166 * to the upstream router. For HEAD_TAIL/TAIL flits, is_free_signal in the 167 * credit is set to true. 168 */ 169 170void 171SwitchAllocator::arbitrate_outports() 172{ 173 // Now there are a set of input vc requests for output vcs. 
174 // Again do round robin arbitration on these requests 175 // Independent arbiter at each output port 176 for (int outport = 0; outport < m_num_outports; outport++) { 177 int inport = m_round_robin_inport[outport]; 178 m_round_robin_inport[outport]++; 179 180 if (m_round_robin_inport[outport] >= m_num_inports) 181 m_round_robin_inport[outport] = 0; 182 183 for (int inport_iter = 0; inport_iter < m_num_inports; 184 inport_iter++) { 185 186 // inport has a request this cycle for outport 187 if (m_port_requests[outport][inport]) { 188 189 // grant this outport to this inport 190 int invc = m_vc_winners[outport][inport]; 191 192 int outvc = m_input_unit[inport]->get_outvc(invc); 193 if (outvc == -1) { 194 // VC Allocation - select any free VC from outport 195 outvc = vc_allocate(outport, inport, invc); 196 } 197 198 // remove flit from Input VC 199 flit *t_flit = m_input_unit[inport]->getTopFlit(invc); 200 201 DPRINTF(RubyNetwork, "SwitchAllocator at Router %d " 202 "granted outvc %d at outport %d " 203 "to invc %d at inport %d to flit %s at " 204 "time: %lld\n", 205 m_router->get_id(), outvc, 206 m_router->getPortDirectionName( 207 m_output_unit[outport]->get_direction()), 208 invc, 209 m_router->getPortDirectionName( 210 m_input_unit[inport]->get_direction()), 211 *t_flit, 212 m_router->curCycle()); 213 214 215 // Update outport field in the flit since this is 216 // used by CrossbarSwitch code to send it out of 217 // correct outport. 
218 // Note: post route compute in InputUnit, 219 // outport is updated in VC, but not in flit 220 t_flit->set_outport(outport); 221 222 // set outvc (i.e., invc for next hop) in flit 223 // (This was updated in VC by vc_allocate, but not in flit) 224 t_flit->set_vc(outvc); 225 226 // decrement credit in outvc 227 m_output_unit[outport]->decrement_credit(outvc); 228 229 // flit ready for Switch Traversal 230 t_flit->advance_stage(ST_, m_router->curCycle()); 231 m_router->grant_switch(inport, t_flit); 232 m_output_arbiter_activity++; 233 234 if ((t_flit->get_type() == TAIL_) || 235 t_flit->get_type() == HEAD_TAIL_) { 236 237 // This Input VC should now be empty 238 assert(!(m_input_unit[inport]->isReady(invc, 239 m_router->curCycle()))); 240 241 // Free this VC 242 m_input_unit[inport]->set_vc_idle(invc, 243 m_router->curCycle()); 244 245 // Send a credit back 246 // along with the information that this VC is now idle 247 m_input_unit[inport]->increment_credit(invc, true, 248 m_router->curCycle()); 249 } else { 250 // Send a credit back 251 // but do not indicate that the VC is idle 252 m_input_unit[inport]->increment_credit(invc, false, 253 m_router->curCycle()); 254 } 255 256 // remove this request 257 m_port_requests[outport][inport] = false; 258 259 break; // got a input winner for this outport 260 } 261 262 inport++; 263 if (inport >= m_num_inports) 264 inport = 0; 265 } 266 } 267} 268 269/* 270 * A flit can be sent only if 271 * (1) there is at least one free output VC at the 272 * output port (for HEAD/HEAD_TAIL), 273 * or 274 * (2) if there is at least one credit (i.e., buffer slot) 275 * within the VC for BODY/TAIL flits of multi-flit packets. 276 * and 277 * (3) pt-to-pt ordering is not violated in ordered vnets, i.e., 278 * there should be no other flit in this input port 279 * within an ordered vnet 280 * that arrived before this flit and is requesting the same output port. 
281 */ 282 283bool 284SwitchAllocator::send_allowed(int inport, int invc, int outport, int outvc) 285{ 286 // Check if outvc needed 287 // Check if credit needed (for multi-flit packet) 288 // Check if ordering violated (in ordered vnet) 289 290 int vnet = get_vnet(invc); 291 bool has_outvc = (outvc != -1); 292 bool has_credit = false; 293 294 if (!has_outvc) { 295 296 // needs outvc 297 // this is only true for HEAD and HEAD_TAIL flits. 298 299 if (m_output_unit[outport]->has_free_vc(vnet)) { 300 301 has_outvc = true; 302 303 // each VC has at least one buffer, 304 // so no need for additional credit check 305 has_credit = true; 306 } 307 } else { 308 has_credit = m_output_unit[outport]->has_credit(outvc); 309 } 310 311 // cannot send if no outvc or no credit. 312 if (!has_outvc || !has_credit) 313 return false; 314 315 316 // protocol ordering check 317 if ((m_router->get_net_ptr())->isVNetOrdered(vnet)) { 318 319 // enqueue time of this flit 320 Cycles t_enqueue_time = m_input_unit[inport]->get_enqueue_time(invc); 321 322 // check if any other flit is ready for SA and for same output port 323 // and was enqueued before this flit 324 int vc_base = vnet*m_vc_per_vnet; 325 for (int vc_offset = 0; vc_offset < m_vc_per_vnet; vc_offset++) { 326 int temp_vc = vc_base + vc_offset; 327 if (m_input_unit[inport]->need_stage(temp_vc, SA_, 328 m_router->curCycle()) && 329 (m_input_unit[inport]->get_outport(temp_vc) == outport) && 330 (m_input_unit[inport]->get_enqueue_time(temp_vc) < 331 t_enqueue_time)) { 332 return false; 333 } 334 } 335 } 336 337 return true; 338} 339 340// Assign a free VC to the winner of the output port. 
341int 342SwitchAllocator::vc_allocate(int outport, int inport, int invc) 343{ 344 // Select a free VC from the output port 345 int outvc = m_output_unit[outport]->select_free_vc(get_vnet(invc)); 346 347 // has to get a valid VC since it checked before performing SA 348 assert(outvc != -1); 349 m_input_unit[inport]->grant_outvc(invc, outvc); 350 return outvc; 351} 352 353// Wakeup the router next cycle to perform SA again 354// if there are flits ready. 355void 356SwitchAllocator::check_for_wakeup() 357{ 358 Cycles nextCycle = m_router->curCycle() + Cycles(1); 359 360 for (int i = 0; i < m_num_inports; i++) { 361 for (int j = 0; j < m_num_vcs; j++) { 362 if (m_input_unit[i]->need_stage(j, SA_, nextCycle)) { 363 m_router->schedule_wakeup(Cycles(1)); 364 return; 365 } 366 } 367 } 368} 369 370int 371SwitchAllocator::get_vnet(int invc) 372{ 373 int vnet = invc/m_vc_per_vnet; 374 assert(vnet < m_router->get_num_vnets()); 375 return vnet; 376} 377 378 379// Clear the request vector within the allocator at end of SA-II. 380// Was populated by SA-I. 381void 382SwitchAllocator::clear_request_vector() 383{ 384 for (int i = 0; i < m_num_outports; i++) { 385 for (int j = 0; j < m_num_inports; j++) { 386 m_port_requests[i][j] = false; 387 } 388 } 389} 390