Topology.cc revision 6154
1 2/* 3 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer; 10 * redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution; 13 * neither the name of the copyright holders nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30/* 31 * Topology.C 32 * 33 * Description: See Topology.h 34 * 35 * $Id$ 36 * 37 * */ 38 39#include "mem/ruby/network/simple/Topology.hh" 40#include "mem/ruby/common/NetDest.hh" 41#include "mem/ruby/network/Network.hh" 42#include "mem/protocol/TopologyType.hh" 43#include "mem/ruby/config/RubyConfig.hh" 44#include "mem/gems_common/util.hh" 45#include "mem/protocol/MachineType.hh" 46#include "mem/protocol/Protocol.hh" 47#include <string> 48 49static const int INFINITE_LATENCY = 10000; // Yes, this is a big hack 50static const int DEFAULT_BW_MULTIPLIER = 1; // Just to be consistent with above :) 51 52// Note: In this file, we use the first 2*m_nodes SwitchIDs to 53// represent the input and output endpoint links. These really are 54// not 'switches', as they will not have a Switch object allocated for 55// them. The first m_nodes SwitchIDs are the links into the network, 56// the second m_nodes set of SwitchIDs represent the the output queues 57// of the network. 58 59// Helper functions based on chapter 29 of Cormen et al. 60static Matrix extend_shortest_path(const Matrix& current_dist, Matrix& latencies, Matrix& inter_switches); 61static Matrix shortest_path(const Matrix& weights, Matrix& latencies, Matrix& inter_switches); 62static bool link_is_shortest_path_to_node(SwitchID src, SwitchID next, SwitchID final, const Matrix& weights, const Matrix& dist); 63static NetDest shortest_path_to_node(SwitchID src, SwitchID next, const Matrix& weights, const Matrix& dist); 64 65 66Topology::Topology(Network* network_ptr, int number_of_nodes) 67{ 68 m_network_ptr = network_ptr; 69 m_nodes = number_of_nodes; 70 m_number_of_switches = 0; 71 init(); 72} 73 74void Topology::init() 75{ 76 if (m_nodes == 1) { 77 SwitchID id = newSwitchID(); 78 addLink(0, id, NETWORK_LINK_LATENCY); 79 addLink(id, 1, NETWORK_LINK_LATENCY); 80 return; 81 } 82 83 // topology-specific set-up 84 TopologyType topology = string_to_TopologyType(g_NETWORK_TOPOLOGY); 85 switch (topology) { 86 case TopologyType_TORUS_2D: 87 make2DTorus(); 88 break; 89 case TopologyType_HIERARCHICAL_SWITCH: 90 makeHierarchicalSwitch(FAN_OUT_DEGREE); 91 break; 92 case TopologyType_CROSSBAR: 93 makeHierarchicalSwitch(1024); 94 break; 95 case TopologyType_PT_TO_PT: 96 makePtToPt(); 97 break; 98 case TopologyType_FILE_SPECIFIED: 99 makeFileSpecified(); 100 break; 101 default: 102 ERROR_MSG("Unexpected typology type") 103 } 104 105 // initialize component latencies record 106 m_component_latencies.setSize(0); 107 m_component_inter_switches.setSize(0); 108} 109 110void Topology::makeSwitchesPerChip(Vector< Vector < SwitchID > > &nodePairs, Vector<int> &latencies, Vector<int> &bw_multis, int numberOfChipSwitches) 111{ 112 113 Vector < SwitchID > nodes; // temporary buffer 114 nodes.setSize(2); 115 116 Vector<bool> endpointConnectionExist; // used to ensure all endpoints are connected to the network 117 endpointConnectionExist.setSize(m_nodes); 118 // initialize endpoint check vector 119 for (int k = 0; k < endpointConnectionExist.size(); k++) { 120 endpointConnectionExist[k] = false; 121 } 122 123 Vector<int> componentCount; 124 componentCount.setSize(MachineType_NUM); 125 for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { 126 componentCount[mType] = 0; 127 } 128 129 // components to/from network links 130 for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { 131 for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { 132 for (int component = 0; component < MachineType_chip_count(mType, chip); component++) { 133 134 int latency = -1; 135 int bw_multiplier = -1; // internal link bw multiplier of the global bandwidth 136 if (mType != MachineType_Directory) { 137 latency = ON_CHIP_LINK_LATENCY; // internal link latency 138 bw_multiplier = 10; // internal link bw multiplier of the global bandwidth 139 } else { 140 latency = NETWORK_LINK_LATENCY; // local memory latency 141 bw_multiplier = 1; // local memory link bw multiplier of the global bandwidth 142 } 143 nodes[0] = MachineType_base_number(mType)+componentCount[mType]; 144 nodes[1] = chip+m_nodes*2; // this is the chip's internal switch id # 145 146 // insert link 147 nodePairs.insertAtBottom(nodes); 148 latencies.insertAtBottom(latency); 149 //bw_multis.insertAtBottom(bw_multiplier); 150 bw_multis.insertAtBottom(componentCount[mType]+MachineType_base_number((MachineType)mType)); 151 152 // opposite direction link 153 Vector < SwitchID > otherDirectionNodes; 154 otherDirectionNodes.setSize(2); 155 otherDirectionNodes[0] = nodes[1]; 156 otherDirectionNodes[1] = nodes[0]+m_nodes; 157 nodePairs.insertAtBottom(otherDirectionNodes); 158 latencies.insertAtBottom(latency); 159 bw_multis.insertAtBottom(bw_multiplier); 160 161 assert(!endpointConnectionExist[nodes[0]]); 162 endpointConnectionExist[nodes[0]] = true; 163 componentCount[mType]++; 164 } 165 } 166 } 167 168 // make sure all enpoints are connected in the soon to be created network 169 for (int k = 0; k < endpointConnectionExist.size(); k++) { 170 if (endpointConnectionExist[k] == false) { 171 cerr << "Error: Unconnected Endpoint: " << k << endl; 172 exit(1); 173 } 174 } 175 176 // secondary check to ensure we saw the correct machine counts 177 for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { 178 assert(componentCount[mType] == MachineType_base_count((MachineType)mType)); 179 } 180 181} 182 183// 2D torus topology 184 185void Topology::make2DTorus() 186{ 187 Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file 188 Vector<int> latencies; // link latencies for each link extracted 189 Vector<int> bw_multis; // bw multipliers for each link extracted 190 191 Vector < SwitchID > nodes; // temporary buffer 192 nodes.setSize(2); 193 194 // number of inter-chip switches 195 int numberOfTorusSwitches = m_nodes/MachineType_base_level(MachineType_NUM); 196 // one switch per machine node grouping 197 Vector<SwitchID> torusSwitches; 198 for(int i=0; i<numberOfTorusSwitches; i++){ 199 SwitchID new_switch = newSwitchID(); 200 torusSwitches.insertAtBottom(new_switch); 201 } 202 203 makeSwitchesPerChip(nodePairs, latencies, bw_multis, numberOfTorusSwitches); 204 205 int lengthOfSide = (int)sqrt((double)numberOfTorusSwitches); 206 207 // Now connect the inter-chip torus links 208 209 int latency = NETWORK_LINK_LATENCY; // external link latency 210 int bw_multiplier = 1; // external link bw multiplier of the global bandwidth 211 212 for(int i=0; i<numberOfTorusSwitches; i++){ 213 nodes[0] = torusSwitches[i]; // current switch 214 215 // left 216 if(nodes[0]%lengthOfSide == 0){ // determine left neighbor 217 nodes[1] = nodes[0] - 1 + lengthOfSide; 218 } else { 219 nodes[1] = nodes[0] - 1; 220 } 221 nodePairs.insertAtBottom(nodes); 222 latencies.insertAtBottom(latency); 223 bw_multis.insertAtBottom(bw_multiplier); 224 225 // right 226 if((nodes[0] + 1)%lengthOfSide == 0){ // determine right neighbor 227 nodes[1] = nodes[0] + 1 - lengthOfSide; 228 } else { 229 nodes[1] = nodes[0] + 1; 230 } 231 nodePairs.insertAtBottom(nodes); 232 latencies.insertAtBottom(latency); 233 bw_multis.insertAtBottom(bw_multiplier); 234 235 // top 236 if(nodes[0] - lengthOfSide < 2*m_nodes){ // determine if node is on the top 237 nodes[1] = nodes[0] - lengthOfSide + (lengthOfSide*lengthOfSide); 238 } else { 239 nodes[1] = nodes[0] - lengthOfSide; 240 } 241 nodePairs.insertAtBottom(nodes); 242 latencies.insertAtBottom(latency); 243 bw_multis.insertAtBottom(bw_multiplier); 244 245 // bottom 246 if(nodes[0] + lengthOfSide >= 2*m_nodes+numberOfTorusSwitches){ // determine if node is on the bottom 247 // sorin: bad bug if this is a > instead of a >= 248 nodes[1] = nodes[0] + lengthOfSide - (lengthOfSide*lengthOfSide); 249 } else { 250 nodes[1] = nodes[0] + lengthOfSide; 251 } 252 nodePairs.insertAtBottom(nodes); 253 latencies.insertAtBottom(latency); 254 bw_multis.insertAtBottom(bw_multiplier); 255 256 } 257 258 // add links 259 ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size()) 260 for (int k = 0; k < nodePairs.size(); k++) { 261 ASSERT(nodePairs[k].size() == 2); 262 addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k]); 263 } 264 265} 266 267// hierarchical switch topology 268void Topology::makeHierarchicalSwitch(int fan_out_degree) 269{ 270 // Make a row of switches with only one input. This extra row makes 271 // sure the links out of the nodes have latency and limited 272 // bandwidth. 273 274 // number of inter-chip switches, i.e. the last row of switches 275 Vector<SwitchID> last_level; 276 for (int i=0; i<m_nodes; i++) { 277 SwitchID new_switch = newSwitchID(); // internal switch id # 278 addLink(i, new_switch, NETWORK_LINK_LATENCY); 279 last_level.insertAtBottom(new_switch); 280 } 281 282 // Create Hierarchical Switches 283 284 // start from the bottom level and work up to root 285 Vector<SwitchID> next_level; 286 while(last_level.size() > 1) { 287 for (int i=0; i<last_level.size(); i++) { 288 if ((i % fan_out_degree) == 0) { 289 next_level.insertAtBottom(newSwitchID()); 290 } 291 // Add this link to the last switch we created 292 addLink(last_level[i], next_level[next_level.size()-1], NETWORK_LINK_LATENCY); 293 } 294 295 // Make the current level the last level to get ready for next 296 // iteration 297 last_level = next_level; 298 next_level.clear(); 299 } 300 301 SwitchID root_switch = last_level[0]; 302 303 Vector<SwitchID> out_level; 304 for (int i=0; i<m_nodes; i++) { 305 out_level.insertAtBottom(m_nodes+i); 306 } 307 308 // Build the down network from the endpoints to the root 309 while(out_level.size() != 1) { 310 311 // A level of switches 312 for (int i=0; i<out_level.size(); i++) { 313 if ((i % fan_out_degree) == 0) { 314 if (out_level.size() > fan_out_degree) { 315 next_level.insertAtBottom(newSwitchID()); 316 } else { 317 next_level.insertAtBottom(root_switch); 318 } 319 } 320 // Add this link to the last switch we created 321 addLink(next_level[next_level.size()-1], out_level[i], NETWORK_LINK_LATENCY); 322 } 323 324 // Make the current level the last level to get ready for next 325 // iteration 326 out_level = next_level; 327 next_level.clear(); 328 } 329} 330 331// one internal node per chip, point to point links between chips 332void Topology::makePtToPt() 333{ 334 Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file 335 Vector<int> latencies; // link latencies for each link extracted 336 Vector<int> bw_multis; // bw multipliers for each link extracted 337 338 Vector < SwitchID > nodes; 339 nodes.setSize(2); 340 341 // number of inter-chip switches 342 int numberOfChipSwitches = m_nodes/MachineType_base_level(MachineType_NUM); 343 // two switches per machine node grouping 344 // one intra-chip switch and one inter-chip switch per chip 345 for(int i=0; i<numberOfChipSwitches; i++){ 346 SwitchID new_switch = newSwitchID(); 347 new_switch = newSwitchID(); 348 } 349 350 makeSwitchesPerChip(nodePairs, latencies, bw_multis, numberOfChipSwitches); 351 352 // connect intra-chip switch to inter-chip switch 353 for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { 354 355 int latency = ON_CHIP_LINK_LATENCY; // internal link latency 356 int bw_multiplier = 10; // external link bw multiplier of the global bandwidth 357 358 nodes[0] = chip+m_nodes*2; 359 nodes[1] = chip+m_nodes*2+RubyConfig::numberOfChips(); 360 361 // insert link 362 nodePairs.insertAtBottom(nodes); 363 latencies.insertAtBottom(latency); 364 bw_multis.insertAtBottom(bw_multiplier); 365 366 // opposite direction link 367 Vector < SwitchID > otherDirectionNodes; 368 otherDirectionNodes.setSize(2); 369 otherDirectionNodes[0] = nodes[1]; 370 otherDirectionNodes[1] = nodes[0]; 371 nodePairs.insertAtBottom(otherDirectionNodes); 372 latencies.insertAtBottom(latency); 373 bw_multis.insertAtBottom(bw_multiplier); 374 } 375 376 // point-to-point network between chips 377 for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { 378 for (int other_chip = chip+1; other_chip < RubyConfig::numberOfChips(); other_chip++) { 379 380 int latency = NETWORK_LINK_LATENCY; // external link latency 381 int bw_multiplier = 1; // external link bw multiplier of the global bandwidth 382 383 nodes[0] = chip+m_nodes*2+RubyConfig::numberOfChips(); 384 nodes[1] = other_chip+m_nodes*2+RubyConfig::numberOfChips(); 385 386 // insert link 387 nodePairs.insertAtBottom(nodes); 388 latencies.insertAtBottom(latency); 389 bw_multis.insertAtBottom(bw_multiplier); 390 391 // opposite direction link 392 Vector < SwitchID > otherDirectionNodes; 393 otherDirectionNodes.setSize(2); 394 otherDirectionNodes[0] = nodes[1]; 395 otherDirectionNodes[1] = nodes[0]; 396 nodePairs.insertAtBottom(otherDirectionNodes); 397 latencies.insertAtBottom(latency); 398 bw_multis.insertAtBottom(bw_multiplier); 399 } 400 } 401 402 // add links 403 ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size()) 404 for (int k = 0; k < nodePairs.size(); k++) { 405 ASSERT(nodePairs[k].size() == 2); 406 addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k]); 407 } 408} 409 410// make a network as described by the networkFile 411void Topology::makeFileSpecified() 412{ 413 414 Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file 415 Vector<int> latencies; // link latencies for each link extracted 416 Vector<int> bw_multis; // bw multipliers for each link extracted 417 Vector<int> weights; // link weights used to enfore e-cube deadlock free routing 418 Vector< SwitchID > int_network_switches; // internal switches extracted from the file 419 Vector<bool> endpointConnectionExist; // used to ensure all endpoints are connected to the network 420 421 endpointConnectionExist.setSize(m_nodes); 422 423 // initialize endpoint check vector 424 for (int k = 0; k < endpointConnectionExist.size(); k++) { 425 endpointConnectionExist[k] = false; 426 } 427 428 string filename = "network/simple/Network_Files/"; 429 filename = filename+g_CACHE_DESIGN 430 +"_Procs-"+int_to_string(RubyConfig::numberOfProcessors()) 431 +"_ProcsPerChip-"+int_to_string(RubyConfig::numberOfProcsPerChip()) 432 +"_L2Banks-"+int_to_string(RubyConfig::numberOfL2Cache()) 433 +"_Memories-"+int_to_string(RubyConfig::numberOfMemories()) 434 +".txt"; 435 436 ifstream networkFile( filename.c_str() , ios::in); 437 if (!networkFile.is_open()) { 438 cerr << "Error: Could not open network file: " << filename << endl; 439 cerr << "Probably no network file exists for " << RubyConfig::numberOfProcessors() 440 << " processors and " << RubyConfig::numberOfProcsPerChip() << " procs per chip " << endl; 441 exit(1); 442 } 443 444 string line = ""; 445 446 while (!networkFile.eof()) { 447 448 Vector < SwitchID > nodes; 449 nodes.setSize(2); 450 int latency = -1; // null latency 451 int weight = -1; // null weight 452 int bw_multiplier = DEFAULT_BW_MULTIPLIER; // default multiplier incase the network file doesn't define it 453 int i = 0; // node pair index 454 int varsFound = 0; // number of varsFound on the line 455 int internalNodes = 0; // used to determine if the link is between 2 internal nodes 456 std::getline(networkFile, line, '\n'); 457 string varStr = string_split(line, ' '); 458 459 // parse the current line in the file 460 while (varStr != "") { 461 string label = string_split(varStr, ':'); 462 463 // valid node labels 464 if (label == "ext_node" || label == "int_node") { 465 ASSERT(i < 2); // one link between 2 switches per line 466 varsFound++; 467 bool isNewIntSwitch = true; 468 if (label == "ext_node") { // input link to node 469 MachineType machine = string_to_MachineType(string_split(varStr, ':')); 470 string nodeStr = string_split(varStr, ':'); 471 if (string_split(varStr, ':') == "bank") { 472 nodes[i] = MachineType_base_number(machine) 473 + atoi(nodeStr.c_str()) 474 + atoi((string_split(varStr, ':')).c_str())*RubyConfig::numberOfChips(); 475 } else { 476 nodes[i] = MachineType_base_number(machine) 477 + atoi(nodeStr.c_str()); 478 } 479 // in nodes should be numbered 0 to m_nodes-1 480 ASSERT(nodes[i] >= 0 && nodes[i] < m_nodes); 481 isNewIntSwitch = false; 482 endpointConnectionExist[nodes[i]] = true; 483 } 484 if (label == "int_node") { // interior node 485 nodes[i] = atoi((string_split(varStr, ':')).c_str())+m_nodes*2; 486 // in nodes should be numbered >= m_nodes*2 487 ASSERT(nodes[i] >= m_nodes*2); 488 for (int k = 0; k < int_network_switches.size(); k++) { 489 if (int_network_switches[k] == nodes[i]) { 490 isNewIntSwitch = false; 491 } 492 } 493 if (isNewIntSwitch) { // if internal switch 494 m_number_of_switches++; 495 int_network_switches.insertAtBottom(nodes[i]); 496 } 497 internalNodes++; 498 } 499 i++; 500 } else if (label == "link_latency") { 501 latency = atoi((string_split(varStr, ':')).c_str()); 502 varsFound++; 503 } else if (label == "bw_multiplier") { // not necessary, defaults to DEFAULT_BW_MULTIPLIER 504 bw_multiplier = atoi((string_split(varStr, ':')).c_str()); 505 } else if (label == "link_weight") { // not necessary, defaults to link_latency 506 weight = atoi((string_split(varStr, ':')).c_str()); 507 } else if (label == "processors") { 508 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfProcessors()); 509 } else if (label == "bw_unit") { 510 ASSERT(atoi((string_split(varStr, ':')).c_str()) == g_endpoint_bandwidth); 511 } else if (label == "procs_per_chip") { 512 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfProcsPerChip()); 513 } else if (label == "L2banks") { 514 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfL2Cache()); 515 } else if (label == "memories") { 516 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfMemories()); 517 } else { 518 cerr << "Error: Unexpected Identifier: " << label << endl; 519 exit(1); 520 } 521 varStr = string_split(line, ' '); 522 } 523 if (varsFound == 3) { // all three necessary link variables where found so add the link 524 nodePairs.insertAtBottom(nodes); 525 latencies.insertAtBottom(latency); 526 if (weight != -1) { 527 weights.insertAtBottom(weight); 528 } else { 529 weights.insertAtBottom(latency); 530 } 531 bw_multis.insertAtBottom(bw_multiplier); 532 Vector < SwitchID > otherDirectionNodes; 533 otherDirectionNodes.setSize(2); 534 otherDirectionNodes[0] = nodes[1]; 535 if (internalNodes == 2) { // this is an internal link 536 otherDirectionNodes[1] = nodes[0]; 537 } else { 538 otherDirectionNodes[1] = nodes[0]+m_nodes; 539 } 540 nodePairs.insertAtBottom(otherDirectionNodes); 541 latencies.insertAtBottom(latency); 542 if (weight != -1) { 543 weights.insertAtBottom(weight); 544 } else { 545 weights.insertAtBottom(latency); 546 } 547 bw_multis.insertAtBottom(bw_multiplier); 548 } else { 549 if (varsFound != 0) { // if this is not a valid link, then no vars should have been found 550 cerr << "Error in line: " << line << endl; 551 exit(1); 552 } 553 } 554 } // end of file 555 556 // makes sure all enpoints are connected in the soon to be created network 557 for (int k = 0; k < endpointConnectionExist.size(); k++) { 558 if (endpointConnectionExist[k] == false) { 559 cerr << "Error: Unconnected Endpoint: " << k << endl; 560 exit(1); 561 } 562 } 563 564 ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size() && latencies.size() == weights.size()) 565 for (int k = 0; k < nodePairs.size(); k++) { 566 ASSERT(nodePairs[k].size() == 2); 567 addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k], weights[k]); 568 } 569 570 networkFile.close(); 571} 572 573void Topology::createLinks(bool isReconfiguration) 574{ 575 // Find maximum switchID 576 577 SwitchID max_switch_id = 0; 578 for (int i=0; i<m_links_src_vector.size(); i++) { 579 max_switch_id = max(max_switch_id, m_links_src_vector[i]); 580 max_switch_id = max(max_switch_id, m_links_dest_vector[i]); 581 } 582 583 // Initialize weight vector 584 Matrix topology_weights; 585 Matrix topology_latency; 586 Matrix topology_bw_multis; 587 int num_switches = max_switch_id+1; 588 topology_weights.setSize(num_switches); 589 topology_latency.setSize(num_switches); 590 topology_bw_multis.setSize(num_switches); 591 m_component_latencies.setSize(num_switches); // FIXME setting the size of a member variable here is a HACK! 592 m_component_inter_switches.setSize(num_switches); // FIXME setting the size of a member variable here is a HACK! 593 for(int i=0; i<topology_weights.size(); i++) { 594 topology_weights[i].setSize(num_switches); 595 topology_latency[i].setSize(num_switches); 596 topology_bw_multis[i].setSize(num_switches); 597 m_component_latencies[i].setSize(num_switches); 598 m_component_inter_switches[i].setSize(num_switches); // FIXME setting the size of a member variable here is a HACK! 599 for(int j=0; j<topology_weights[i].size(); j++) { 600 topology_weights[i][j] = INFINITE_LATENCY; 601 topology_latency[i][j] = -1; // initialize to an invalid value 602 topology_bw_multis[i][j] = -1; // initialize to an invalid value 603 m_component_latencies[i][j] = -1; // initialize to an invalid value 604 m_component_inter_switches[i][j] = 0; // initially assume direct connections / no intermediate switches between components 605 } 606 } 607 608 // Set identity weights to zero 609 for(int i=0; i<topology_weights.size(); i++) { 610 topology_weights[i][i] = 0; 611 } 612 613 // Fill in the topology weights and bandwidth multipliers 614 for (int i=0; i<m_links_src_vector.size(); i++) { 615 topology_weights[m_links_src_vector[i]][m_links_dest_vector[i]] = m_links_weight_vector[i]; 616 topology_latency[m_links_src_vector[i]][m_links_dest_vector[i]] = m_links_latency_vector[i]; 617 m_component_latencies[m_links_src_vector[i]][m_links_dest_vector[i]] = m_links_latency_vector[i]; // initialize to latency vector 618 topology_bw_multis[m_links_src_vector[i]][m_links_dest_vector[i]] = m_bw_multiplier_vector[i]; 619 } 620 621 // Walk topology and hookup the links 622 Matrix dist = shortest_path(topology_weights, m_component_latencies, m_component_inter_switches); 623 for(int i=0; i<topology_weights.size(); i++) { 624 for(int j=0; j<topology_weights[i].size(); j++) { 625 int weight = topology_weights[i][j]; 626 int bw_multiplier = topology_bw_multis[i][j]; 627 int latency = topology_latency[i][j]; 628 if (weight > 0 && weight != INFINITE_LATENCY) { 629 NetDest destination_set = shortest_path_to_node(i, j, topology_weights, dist); 630 assert(latency != -1); 631 makeLink(i, j, destination_set, latency, weight, bw_multiplier, isReconfiguration); 632 } 633 } 634 } 635} 636 637SwitchID Topology::newSwitchID() 638{ 639 m_number_of_switches++; 640 return m_number_of_switches-1+m_nodes+m_nodes; 641} 642 643void Topology::addLink(SwitchID src, SwitchID dest, int link_latency) 644{ 645 addLink(src, dest, link_latency, DEFAULT_BW_MULTIPLIER, link_latency); 646} 647 648void Topology::addLink(SwitchID src, SwitchID dest, int link_latency, int bw_multiplier) 649{ 650 addLink(src, dest, link_latency, bw_multiplier, link_latency); 651} 652 653void Topology::addLink(SwitchID src, SwitchID dest, int link_latency, int bw_multiplier, int link_weight) 654{ 655 ASSERT(src <= m_number_of_switches+m_nodes+m_nodes); 656 ASSERT(dest <= m_number_of_switches+m_nodes+m_nodes); 657 m_links_src_vector.insertAtBottom(src); 658 m_links_dest_vector.insertAtBottom(dest); 659 m_links_latency_vector.insertAtBottom(link_latency); 660 m_links_weight_vector.insertAtBottom(link_weight); 661 m_bw_multiplier_vector.insertAtBottom(bw_multiplier); 662} 663 664void Topology::makeLink(SwitchID src, SwitchID dest, const NetDest& routing_table_entry, int link_latency, int link_weight, int bw_multiplier, bool isReconfiguration) 665{ 666 // Make sure we're not trying to connect two end-point nodes directly together 667 assert((src >= 2*m_nodes) || (dest >= 2*m_nodes)); 668 669 if (src < m_nodes) { 670 m_network_ptr->makeInLink(src, dest-(2*m_nodes), routing_table_entry, link_latency, bw_multiplier, isReconfiguration); 671 } else if (dest < 2*m_nodes) { 672 assert(dest >= m_nodes); 673 NodeID node = dest-m_nodes; 674 m_network_ptr->makeOutLink(src-(2*m_nodes), node, routing_table_entry, link_latency, link_weight, bw_multiplier, isReconfiguration); 675 } else { 676 assert((src >= 2*m_nodes) && (dest >= 2*m_nodes)); 677 m_network_ptr->makeInternalLink(src-(2*m_nodes), dest-(2*m_nodes), routing_table_entry, link_latency, link_weight, bw_multiplier, isReconfiguration); 678 } 679} 680 681void Topology::printConfig(ostream& out) const 682{ 683 assert(m_component_latencies.size() > 0); 684 685 out << "--- Begin Topology Print ---" << endl; 686 out << endl; 687 out << "Topology print ONLY indicates the _NETWORK_ latency between two machines" << endl; 688 out << "It does NOT include the latency within the machines" << endl; 689 out << endl; 690 for (int m=0; m<MachineType_NUM; m++) { 691 for (int i=0; i<MachineType_base_count((MachineType)m); i++) { 692 MachineID cur_mach = {(MachineType)m, i}; 693 out << cur_mach << " Network Latencies" << endl; 694 for (int n=0; n<MachineType_NUM; n++) { 695 for (int j=0; j<MachineType_base_count((MachineType)n); j++) { 696 MachineID dest_mach = {(MachineType)n, j}; 697 if (cur_mach != dest_mach) { 698 int link_latency = m_component_latencies[MachineType_base_number((MachineType)m)+i][MachineType_base_number(MachineType_NUM)+MachineType_base_number((MachineType)n)+j]; 699 int intermediate_switches = m_component_inter_switches[MachineType_base_number((MachineType)m)+i][MachineType_base_number(MachineType_NUM)+MachineType_base_number((MachineType)n)+j]; 700 out << " " << cur_mach << " -> " << dest_mach << " net_lat: " 701 << link_latency+intermediate_switches << endl; // NOTE switches are assumed to have single cycle latency 702 } 703 } 704 } 705 out << endl; 706 } 707 } 708 709 out << "--- End Topology Print ---" << endl; 710} 711 712/**************************************************************************/ 713 714// The following all-pairs shortest path algorithm is based on the 715// discussion from Cormen et al., Chapter 26.1. 716 717static void extend_shortest_path(Matrix& current_dist, Matrix& latencies, Matrix& inter_switches) 718{ 719 bool change = true; 720 int nodes = current_dist.size(); 721 722 while (change) { 723 change = false; 724 for (int i=0; i<nodes; i++) { 725 for (int j=0; j<nodes; j++) { 726 int minimum = current_dist[i][j]; 727 int previous_minimum = minimum; 728 int intermediate_switch = -1; 729 for (int k=0; k<nodes; k++) { 730 minimum = min(minimum, current_dist[i][k] + current_dist[k][j]); 731 if (previous_minimum != minimum) { 732 intermediate_switch = k; 733 inter_switches[i][j] = inter_switches[i][k] + inter_switches[k][j] + 1; 734 } 735 previous_minimum = minimum; 736 } 737 if (current_dist[i][j] != minimum) { 738 change = true; 739 current_dist[i][j] = minimum; 740 assert(intermediate_switch >= 0); 741 assert(intermediate_switch < latencies[i].size()); 742 latencies[i][j] = latencies[i][intermediate_switch] + latencies[intermediate_switch][j]; 743 } 744 } 745 } 746 } 747} 748 749static Matrix shortest_path(const Matrix& weights, Matrix& latencies, Matrix& inter_switches) 750{ 751 Matrix dist = weights; 752 extend_shortest_path(dist, latencies, inter_switches); 753 return dist; 754} 755 756static bool link_is_shortest_path_to_node(SwitchID src, SwitchID next, SwitchID final, 757 const Matrix& weights, const Matrix& dist) 758{ 759 return (weights[src][next] + dist[next][final] == dist[src][final]); 760} 761 762static NetDest shortest_path_to_node(SwitchID src, SwitchID next, 763 const Matrix& weights, const Matrix& dist) 764{ 765 NetDest result; 766 int d = 0; 767 int machines; 768 int max_machines; 769 770 machines = MachineType_NUM; 771 max_machines = MachineType_base_number(MachineType_NUM); 772 773 for (int m=0; m<machines; m++) { 774 for (int i=0; i<MachineType_base_count((MachineType)m); i++) { 775 // we use "d+max_machines" below since the "destination" switches for the machines are numbered 776 // [MachineType_base_number(MachineType_NUM)...2*MachineType_base_number(MachineType_NUM)-1] 777 // for the component network 778 if (link_is_shortest_path_to_node(src, next, 779 d+max_machines, 780 weights, dist)) { 781 MachineID mach = {(MachineType)m, i}; 782 result.add(mach); 783 } 784 d++; 785 } 786 } 787 788 DEBUG_MSG(NETWORK_COMP, MedPrio, "returning shortest path"); 789 DEBUG_EXPR(NETWORK_COMP, MedPrio, (src-(2*max_machines))); 790 DEBUG_EXPR(NETWORK_COMP, MedPrio, (next-(2*max_machines))); 791 DEBUG_EXPR(NETWORK_COMP, MedPrio, src); 792 DEBUG_EXPR(NETWORK_COMP, MedPrio, next); 793 DEBUG_EXPR(NETWORK_COMP, MedPrio, result); 794 DEBUG_NEWLINE(NETWORK_COMP, MedPrio); 795 796 return result; 797} 798 799