Topology.cc revision 6145
1 2/* 3 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer; 10 * redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution; 13 * neither the name of the copyright holders nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30/* 31 * Topology.C 32 * 33 * Description: See Topology.h 34 * 35 * $Id$ 36 * 37 * */ 38 39#include "Topology.hh" 40#include "NetDest.hh" 41#include "Network.hh" 42#include "TopologyType.hh" 43#include "RubyConfig.hh" 44#include "util.hh" 45#include "MachineType.hh" 46#include "Protocol.hh" 47#include <string> 48 49static const int INFINITE_LATENCY = 10000; // Yes, this is a big hack 50static const int DEFAULT_BW_MULTIPLIER = 1; // Just to be consistent with above :) 51 52// Note: In this file, we use the first 2*m_nodes SwitchIDs to 53// represent the input and output endpoint links. These really are 54// not 'switches', as they will not have a Switch object allocated for 55// them. The first m_nodes SwitchIDs are the links into the network, 56// the second m_nodes set of SwitchIDs represent the the output queues 57// of the network. 58 59// Helper functions based on chapter 29 of Cormen et al. 60static Matrix extend_shortest_path(const Matrix& current_dist, Matrix& latencies, Matrix& inter_switches); 61static Matrix shortest_path(const Matrix& weights, Matrix& latencies, Matrix& inter_switches); 62static bool link_is_shortest_path_to_node(SwitchID src, SwitchID next, SwitchID final, const Matrix& weights, const Matrix& dist); 63static NetDest shortest_path_to_node(SwitchID src, SwitchID next, const Matrix& weights, const Matrix& dist); 64 65 66Topology::Topology(Network* network_ptr, int number_of_nodes) 67{ 68 m_network_ptr = network_ptr; 69 m_nodes = number_of_nodes; 70 m_number_of_switches = 0; 71 init(); 72} 73 74void Topology::init() 75{ 76 if (m_nodes == 1) { 77 SwitchID id = newSwitchID(); 78 addLink(0, id, NETWORK_LINK_LATENCY); 79 addLink(id, 1, NETWORK_LINK_LATENCY); 80 return; 81 } 82 83 // topology-specific set-up 84 TopologyType topology = string_to_TopologyType(g_NETWORK_TOPOLOGY); 85 switch (topology) { 86 case TopologyType_TORUS_2D: 87 make2DTorus(); 88 break; 89 case TopologyType_HIERARCHICAL_SWITCH: 90 makeHierarchicalSwitch(FAN_OUT_DEGREE); 91 break; 92 case TopologyType_CROSSBAR: 93 makeHierarchicalSwitch(1024); 94 break; 95 case TopologyType_PT_TO_PT: 96 makePtToPt(); 97 break; 98 case TopologyType_FILE_SPECIFIED: 99 makeFileSpecified(); 100 break; 101 default: 102 ERROR_MSG("Unexpected typology type") 103 } 104 105 // initialize component latencies record 106 m_component_latencies.setSize(0); 107 m_component_inter_switches.setSize(0); 108} 109 110void Topology::makeSwitchesPerChip(Vector< Vector < SwitchID > > &nodePairs, Vector<int> &latencies, Vector<int> &bw_multis, int numberOfChipSwitches) 111{ 112 113 Vector < SwitchID > nodes; // temporary buffer 114 nodes.setSize(2); 115 116 Vector<bool> endpointConnectionExist; // used to ensure all endpoints are connected to the network 117 endpointConnectionExist.setSize(m_nodes); 118 // initialize endpoint check vector 119 for (int k = 0; k < endpointConnectionExist.size(); k++) { 120 endpointConnectionExist[k] = false; 121 } 122 123 Vector<int> componentCount; 124 componentCount.setSize(MachineType_NUM); 125 for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { 126 componentCount[mType] = 0; 127 } 128 129 // components to/from network links 130 for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { 131 for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { 132 for (int component = 0; component < MachineType_chip_count(mType, chip); component++) { 133 134 int latency = -1; 135 int bw_multiplier = -1; // internal link bw multiplier of the global bandwidth 136 if (mType != MachineType_Directory) { 137 latency = ON_CHIP_LINK_LATENCY; // internal link latency 138 bw_multiplier = 10; // internal link bw multiplier of the global bandwidth 139 } else { 140 latency = NETWORK_LINK_LATENCY; // local memory latency 141 bw_multiplier = 1; // local memory link bw multiplier of the global bandwidth 142 } 143 nodes[0] = MachineType_base_number(mType)+componentCount[mType]; 144 nodes[1] = chip+m_nodes*2; // this is the chip's internal switch id # 145 146 // insert link 147 nodePairs.insertAtBottom(nodes); 148 latencies.insertAtBottom(latency); 149 //bw_multis.insertAtBottom(bw_multiplier); 150 bw_multis.insertAtBottom(componentCount[mType]+MachineType_base_number((MachineType)mType)); 151 152 // opposite direction link 153 Vector < SwitchID > otherDirectionNodes; 154 otherDirectionNodes.setSize(2); 155 otherDirectionNodes[0] = nodes[1]; 156 otherDirectionNodes[1] = nodes[0]+m_nodes; 157 nodePairs.insertAtBottom(otherDirectionNodes); 158 latencies.insertAtBottom(latency); 159 bw_multis.insertAtBottom(bw_multiplier); 160 161 assert(!endpointConnectionExist[nodes[0]]); 162 endpointConnectionExist[nodes[0]] = true; 163 componentCount[mType]++; 164 } 165 } 166 } 167 168 // make sure all enpoints are connected in the soon to be created network 169 for (int k = 0; k < endpointConnectionExist.size(); k++) { 170 if (endpointConnectionExist[k] == false) { 171 cerr << "Error: Unconnected Endpoint: " << k << endl; 172 exit(1); 173 } 174 } 175 176 // secondary check to ensure we saw the correct machine counts 177 for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { 178 assert(componentCount[mType] == MachineType_base_count((MachineType)mType)); 179 } 180 181} 182 183// 2D torus topology 184 185void Topology::make2DTorus() 186{ 187 Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file 188 Vector<int> latencies; // link latencies for each link extracted 189 Vector<int> bw_multis; // bw multipliers for each link extracted 190 191 Vector < SwitchID > nodes; // temporary buffer 192 nodes.setSize(2); 193 194 // number of inter-chip switches 195 int numberOfTorusSwitches = m_nodes/MachineType_base_level(MachineType_NUM); 196 // one switch per machine node grouping 197 Vector<SwitchID> torusSwitches; 198 for(int i=0; i<numberOfTorusSwitches; i++){ 199 SwitchID new_switch = newSwitchID(); 200 torusSwitches.insertAtBottom(new_switch); 201 } 202 203 makeSwitchesPerChip(nodePairs, latencies, bw_multis, numberOfTorusSwitches); 204 205 int lengthOfSide = (int)sqrt((double)numberOfTorusSwitches); 206 207 // Now connect the inter-chip torus links 208 209 int latency = NETWORK_LINK_LATENCY; // external link latency 210 int bw_multiplier = 1; // external link bw multiplier of the global bandwidth 211 212 for(int i=0; i<numberOfTorusSwitches; i++){ 213 nodes[0] = torusSwitches[i]; // current switch 214 215 // left 216 if(nodes[0]%lengthOfSide == 0){ // determine left neighbor 217 nodes[1] = nodes[0] - 1 + lengthOfSide; 218 } else { 219 nodes[1] = nodes[0] - 1; 220 } 221 nodePairs.insertAtBottom(nodes); 222 latencies.insertAtBottom(latency); 223 bw_multis.insertAtBottom(bw_multiplier); 224 225 // right 226 if((nodes[0] + 1)%lengthOfSide == 0){ // determine right neighbor 227 nodes[1] = nodes[0] + 1 - lengthOfSide; 228 } else { 229 nodes[1] = nodes[0] + 1; 230 } 231 nodePairs.insertAtBottom(nodes); 232 latencies.insertAtBottom(latency); 233 bw_multis.insertAtBottom(bw_multiplier); 234 235 // top 236 if(nodes[0] - lengthOfSide < 2*m_nodes){ // determine if node is on the top 237 nodes[1] = nodes[0] - lengthOfSide + (lengthOfSide*lengthOfSide); 238 } else { 239 nodes[1] = nodes[0] - lengthOfSide; 240 } 241 nodePairs.insertAtBottom(nodes); 242 latencies.insertAtBottom(latency); 243 bw_multis.insertAtBottom(bw_multiplier); 244 245 // bottom 246 if(nodes[0] + lengthOfSide >= 2*m_nodes+numberOfTorusSwitches){ // determine if node is on the bottom 247 // sorin: bad bug if this is a > instead of a >= 248 nodes[1] = nodes[0] + lengthOfSide - (lengthOfSide*lengthOfSide); 249 } else { 250 nodes[1] = nodes[0] + lengthOfSide; 251 } 252 nodePairs.insertAtBottom(nodes); 253 latencies.insertAtBottom(latency); 254 bw_multis.insertAtBottom(bw_multiplier); 255 256 } 257 258 // add links 259 ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size()) 260 for (int k = 0; k < nodePairs.size(); k++) { 261 ASSERT(nodePairs[k].size() == 2); 262 addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k]); 263 } 264 265} 266 267// hierarchical switch topology 268void Topology::makeHierarchicalSwitch(int fan_out_degree) 269{ 270 // Make a row of switches with only one input. This extra row makes 271 // sure the links out of the nodes have latency and limited 272 // bandwidth. 273 274 // number of inter-chip switches, i.e. the last row of switches 275 Vector<SwitchID> last_level; 276 for (int i=0; i<m_nodes; i++) { 277 SwitchID new_switch = newSwitchID(); // internal switch id # 278 addLink(i, new_switch, NETWORK_LINK_LATENCY); 279 last_level.insertAtBottom(new_switch); 280 } 281 282 // Create Hierarchical Switches 283 284 // start from the bottom level and work up to root 285 Vector<SwitchID> next_level; 286 while(last_level.size() > 1) { 287 for (int i=0; i<last_level.size(); i++) { 288 if ((i % fan_out_degree) == 0) { 289 next_level.insertAtBottom(newSwitchID()); 290 } 291 // Add this link to the last switch we created 292 addLink(last_level[i], next_level[next_level.size()-1], NETWORK_LINK_LATENCY); 293 } 294 295 // Make the current level the last level to get ready for next 296 // iteration 297 last_level = next_level; 298 next_level.clear(); 299 } 300 301 SwitchID root_switch = last_level[0]; 302 303 Vector<SwitchID> out_level; 304 for (int i=0; i<m_nodes; i++) { 305 out_level.insertAtBottom(m_nodes+i); 306 } 307 308 // Build the down network from the endpoints to the root 309 while(out_level.size() != 1) { 310 311 // A level of switches 312 for (int i=0; i<out_level.size(); i++) { 313 if ((i % fan_out_degree) == 0) { 314 if (out_level.size() > fan_out_degree) { 315 next_level.insertAtBottom(newSwitchID()); 316 } else { 317 next_level.insertAtBottom(root_switch); 318 } 319 } 320 // Add this link to the last switch we created 321 addLink(next_level[next_level.size()-1], out_level[i], NETWORK_LINK_LATENCY); 322 } 323 324 // Make the current level the last level to get ready for next 325 // iteration 326 out_level = next_level; 327 next_level.clear(); 328 } 329} 330 331// one internal node per chip, point to point links between chips 332void Topology::makePtToPt() 333{ 334 Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file 335 Vector<int> latencies; // link latencies for each link extracted 336 Vector<int> bw_multis; // bw multipliers for each link extracted 337 338 Vector < SwitchID > nodes; 339 nodes.setSize(2); 340 341 // number of inter-chip switches 342 int numberOfChipSwitches = m_nodes/MachineType_base_level(MachineType_NUM); 343 // two switches per machine node grouping 344 // one intra-chip switch and one inter-chip switch per chip 345 for(int i=0; i<numberOfChipSwitches; i++){ 346 SwitchID new_switch = newSwitchID(); 347 new_switch = newSwitchID(); 348 } 349 350 makeSwitchesPerChip(nodePairs, latencies, bw_multis, numberOfChipSwitches); 351 352 // connect intra-chip switch to inter-chip switch 353 for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { 354 355 int latency = ON_CHIP_LINK_LATENCY; // internal link latency 356 int bw_multiplier = 10; // external link bw multiplier of the global bandwidth 357 358 nodes[0] = chip+m_nodes*2; 359 nodes[1] = chip+m_nodes*2+RubyConfig::numberOfChips(); 360 361 // insert link 362 nodePairs.insertAtBottom(nodes); 363 latencies.insertAtBottom(latency); 364 bw_multis.insertAtBottom(bw_multiplier); 365 366 // opposite direction link 367 Vector < SwitchID > otherDirectionNodes; 368 otherDirectionNodes.setSize(2); 369 otherDirectionNodes[0] = nodes[1]; 370 otherDirectionNodes[1] = nodes[0]; 371 nodePairs.insertAtBottom(otherDirectionNodes); 372 latencies.insertAtBottom(latency); 373 bw_multis.insertAtBottom(bw_multiplier); 374 } 375 376 // point-to-point network between chips 377 for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { 378 for (int other_chip = chip+1; other_chip < RubyConfig::numberOfChips(); other_chip++) { 379 380 int latency = NETWORK_LINK_LATENCY; // external link latency 381 int bw_multiplier = 1; // external link bw multiplier of the global bandwidth 382 383 nodes[0] = chip+m_nodes*2+RubyConfig::numberOfChips(); 384 nodes[1] = other_chip+m_nodes*2+RubyConfig::numberOfChips(); 385 386 // insert link 387 nodePairs.insertAtBottom(nodes); 388 latencies.insertAtBottom(latency); 389 bw_multis.insertAtBottom(bw_multiplier); 390 391 // opposite direction link 392 Vector < SwitchID > otherDirectionNodes; 393 otherDirectionNodes.setSize(2); 394 otherDirectionNodes[0] = nodes[1]; 395 otherDirectionNodes[1] = nodes[0]; 396 nodePairs.insertAtBottom(otherDirectionNodes); 397 latencies.insertAtBottom(latency); 398 bw_multis.insertAtBottom(bw_multiplier); 399 } 400 } 401 402 // add links 403 ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size()) 404 for (int k = 0; k < nodePairs.size(); k++) { 405 ASSERT(nodePairs[k].size() == 2); 406 addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k]); 407 } 408} 409 410// make a network as described by the networkFile 411void Topology::makeFileSpecified() 412{ 413 414 Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file 415 Vector<int> latencies; // link latencies for each link extracted 416 Vector<int> bw_multis; // bw multipliers for each link extracted 417 Vector<int> weights; // link weights used to enfore e-cube deadlock free routing 418 Vector< SwitchID > int_network_switches; // internal switches extracted from the file 419 Vector<bool> endpointConnectionExist; // used to ensure all endpoints are connected to the network 420 421 endpointConnectionExist.setSize(m_nodes); 422 423 // initialize endpoint check vector 424 for (int k = 0; k < endpointConnectionExist.size(); k++) { 425 endpointConnectionExist[k] = false; 426 } 427 428 string filename = "network/simple/Network_Files/"; 429 filename = filename+g_CACHE_DESIGN 430 +"_Procs-"+int_to_string(RubyConfig::numberOfProcessors()) 431 +"_ProcsPerChip-"+int_to_string(RubyConfig::numberOfProcsPerChip()) 432 +"_L2Banks-"+int_to_string(RubyConfig::numberOfL2Cache()) 433 +"_Memories-"+int_to_string(RubyConfig::numberOfMemories()) 434 +".txt"; 435 436 if (g_SIMICS) { 437 filename = "../../../ruby/"+filename; 438 } 439 ifstream networkFile( filename.c_str() , ios::in); 440 if (!networkFile.is_open()) { 441 cerr << "Error: Could not open network file: " << filename << endl; 442 cerr << "Probably no network file exists for " << RubyConfig::numberOfProcessors() 443 << " processors and " << RubyConfig::numberOfProcsPerChip() << " procs per chip " << endl; 444 exit(1); 445 } 446 447 string line = ""; 448 449 while (!networkFile.eof()) { 450 451 Vector < SwitchID > nodes; 452 nodes.setSize(2); 453 int latency = -1; // null latency 454 int weight = -1; // null weight 455 int bw_multiplier = DEFAULT_BW_MULTIPLIER; // default multiplier incase the network file doesn't define it 456 int i = 0; // node pair index 457 int varsFound = 0; // number of varsFound on the line 458 int internalNodes = 0; // used to determine if the link is between 2 internal nodes 459 std::getline(networkFile, line, '\n'); 460 string varStr = string_split(line, ' '); 461 462 // parse the current line in the file 463 while (varStr != "") { 464 string label = string_split(varStr, ':'); 465 466 // valid node labels 467 if (label == "ext_node" || label == "int_node") { 468 ASSERT(i < 2); // one link between 2 switches per line 469 varsFound++; 470 bool isNewIntSwitch = true; 471 if (label == "ext_node") { // input link to node 472 MachineType machine = string_to_MachineType(string_split(varStr, ':')); 473 string nodeStr = string_split(varStr, ':'); 474 if (string_split(varStr, ':') == "bank") { 475 nodes[i] = MachineType_base_number(machine) 476 + atoi(nodeStr.c_str()) 477 + atoi((string_split(varStr, ':')).c_str())*RubyConfig::numberOfChips(); 478 } else { 479 nodes[i] = MachineType_base_number(machine) 480 + atoi(nodeStr.c_str()); 481 } 482 // in nodes should be numbered 0 to m_nodes-1 483 ASSERT(nodes[i] >= 0 && nodes[i] < m_nodes); 484 isNewIntSwitch = false; 485 endpointConnectionExist[nodes[i]] = true; 486 } 487 if (label == "int_node") { // interior node 488 nodes[i] = atoi((string_split(varStr, ':')).c_str())+m_nodes*2; 489 // in nodes should be numbered >= m_nodes*2 490 ASSERT(nodes[i] >= m_nodes*2); 491 for (int k = 0; k < int_network_switches.size(); k++) { 492 if (int_network_switches[k] == nodes[i]) { 493 isNewIntSwitch = false; 494 } 495 } 496 if (isNewIntSwitch) { // if internal switch 497 m_number_of_switches++; 498 int_network_switches.insertAtBottom(nodes[i]); 499 } 500 internalNodes++; 501 } 502 i++; 503 } else if (label == "link_latency") { 504 latency = atoi((string_split(varStr, ':')).c_str()); 505 varsFound++; 506 } else if (label == "bw_multiplier") { // not necessary, defaults to DEFAULT_BW_MULTIPLIER 507 bw_multiplier = atoi((string_split(varStr, ':')).c_str()); 508 } else if (label == "link_weight") { // not necessary, defaults to link_latency 509 weight = atoi((string_split(varStr, ':')).c_str()); 510 } else if (label == "processors") { 511 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfProcessors()); 512 } else if (label == "bw_unit") { 513 ASSERT(atoi((string_split(varStr, ':')).c_str()) == g_endpoint_bandwidth); 514 } else if (label == "procs_per_chip") { 515 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfProcsPerChip()); 516 } else if (label == "L2banks") { 517 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfL2Cache()); 518 } else if (label == "memories") { 519 ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfMemories()); 520 } else { 521 cerr << "Error: Unexpected Identifier: " << label << endl; 522 exit(1); 523 } 524 varStr = string_split(line, ' '); 525 } 526 if (varsFound == 3) { // all three necessary link variables where found so add the link 527 nodePairs.insertAtBottom(nodes); 528 latencies.insertAtBottom(latency); 529 if (weight != -1) { 530 weights.insertAtBottom(weight); 531 } else { 532 weights.insertAtBottom(latency); 533 } 534 bw_multis.insertAtBottom(bw_multiplier); 535 Vector < SwitchID > otherDirectionNodes; 536 otherDirectionNodes.setSize(2); 537 otherDirectionNodes[0] = nodes[1]; 538 if (internalNodes == 2) { // this is an internal link 539 otherDirectionNodes[1] = nodes[0]; 540 } else { 541 otherDirectionNodes[1] = nodes[0]+m_nodes; 542 } 543 nodePairs.insertAtBottom(otherDirectionNodes); 544 latencies.insertAtBottom(latency); 545 if (weight != -1) { 546 weights.insertAtBottom(weight); 547 } else { 548 weights.insertAtBottom(latency); 549 } 550 bw_multis.insertAtBottom(bw_multiplier); 551 } else { 552 if (varsFound != 0) { // if this is not a valid link, then no vars should have been found 553 cerr << "Error in line: " << line << endl; 554 exit(1); 555 } 556 } 557 } // end of file 558 559 // makes sure all enpoints are connected in the soon to be created network 560 for (int k = 0; k < endpointConnectionExist.size(); k++) { 561 if (endpointConnectionExist[k] == false) { 562 cerr << "Error: Unconnected Endpoint: " << k << endl; 563 exit(1); 564 } 565 } 566 567 ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size() && latencies.size() == weights.size()) 568 for (int k = 0; k < nodePairs.size(); k++) { 569 ASSERT(nodePairs[k].size() == 2); 570 addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k], weights[k]); 571 } 572 573 networkFile.close(); 574} 575 576void Topology::createLinks(bool isReconfiguration) 577{ 578 // Find maximum switchID 579 580 SwitchID max_switch_id = 0; 581 for (int i=0; i<m_links_src_vector.size(); i++) { 582 max_switch_id = max(max_switch_id, m_links_src_vector[i]); 583 max_switch_id = max(max_switch_id, m_links_dest_vector[i]); 584 } 585 586 // Initialize weight vector 587 Matrix topology_weights; 588 Matrix topology_latency; 589 Matrix topology_bw_multis; 590 int num_switches = max_switch_id+1; 591 topology_weights.setSize(num_switches); 592 topology_latency.setSize(num_switches); 593 topology_bw_multis.setSize(num_switches); 594 m_component_latencies.setSize(num_switches); // FIXME setting the size of a member variable here is a HACK! 595 m_component_inter_switches.setSize(num_switches); // FIXME setting the size of a member variable here is a HACK! 596 for(int i=0; i<topology_weights.size(); i++) { 597 topology_weights[i].setSize(num_switches); 598 topology_latency[i].setSize(num_switches); 599 topology_bw_multis[i].setSize(num_switches); 600 m_component_latencies[i].setSize(num_switches); 601 m_component_inter_switches[i].setSize(num_switches); // FIXME setting the size of a member variable here is a HACK! 602 for(int j=0; j<topology_weights[i].size(); j++) { 603 topology_weights[i][j] = INFINITE_LATENCY; 604 topology_latency[i][j] = -1; // initialize to an invalid value 605 topology_bw_multis[i][j] = -1; // initialize to an invalid value 606 m_component_latencies[i][j] = -1; // initialize to an invalid value 607 m_component_inter_switches[i][j] = 0; // initially assume direct connections / no intermediate switches between components 608 } 609 } 610 611 // Set identity weights to zero 612 for(int i=0; i<topology_weights.size(); i++) { 613 topology_weights[i][i] = 0; 614 } 615 616 // Fill in the topology weights and bandwidth multipliers 617 for (int i=0; i<m_links_src_vector.size(); i++) { 618 topology_weights[m_links_src_vector[i]][m_links_dest_vector[i]] = m_links_weight_vector[i]; 619 topology_latency[m_links_src_vector[i]][m_links_dest_vector[i]] = m_links_latency_vector[i]; 620 m_component_latencies[m_links_src_vector[i]][m_links_dest_vector[i]] = m_links_latency_vector[i]; // initialize to latency vector 621 topology_bw_multis[m_links_src_vector[i]][m_links_dest_vector[i]] = m_bw_multiplier_vector[i]; 622 } 623 624 // Walk topology and hookup the links 625 Matrix dist = shortest_path(topology_weights, m_component_latencies, m_component_inter_switches); 626 for(int i=0; i<topology_weights.size(); i++) { 627 for(int j=0; j<topology_weights[i].size(); j++) { 628 int weight = topology_weights[i][j]; 629 int bw_multiplier = topology_bw_multis[i][j]; 630 int latency = topology_latency[i][j]; 631 if (weight > 0 && weight != INFINITE_LATENCY) { 632 NetDest destination_set = shortest_path_to_node(i, j, topology_weights, dist); 633 assert(latency != -1); 634 makeLink(i, j, destination_set, latency, weight, bw_multiplier, isReconfiguration); 635 } 636 } 637 } 638} 639 640SwitchID Topology::newSwitchID() 641{ 642 m_number_of_switches++; 643 return m_number_of_switches-1+m_nodes+m_nodes; 644} 645 646void Topology::addLink(SwitchID src, SwitchID dest, int link_latency) 647{ 648 addLink(src, dest, link_latency, DEFAULT_BW_MULTIPLIER, link_latency); 649} 650 651void Topology::addLink(SwitchID src, SwitchID dest, int link_latency, int bw_multiplier) 652{ 653 addLink(src, dest, link_latency, bw_multiplier, link_latency); 654} 655 656void Topology::addLink(SwitchID src, SwitchID dest, int link_latency, int bw_multiplier, int link_weight) 657{ 658 ASSERT(src <= m_number_of_switches+m_nodes+m_nodes); 659 ASSERT(dest <= m_number_of_switches+m_nodes+m_nodes); 660 m_links_src_vector.insertAtBottom(src); 661 m_links_dest_vector.insertAtBottom(dest); 662 m_links_latency_vector.insertAtBottom(link_latency); 663 m_links_weight_vector.insertAtBottom(link_weight); 664 m_bw_multiplier_vector.insertAtBottom(bw_multiplier); 665} 666 667void Topology::makeLink(SwitchID src, SwitchID dest, const NetDest& routing_table_entry, int link_latency, int link_weight, int bw_multiplier, bool isReconfiguration) 668{ 669 // Make sure we're not trying to connect two end-point nodes directly together 670 assert((src >= 2*m_nodes) || (dest >= 2*m_nodes)); 671 672 if (src < m_nodes) { 673 m_network_ptr->makeInLink(src, dest-(2*m_nodes), routing_table_entry, link_latency, bw_multiplier, isReconfiguration); 674 } else if (dest < 2*m_nodes) { 675 assert(dest >= m_nodes); 676 NodeID node = dest-m_nodes; 677 m_network_ptr->makeOutLink(src-(2*m_nodes), node, routing_table_entry, link_latency, link_weight, bw_multiplier, isReconfiguration); 678 } else { 679 assert((src >= 2*m_nodes) && (dest >= 2*m_nodes)); 680 m_network_ptr->makeInternalLink(src-(2*m_nodes), dest-(2*m_nodes), routing_table_entry, link_latency, link_weight, bw_multiplier, isReconfiguration); 681 } 682} 683 684void Topology::printConfig(ostream& out) const 685{ 686 assert(m_component_latencies.size() > 0); 687 688 out << "--- Begin Topology Print ---" << endl; 689 out << endl; 690 out << "Topology print ONLY indicates the _NETWORK_ latency between two machines" << endl; 691 out << "It does NOT include the latency within the machines" << endl; 692 out << endl; 693 for (int m=0; m<MachineType_NUM; m++) { 694 for (int i=0; i<MachineType_base_count((MachineType)m); i++) { 695 MachineID cur_mach = {(MachineType)m, i}; 696 out << cur_mach << " Network Latencies" << endl; 697 for (int n=0; n<MachineType_NUM; n++) { 698 for (int j=0; j<MachineType_base_count((MachineType)n); j++) { 699 MachineID dest_mach = {(MachineType)n, j}; 700 if (cur_mach != dest_mach) { 701 int link_latency = m_component_latencies[MachineType_base_number((MachineType)m)+i][MachineType_base_number(MachineType_NUM)+MachineType_base_number((MachineType)n)+j]; 702 int intermediate_switches = m_component_inter_switches[MachineType_base_number((MachineType)m)+i][MachineType_base_number(MachineType_NUM)+MachineType_base_number((MachineType)n)+j]; 703 out << " " << cur_mach << " -> " << dest_mach << " net_lat: " 704 << link_latency+intermediate_switches << endl; // NOTE switches are assumed to have single cycle latency 705 } 706 } 707 } 708 out << endl; 709 } 710 } 711 712 out << "--- End Topology Print ---" << endl; 713} 714 715/**************************************************************************/ 716 717// The following all-pairs shortest path algorithm is based on the 718// discussion from Cormen et al., Chapter 26.1. 719 720static void extend_shortest_path(Matrix& current_dist, Matrix& latencies, Matrix& inter_switches) 721{ 722 bool change = true; 723 int nodes = current_dist.size(); 724 725 while (change) { 726 change = false; 727 for (int i=0; i<nodes; i++) { 728 for (int j=0; j<nodes; j++) { 729 int minimum = current_dist[i][j]; 730 int previous_minimum = minimum; 731 int intermediate_switch = -1; 732 for (int k=0; k<nodes; k++) { 733 minimum = min(minimum, current_dist[i][k] + current_dist[k][j]); 734 if (previous_minimum != minimum) { 735 intermediate_switch = k; 736 inter_switches[i][j] = inter_switches[i][k] + inter_switches[k][j] + 1; 737 } 738 previous_minimum = minimum; 739 } 740 if (current_dist[i][j] != minimum) { 741 change = true; 742 current_dist[i][j] = minimum; 743 assert(intermediate_switch >= 0); 744 assert(intermediate_switch < latencies[i].size()); 745 latencies[i][j] = latencies[i][intermediate_switch] + latencies[intermediate_switch][j]; 746 } 747 } 748 } 749 } 750} 751 752static Matrix shortest_path(const Matrix& weights, Matrix& latencies, Matrix& inter_switches) 753{ 754 Matrix dist = weights; 755 extend_shortest_path(dist, latencies, inter_switches); 756 return dist; 757} 758 759static bool link_is_shortest_path_to_node(SwitchID src, SwitchID next, SwitchID final, 760 const Matrix& weights, const Matrix& dist) 761{ 762 return (weights[src][next] + dist[next][final] == dist[src][final]); 763} 764 765static NetDest shortest_path_to_node(SwitchID src, SwitchID next, 766 const Matrix& weights, const Matrix& dist) 767{ 768 NetDest result; 769 int d = 0; 770 int machines; 771 int max_machines; 772 773 machines = MachineType_NUM; 774 max_machines = MachineType_base_number(MachineType_NUM); 775 776 for (int m=0; m<machines; m++) { 777 for (int i=0; i<MachineType_base_count((MachineType)m); i++) { 778 // we use "d+max_machines" below since the "destination" switches for the machines are numbered 779 // [MachineType_base_number(MachineType_NUM)...2*MachineType_base_number(MachineType_NUM)-1] 780 // for the component network 781 if (link_is_shortest_path_to_node(src, next, 782 d+max_machines, 783 weights, dist)) { 784 MachineID mach = {(MachineType)m, i}; 785 result.add(mach); 786 } 787 d++; 788 } 789 } 790 791 DEBUG_MSG(NETWORK_COMP, MedPrio, "returning shortest path"); 792 DEBUG_EXPR(NETWORK_COMP, MedPrio, (src-(2*max_machines))); 793 DEBUG_EXPR(NETWORK_COMP, MedPrio, (next-(2*max_machines))); 794 DEBUG_EXPR(NETWORK_COMP, MedPrio, src); 795 DEBUG_EXPR(NETWORK_COMP, MedPrio, next); 796 DEBUG_EXPR(NETWORK_COMP, MedPrio, result); 797 DEBUG_NEWLINE(NETWORK_COMP, MedPrio); 798 799 return result; 800} 801 802