From e34de0691b3bd94720c44c1efad47c3d39ff4134 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 9 Jul 2019 16:06:17 -0400 Subject: CTest: Allocate hardware to tests --- Source/CTest/cmCTestMultiProcessHandler.cxx | 140 +++++++++++++++++++++++++++- Source/CTest/cmCTestMultiProcessHandler.h | 33 ++++++- Source/CTest/cmCTestRunTest.cxx | 43 +++++++++ Source/CTest/cmCTestRunTest.h | 19 +++- Source/CTest/cmCTestTestCommand.cxx | 4 + Source/CTest/cmCTestTestCommand.h | 1 + Source/CTest/cmCTestTestHandler.cxx | 15 +++ Source/CTest/cmCTestTestHandler.h | 4 + Source/cmCTest.cxx | 9 ++ Source/ctest.cxx | 1 + 10 files changed, 262 insertions(+), 7 deletions(-) diff --git a/Source/CTest/cmCTestMultiProcessHandler.cxx b/Source/CTest/cmCTestMultiProcessHandler.cxx index 1902500..aee6d67 100644 --- a/Source/CTest/cmCTestMultiProcessHandler.cxx +++ b/Source/CTest/cmCTestMultiProcessHandler.cxx @@ -3,8 +3,10 @@ #include "cmCTestMultiProcessHandler.h" #include +#include #include #include +#include #include #include #include @@ -27,6 +29,7 @@ #include "cmAffinity.h" #include "cmAlgorithms.h" #include "cmCTest.h" +#include "cmCTestBinPacker.h" #include "cmCTestRunTest.h" #include "cmCTestTestHandler.h" #include "cmDuration.h" @@ -133,6 +136,12 @@ void cmCTestMultiProcessHandler::RunTests() uv_run(&this->Loop, UV_RUN_DEFAULT); uv_loop_close(&this->Loop); + if (!this->StopTimePassed) { + assert(this->Completed == this->Total); + assert(this->Tests.empty()); + } + assert(this->AllHardwareAvailable()); + this->MarkFinished(); this->UpdateCostData(); } @@ -168,6 +177,10 @@ bool cmCTestMultiProcessHandler::StartTestProcess(int test) } testRun->SetIndex(test); testRun->SetTestProperties(this->Properties[test]); + if (this->TestHandler->UseHardwareSpec) { + testRun->SetUseAllocatedHardware(true); + testRun->SetAllocatedHardware(this->AllocatedHardware[test]); + } // Find any failed dependencies for this test. We assume the more common // scenario has no failed tests, so make it the outer loop. @@ -179,7 +192,13 @@ bool cmCTestMultiProcessHandler::StartTestProcess(int test) // Always lock the resources we'll be using, even if we fail to set the // working directory because FinishTestProcess() will try to unlock them - this->LockResources(test); + this->AllocateResources(test); + + if (!this->TestsHaveSufficientHardware[test]) { + testRun->StartFailure("Insufficient hardware"); + this->FinishTestProcess(testRun, false); + return false; + } cmWorkingDirectory workdir(this->Properties[test]->Directory); if (workdir.Failed()) { @@ -199,6 +218,110 @@ bool cmCTestMultiProcessHandler::StartTestProcess(int test) return false; } +bool cmCTestMultiProcessHandler::AllocateHardware(int index) +{ + if (!this->TestHandler->UseHardwareSpec) { + return true; + } + + std::map> allocations; + if (!this->TryAllocateHardware(index, allocations)) { + return false; + } + + auto& allocatedHardware = this->AllocatedHardware[index]; + allocatedHardware.resize(this->Properties[index]->Processes.size()); + for (auto const& it : allocations) { + for (auto const& alloc : it.second) { + bool result = this->HardwareAllocator.AllocateResource( + it.first, alloc.Id, alloc.SlotsNeeded); + (void)result; + assert(result); + allocatedHardware[alloc.ProcessIndex][it.first].push_back( + { alloc.Id, static_cast(alloc.SlotsNeeded) }); + } + } + + return true; +} + +bool cmCTestMultiProcessHandler::TryAllocateHardware( + int index, + std::map>& allocations) +{ + allocations.clear(); + + std::size_t processIndex = 0; + for (auto const& process : this->Properties[index]->Processes) { + for (auto const& requirement : process) { + for (int i = 0; i < requirement.UnitsNeeded; ++i) { + allocations[requirement.ResourceType].push_back( + { processIndex, requirement.SlotsNeeded, "" }); + } + } + ++processIndex; + } + + auto const& availableHardware = this->HardwareAllocator.GetResources(); + for (auto& it : allocations) { + if (!availableHardware.count(it.first)) { + return false; + } + if (!cmAllocateCTestHardwareRoundRobin(availableHardware.at(it.first), + it.second)) { + return false; + } + } + + return true; +} + +void cmCTestMultiProcessHandler::DeallocateHardware(int index) +{ + if (!this->TestHandler->UseHardwareSpec) { + return; + } + + { + auto& allocatedHardware = this->AllocatedHardware[index]; + for (auto const& processAlloc : allocatedHardware) { + for (auto const& it : processAlloc) { + auto resourceType = it.first; + for (auto const& it2 : it.second) { + bool success = this->HardwareAllocator.DeallocateResource( + resourceType, it2.Id, it2.Slots); + (void)success; + assert(success); + } + } + } + } + this->AllocatedHardware.erase(index); +} + +bool cmCTestMultiProcessHandler::AllHardwareAvailable() +{ + for (auto const& it : this->HardwareAllocator.GetResources()) { + for (auto const& it2 : it.second) { + if (it2.second.Locked != 0) { + return false; + } + } + } + + return true; +} + +void cmCTestMultiProcessHandler::CheckHardwareAvailable() +{ + for (auto test : this->SortedTests) { + std::map> allocations; + this->TestsHaveSufficientHardware[test] = + !this->TestHandler->UseHardwareSpec || + this->TryAllocateHardware(test, allocations); + } +} + bool cmCTestMultiProcessHandler::CheckStopTimePassed() { if (!this->StopTimePassed) { @@ -223,7 +346,7 @@ void cmCTestMultiProcessHandler::SetStopTimePassed() } } -void cmCTestMultiProcessHandler::LockResources(int index) +void cmCTestMultiProcessHandler::AllocateResources(int index) { this->LockedResources.insert( this->Properties[index]->LockedResources.begin(), @@ -234,7 +357,7 @@ void cmCTestMultiProcessHandler::LockResources(int index) } } -void cmCTestMultiProcessHandler::UnlockResources(int index) +void cmCTestMultiProcessHandler::DeallocateResources(int index) { for (std::string const& i : this->Properties[index]->LockedResources) { this->LockedResources.erase(i); @@ -281,12 +404,20 @@ bool cmCTestMultiProcessHandler::StartTest(int test) } } + // Allocate hardware + if (this->TestsHaveSufficientHardware[test] && + !this->AllocateHardware(test)) { + this->DeallocateHardware(test); + return false; + } + // if there are no depends left then run this test if (this->Tests[test].empty()) { return this->StartTestProcess(test); } // This test was not able to start because it is waiting // on depends to run + this->DeallocateHardware(test); return false; } @@ -471,7 +602,8 @@ void cmCTestMultiProcessHandler::FinishTestProcess(cmCTestRunTest* runner, this->TestFinishMap[test] = true; this->TestRunningMap[test] = false; this->WriteCheckpoint(test); - this->UnlockResources(test); + this->DeallocateHardware(test); + this->DeallocateResources(test); this->RunningCount -= GetProcessorsUsed(test); for (auto p : properties->Affinity) { diff --git a/Source/CTest/cmCTestMultiProcessHandler.h b/Source/CTest/cmCTestMultiProcessHandler.h index be31c75..da716f0 100644 --- a/Source/CTest/cmCTestMultiProcessHandler.h +++ b/Source/CTest/cmCTestMultiProcessHandler.h @@ -14,10 +14,13 @@ #include "cm_uv.h" +#include "cmCTestHardwareAllocator.h" #include "cmCTestTestHandler.h" #include "cmUVHandlePtr.h" class cmCTest; +struct cmCTestBinPackerAllocation; +class cmCTestHardwareSpec; class cmCTestRunTest; /** \class cmCTestMultiProcessHandler @@ -44,6 +47,11 @@ public: : public std::map { }; + struct HardwareAllocation + { + std::string Id; + unsigned int Slots; + }; cmCTestMultiProcessHandler(); virtual ~cmCTestMultiProcessHandler(); @@ -79,6 +87,13 @@ public: void SetQuiet(bool b) { this->Quiet = b; } + void InitHardwareAllocator(const cmCTestHardwareSpec& spec) + { + this->HardwareAllocator.InitializeFromHardwareSpec(spec); + } + + void CheckHardwareAvailable(); + protected: // Start the next test or tests as many as are allowed by // ParallelLevel @@ -119,8 +134,17 @@ protected: bool CheckStopTimePassed(); void SetStopTimePassed(); - void LockResources(int index); - void UnlockResources(int index); + void AllocateResources(int index); + void DeallocateResources(int index); + + bool AllocateHardware(int index); + bool TryAllocateHardware( + int index, + std::map>& + allocations); + void DeallocateHardware(int index); + bool AllHardwareAvailable(); + // map from test number to set of depend tests TestMap Tests; TestList SortedTests; @@ -141,6 +165,11 @@ protected: std::vector* Failed; std::vector LastTestsFailed; std::set LockedResources; + std::map>>> + AllocatedHardware; + std::map TestsHaveSufficientHardware; + cmCTestHardwareAllocator HardwareAllocator; std::vector* TestResults; size_t ParallelLevel; // max number of process that can be run at once unsigned long TestLoad; diff --git a/Source/CTest/cmCTestRunTest.cxx b/Source/CTest/cmCTestRunTest.cxx index 0188fe0..7f7f736 100644 --- a/Source/CTest/cmCTestRunTest.cxx +++ b/Source/CTest/cmCTestRunTest.cxx @@ -3,6 +3,7 @@ #include "cmCTestRunTest.h" #include +#include #include #include #include @@ -689,10 +690,52 @@ bool cmCTestRunTest::ForkProcess(cmDuration testTimeOut, bool explicitTimeout, cmSystemTools::AppendEnv(*environment); } + if (this->UseAllocatedHardware) { + this->SetupHardwareEnvironment(); + } else { + cmSystemTools::UnsetEnv("CTEST_PROCESS_COUNT"); + } + return this->TestProcess->StartProcess(this->MultiTestHandler.Loop, affinity); } +void cmCTestRunTest::SetupHardwareEnvironment() +{ + std::string processCount = "CTEST_PROCESS_COUNT="; + processCount += std::to_string(this->AllocatedHardware.size()); + cmSystemTools::PutEnv(processCount); + + std::size_t i = 0; + for (auto const& process : this->AllocatedHardware) { + std::string prefix = "CTEST_PROCESS_"; + prefix += std::to_string(i); + std::string resourceList = prefix + '='; + prefix += '_'; + bool firstType = true; + for (auto const& it : process) { + if (!firstType) { + resourceList += ','; + } + firstType = false; + auto resourceType = it.first; + resourceList += resourceType; + std::string var = prefix + cmSystemTools::UpperCase(resourceType) + '='; + bool firstName = true; + for (auto const& it2 : it.second) { + if (!firstName) { + var += ';'; + } + firstName = false; + var += "id:" + it2.Id + ",slots:" + std::to_string(it2.Slots); + } + cmSystemTools::PutEnv(var); + } + cmSystemTools::PutEnv(resourceList); + ++i; + } +} + void cmCTestRunTest::WriteLogOutputTop(size_t completed, size_t total) { std::ostringstream outputStream; diff --git a/Source/CTest/cmCTestRunTest.h b/Source/CTest/cmCTestRunTest.h index c770bac..085a6b8 100644 --- a/Source/CTest/cmCTestRunTest.h +++ b/Source/CTest/cmCTestRunTest.h @@ -5,6 +5,7 @@ #include "cmConfigure.h" // IWYU pragma: keep +#include #include #include #include @@ -12,12 +13,12 @@ #include +#include "cmCTestMultiProcessHandler.h" #include "cmCTestTestHandler.h" #include "cmDuration.h" #include "cmProcess.h" class cmCTest; -class cmCTestMultiProcessHandler; /** \class cmRunTest * \brief represents a single test to be run @@ -83,6 +84,16 @@ public: bool TimedOutForStopTime() const { return this->TimeoutIsForStopTime; } + void SetUseAllocatedHardware(bool use) { this->UseAllocatedHardware = use; } + void SetAllocatedHardware( + const std::vector< + std::map>>& + hardware) + { + this->AllocatedHardware = hardware; + } + private: bool NeedsToRerun(); void DartProcessing(); @@ -94,6 +105,8 @@ private: // Run post processing of the process output for MemCheck void MemCheckPostProcess(); + void SetupHardwareEnvironment(); + // Returns "completed/total Test #Index: " std::string GetTestPrefix(size_t completed, size_t total) const; @@ -112,6 +125,10 @@ private: std::string StartTime; std::string ActualCommand; std::vector Arguments; + bool UseAllocatedHardware = false; + std::vector>> + AllocatedHardware; bool RunUntilFail; int NumberOfRunsLeft; bool RunAgain; diff --git a/Source/CTest/cmCTestTestCommand.cxx b/Source/CTest/cmCTestTestCommand.cxx index d200b40..5496353 100644 --- a/Source/CTest/cmCTestTestCommand.cxx +++ b/Source/CTest/cmCTestTestCommand.cxx @@ -32,6 +32,7 @@ void cmCTestTestCommand::BindArguments() this->Bind("SCHEDULE_RANDOM"_s, this->ScheduleRandom); this->Bind("STOP_TIME"_s, this->StopTime); this->Bind("TEST_LOAD"_s, this->TestLoad); + this->Bind("HARDWARE_SPEC_FILE"_s, this->HardwareSpecFile); } cmCTestGenericHandler* cmCTestTestCommand::InitializeHandler() @@ -87,6 +88,9 @@ cmCTestGenericHandler* cmCTestTestCommand::InitializeHandler() if (!this->ScheduleRandom.empty()) { handler->SetOption("ScheduleRandom", this->ScheduleRandom.c_str()); } + if (!this->HardwareSpecFile.empty()) { + handler->SetOption("HardwareSpecFile", this->HardwareSpecFile.c_str()); + } if (!this->StopTime.empty()) { this->CTest->SetStopTime(this->StopTime); } diff --git a/Source/CTest/cmCTestTestCommand.h b/Source/CTest/cmCTestTestCommand.h index cb65c0b..dc15279 100644 --- a/Source/CTest/cmCTestTestCommand.h +++ b/Source/CTest/cmCTestTestCommand.h @@ -58,6 +58,7 @@ protected: std::string ScheduleRandom; std::string StopTime; std::string TestLoad; + std::string HardwareSpecFile; }; #endif diff --git a/Source/CTest/cmCTestTestHandler.cxx b/Source/CTest/cmCTestTestHandler.cxx index 927e086..2be62ae 100644 --- a/Source/CTest/cmCTestTestHandler.cxx +++ b/Source/CTest/cmCTestTestHandler.cxx @@ -289,6 +289,7 @@ cmCTestTestHandler::cmCTestTestHandler() this->UseIncludeRegExpFlag = false; this->UseExcludeRegExpFlag = false; this->UseExcludeRegExpFirst = false; + this->UseHardwareSpec = false; this->CustomMaximumPassedTestOutputSize = 1 * 1024; this->CustomMaximumFailedTestOutputSize = 300 * 1024; @@ -509,6 +510,16 @@ bool cmCTestTestHandler::ProcessOptions() } this->SetRerunFailed(cmIsOn(this->GetOption("RerunFailed"))); + val = this->GetOption("HardwareSpecFile"); + if (val) { + this->UseHardwareSpec = true; + if (!this->HardwareSpec.ReadFromJSONFile(val)) { + cmCTestLog(this->CTest, ERROR_MESSAGE, + "Could not read hardware spec file: " << val << std::endl); + return false; + } + } + return true; } @@ -1226,6 +1237,9 @@ void cmCTestTestHandler::ProcessDirectory(std::vector& passed, } else { parallel->SetTestLoad(this->CTest->GetTestLoad()); } + if (this->UseHardwareSpec) { + parallel->InitHardwareAllocator(this->HardwareSpec); + } *this->LogFile << "Start testing: " << this->CTest->CurrentTime() << std::endl @@ -1269,6 +1283,7 @@ void cmCTestTestHandler::ProcessDirectory(std::vector& passed, parallel->SetPassFailVectors(&passed, &failed); this->TestResults.clear(); parallel->SetTestResults(&this->TestResults); + parallel->CheckHardwareAvailable(); if (this->CTest->ShouldPrintLabels()) { parallel->PrintLabels(); diff --git a/Source/CTest/cmCTestTestHandler.h b/Source/CTest/cmCTestTestHandler.h index 2602c30..525215c 100644 --- a/Source/CTest/cmCTestTestHandler.h +++ b/Source/CTest/cmCTestTestHandler.h @@ -19,6 +19,7 @@ #include "cmsys/RegularExpression.hxx" #include "cmCTestGenericHandler.h" +#include "cmCTestHardwareSpec.h" #include "cmDuration.h" #include "cmListFileCache.h" @@ -335,6 +336,9 @@ private: cmsys::RegularExpression IncludeTestsRegularExpression; cmsys::RegularExpression ExcludeTestsRegularExpression; + bool UseHardwareSpec; + cmCTestHardwareSpec HardwareSpec; + void GenerateRegressionImages(cmXMLWriter& xml, const std::string& dart); cmsys::RegularExpression DartStuff1; void CheckLabelFilter(cmCTestTestProperties& it); diff --git a/Source/cmCTest.cxx b/Source/cmCTest.cxx index 0d1ecca..10b7646 100644 --- a/Source/cmCTest.cxx +++ b/Source/cmCTest.cxx @@ -2090,6 +2090,15 @@ bool cmCTest::HandleCommandLineArguments(size_t& i, "ExcludeFixtureCleanupRegularExpression", args[i].c_str()); } + if (this->CheckArgument(arg, "--hardware-spec-file") && + i < args.size() - 1) { + i++; + this->GetTestHandler()->SetPersistentOption("HardwareSpecFile", + args[i].c_str()); + this->GetMemCheckHandler()->SetPersistentOption("HardwareSpecFile", + args[i].c_str()); + } + if (this->CheckArgument(arg, "--rerun-failed")) { this->GetTestHandler()->SetPersistentOption("RerunFailed", "true"); this->GetMemCheckHandler()->SetPersistentOption("RerunFailed", "true"); diff --git a/Source/ctest.cxx b/Source/ctest.cxx index 54549a1..91ee598 100644 --- a/Source/ctest.cxx +++ b/Source/ctest.cxx @@ -103,6 +103,7 @@ static const char* cmDocumentationOptions[][2] = { "times without failing in order to pass" }, { "--max-width ", "Set the max width for a test name to output" }, { "--interactive-debug-mode [0|1]", "Set the interactive mode to 0 or 1." }, + { "--hardware-spec-file ", "Set the hardware spec file to use." }, { "--no-label-summary", "Disable timing summary information for labels." }, { "--no-subproject-summary", "Disable timing summary information for " -- cgit v0.12