// NOTE: Many parts of this copied from the TMVARegression.C file // grabbed from the ROOT source github site. #include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/Tools.h" #include "TMVA/Factory.h" #include "TMVA/DataLoader.h" #include "TMVA/TMVARegGui.h" using namespace TMVA; void BookMethods(std::map &Use, TMVA::Factory *factory, TMVA::DataLoader *dataloader); void trainMulti(void) { TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map Use; // Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; // succeeded Use["PDEFoam"] = 0; // failed DL Use["KNN"] = 0; // succeeded // // Linear Discriminant Analysis Use["LD"] = 0; // succeeded // // Function Discriminant analysis Use["FDA_GA"] = 0; // succeeded Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // Neural Network Use["MLP"] = 1; // succeeded Use["DNN"] = 0; // // Support Vector Machine Use["SVM"] = 0; // failed // // Boosted Decision Trees Use["BDT"] = 0; Use["BDTG"] = 0; // succeeded // --------------------------------------------------------------- // auto inputFile1 = TFile::Open("ML_track_input_042513_125.root"); // auto inputFile2 = TFile::Open("ML_track_input_042513_126.root"); auto outputFile = TFile::Open("TMVAMulti.root", "RECREATE"); TMVA::Factory factory("TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression"); TMVA::DataLoader loader("dataset"); loader.AddVariable("L01"); loader.AddVariable("L02"); loader.AddVariable("L03"); loader.AddVariable("L04"); loader.AddVariable("L05"); loader.AddVariable("L06"); loader.AddVariable("L07"); loader.AddVariable("L08"); loader.AddVariable("L09"); loader.AddVariable("L10"); loader.AddVariable("L11"); loader.AddVariable("L12"); loader.AddVariable("L13"); loader.AddVariable("L14"); loader.AddVariable("L15"); loader.AddVariable("L16"); loader.AddVariable("L17"); loader.AddVariable("L18"); loader.AddVariable("L19"); loader.AddVariable("L20"); loader.AddVariable("L21"); loader.AddVariable("L22"); loader.AddVariable("L23"); loader.AddVariable("L24"); loader.AddTarget("target := p"); loader.AddSpectator( "p_wbt:=p_wbt"); loader.AddSpectator( "p_can:=p_can"); auto chain = new TChain("t"); chain->Add("../DATA/hd_root_*_0*.root"); TCut mycuts("p>0.200 && p<5.0"); // Use with TMVA loader.AddRegressionTree( chain, 1.0 ); loader.PrepareTrainingAndTestTree( mycuts, "nTrain_Regression=100000:nTest_Regression=50000:SplitMode=Random:NormMode=NumEvents:!V"); BookMethods(Use, &factory, &loader); // factory.BookMethod( &loader, TMVA::Types::kBDT, "BDTG", // TString("!H:!V:Ntrees=1024::BoostType=Grad:Shrinkage=0.1:ncuts=20:MaxDepth=32:")+ // TString("RegressionLossFunctionBDTG=AbsoluteDeviation")); factory.TrainAllMethods(); factory.TestAllMethods(); factory.EvaluateAllMethods(); outputFile->Close(); } void BookMethods(std::map &Use, TMVA::Factory *factory, TMVA::DataLoader *dataloader) { // Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( dataloader, TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( dataloader, TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); if (Use["DNN"]) { /* TString layoutString ("Layout=TANH|(N+100)*2,LINEAR"); TString layoutString ("Layout=SOFTSIGN|100,SOFTSIGN|50,SOFTSIGN|20,LINEAR"); TString layoutString ("Layout=RELU|300,RELU|100,RELU|30,RELU|10,LINEAR"); TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|30,SOFTSIGN|20,SOFTSIGN|10,LINEAR"); TString layoutString ("Layout=TANH|50,TANH|30,TANH|20,TANH|10,LINEAR"); TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|20,LINEAR"); TString layoutString ("Layout=TANH|100,TANH|30,LINEAR"); */ TString layoutString ("Layout=TANH|100,LINEAR"); TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2,Multithreading=True"); TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=170,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1,Multithreading=True"); TString training2 ("LearningRate=1e-5,Momentum=0.3,Repetitions=1,ConvergenceSteps=150,BatchSize=40,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,Multithreading=True"); TString training3 ("LearningRate=1e-6,Momentum=0.1,Repetitions=1,ConvergenceSteps=500,BatchSize=100,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; // TString trainingStrategyString ("TrainingStrategy=LearningRate=1e-1,Momentum=0.3,Repetitions=3,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,L1=false,DropFraction=0.0,DropRepetitions=5"); TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=G:WeightInitialization=XAVIERUNIFORM"); // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", nnOptions ); // NN } // Support Vector Machine if (Use["SVM"]) factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" ); // -------------------------------------------------------------------------------------------------- }