Skip to content

Commit

Permalink
Update catboost
Browse files Browse the repository at this point in the history
  • Loading branch information
exprmntr committed Nov 29, 2017
1 parent 77fd52e commit c0df0a8
Show file tree
Hide file tree
Showing 108 changed files with 164 additions and 4,716 deletions.
4 changes: 2 additions & 2 deletions build/scripts/build_mn.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def InitBase(self, listname, mnrankingSuffix):
self.mnrankingSuffix = mnrankingSuffix
self.mnlistname = listname + mnrankingSuffix
self.mnlistelem = "const NMatrixnet::TMnSsePtr*"
mnlisttype = "ymap< TString, {0} >".format(self.mnlistelem)
mnlisttype = "TMap< TString, {0} >".format(self.mnlistelem)
self.mnlist = "const {0} {1}".format(mnlisttype, self.mnlistname)

self.mnmultilistname = "{0}{1}Multi".format(listname, self.mnrankingSuffix)
self.mnmultilistelem = "const NMatrixnet::TMnMultiCategPtr*"
mnmultilisttype = "ymap< TString, {0} >".format(self.mnmultilistelem)
mnmultilisttype = "TMap< TString, {0} >".format(self.mnmultilistelem)
self.mnmultilist = "const {0} {1}".format(mnmultilisttype, self.mnmultilistname)

def InitForAll(self, argv):
Expand Down
4 changes: 4 additions & 0 deletions build/scripts/clang_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ def fix(s):
if s == '/Z7':
return None

# disable sanitizers for generated code
if s.startswith('-fsanitize') or s == '-Dmemory_sanitizer_enabled' or s.startswith('-fsanitize-blacklist'):
return None

# Paths under .ya/tools/v3/.../msvc/include are divided with '\'
return s.replace('\\', '/')

Expand Down
2 changes: 1 addition & 1 deletion build/ymake.core.conf
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ otherwise {
}

when ($CODENAVIGATION) {
PYTHON_YNDEXER_ARGS=${cwd:ARCADIA_BUILD_ROOT} $YMAKE_PYTHON ${input:"build/scripts/python_yndexer.py"} $PYNDEXER_RESOURCE_GLOBAL/pyxref 1500 ${output;noext;rootrel:REALPRJNAME.ydx.pb2} $TARGET
PYTHON_YNDEXER_ARGS=${cwd:ARCADIA_BUILD_ROOT} ${env:"XDG_CACHE_HOME=$ARCADIA_BUILD_ROOT"} $YMAKE_PYTHON ${input:"build/scripts/python_yndexer.py"} $PYNDEXER_RESOURCE_GLOBAL/pyxref 1500 ${output;noext;rootrel:REALPRJNAME.ydx.pb2} $TARGET
YNDEXER_ARGS=$YMAKE_PYTHON ${input:"build/scripts/yndexer.py"} $CPPYNDEXER_RESOURCE_GLOBAL/yndexer 1500 $(SOURCE_ROOT) $ARCADIA_BUILD_ROOT ${input:SRC}
YNDEXER_OUTPUT=$YNDEXER_OUTPUT_FILE
}
Expand Down
2 changes: 1 addition & 1 deletion build/ymake_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,7 @@ def print_compiler(self):
}
'''

flags = ['/nologo', '/Zm1000', '/GR', '/bigobj', '/FC', '/EHsc', '/errorReport:prompt', '$MSVC_INLINE_FLAG', '/DFAKEID=$FAKEID', '/Dymap=TMap']
flags = ['/nologo', '/Zm500', '/GR', '/bigobj', '/FC', '/EHsc', '/errorReport:prompt', '$MSVC_INLINE_FLAG', '/DFAKEID=$FAKEID', '/Dymap=TMap']
flags += ['/we{}'.format(code) for code in warns_as_error]
flags += ['/w1{}'.format(code) for code in warns_enabled]
flags += ['/wd{}'.format(code) for code in warns_disabled]
Expand Down
4 changes: 2 additions & 2 deletions catboost/cuda/cpu_compatibility_helpers/model_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ namespace NCatboostCuda
const TBinarizedFeaturesManager& FeaturesManager;
const TDataProvider& DataProvider;
TVector<TVector<int>> CatFeatureBinToHashIndex;
ymap<ui32, ui32> CatFeaturesRemap;
ymap<ui32, ui32> FloatFeaturesRemap;
TMap<ui32, ui32> CatFeaturesRemap;
TMap<ui32, ui32> FloatFeaturesRemap;
TVector<TVector<float>> Borders;
};

Expand Down
4 changes: 2 additions & 2 deletions catboost/cuda/ctrs/ctr.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ inline TCtrConfig RemovePrior(const TCtrConfig& ctrConfig) {
return result;
}

inline ymap<TCtrConfig, TVector<TCtrConfig>> CreateEqualUpToPriorAndBinarizationCtrsGroupping(const TVector<TCtrConfig>& configs) {
ymap<TCtrConfig, TVector<TCtrConfig>> result;
inline TMap<TCtrConfig, TVector<TCtrConfig>> CreateEqualUpToPriorAndBinarizationCtrsGroupping(const TVector<TCtrConfig>& configs) {
TMap<TCtrConfig, TVector<TCtrConfig>> result;
for (auto& config : configs) {
TCtrConfig withoutPriorConfig = RemovePrior(config);
withoutPriorConfig.CtrBinarizationConfigId = -1;
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/cuda_lib/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class TScopedCacheHolder {
}

private:
THashMap<TGUID, ymap<ui64, THolder<IScopedCache>>> ScopeCaches;
THashMap<TGUID, TMap<ui64, THolder<IScopedCache>>> ScopeCaches;

public:
template <class TScope, class TKey, class TBuilder>
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/cuda_lib/cuda_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ namespace NCudaLib {

class TCudaProfiler {
private:
ymap<TString, THolder<TLabeledInterval>> Labels;
TMap<TString, THolder<TLabeledInterval>> Labels;
EProfileMode DefaultProfileMode;
ui64 MinProfileLevel;
TLabeledInterval EmptyLabel;
Expand Down
24 changes: 12 additions & 12 deletions catboost/cuda/data/binarizations_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ namespace NCatboostCuda
yset<ui32> resultIds;

if (PerFeatureCtrs.has(featureId)) {
const ymap<ECtrType, yset<TCtrConfig>>& perFeatureCtrs = PerFeatureCtrs.at(featureId);
const TMap<ECtrType, yset<TCtrConfig>>& perFeatureCtrs = PerFeatureCtrs.at(featureId);
if (perFeatureCtrs.has(type)) {
CreateSimpleCtrs(featureId, perFeatureCtrs.at(type), &resultIds);
}
Expand Down Expand Up @@ -496,7 +496,7 @@ namespace NCatboostCuda
}

//store perfect hash by featureManager id
const ymap<int, ui32>& GetCategoricalFeaturesPerfectHash(const ui32 featureId) const
const TMap<int, ui32>& GetCategoricalFeaturesPerfectHash(const ui32 featureId) const
{
CB_ENSURE(CatFeaturesPerfectHash.HasFeature(featureId));
return CatFeaturesPerfectHash.GetFeatureIndex(featureId);
Expand Down Expand Up @@ -542,7 +542,7 @@ namespace NCatboostCuda


TBinarizedFeaturesManager& CreateCtrConfigsFromDescription(const NCatboostOptions::TCtrDescription& ctrDescription,
ymap<ECtrType, yset<TCtrConfig>>* grouppedConfigs)
TMap<ECtrType, yset<TCtrConfig>>* grouppedConfigs)
{
for (const auto& prior : ctrDescription.GetPriors())
{
Expand Down Expand Up @@ -621,18 +621,18 @@ namespace NCatboostCuda
friend class TCatFeaturesPerfectHashHelper;

private:
mutable ymap<TCtr, ui32> KnownCtrs;
mutable ymap<ui32, TCtr> InverseCtrs;
mutable TMap<TCtr, ui32> KnownCtrs;
mutable TMap<ui32, TCtr> InverseCtrs;

mutable ymap<ui32, ui32> DataProviderFloatFeatureIdToFeatureManagerId;
mutable ymap<ui32, ui32> DataProviderCatFeatureIdToFeatureManagerId;
mutable ymap<ui32, ui32> FeatureManagerIdToDataProviderId;
mutable TMap<ui32, ui32> DataProviderFloatFeatureIdToFeatureManagerId;
mutable TMap<ui32, ui32> DataProviderCatFeatureIdToFeatureManagerId;
mutable TMap<ui32, ui32> FeatureManagerIdToDataProviderId;

mutable ui32 Cursor = 0;

ymap<ECtrType, yset<TCtrConfig>> TreeCtrConfigs;
ymap<ECtrType, yset<TCtrConfig>> DefaultCtrConfigs;
ymap<ui32, ymap<ECtrType, yset<TCtrConfig>>> PerFeatureCtrs;
TMap<ECtrType, yset<TCtrConfig>> TreeCtrConfigs;
TMap<ECtrType, yset<TCtrConfig>> DefaultCtrConfigs;
TMap<ui32, TMap<ECtrType, yset<TCtrConfig>>> PerFeatureCtrs;

TVector<NCatboostOptions::TBinarizationOptions> CtrBinarizationOptions;

Expand All @@ -642,7 +642,7 @@ namespace NCatboostCuda
const NCatboostOptions::TBinarizationOptions& FloatFeaturesBinarization;

//float and ctr features
ymap<ui32, TVector<float>> Borders;
TMap<ui32, TVector<float>> Borders;
TCatFeaturesPerfectHash CatFeaturesPerfectHash;

};
Expand Down
10 changes: 5 additions & 5 deletions catboost/cuda/data/cat_feature_perfect_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace NCatboostCuda {

~TCatFeaturesPerfectHash() = default;

const ymap<int, ui32>& GetFeatureIndex(ui32 featureId) const {
const TMap<int, ui32>& GetFeatureIndex(ui32 featureId) const {
if (!HasHashInRam) {
Load();
}
Expand All @@ -25,7 +25,7 @@ namespace NCatboostCuda {

void RegisterId(ui32 featureId) {
CB_ENSURE(HasHashInRam, "Can't register new features if hash is stored in file");
FeaturesPerfectHash[featureId] = ymap<int, ui32>();
FeaturesPerfectHash[featureId] = TMap<int, ui32>();
CatFeatureUniqueValues[featureId] = 0;
}

Expand All @@ -44,7 +44,7 @@ namespace NCatboostCuda {

void FreeRam() const {
Save();
ymap<ui32, ymap<int, ui32>> empty;
TMap<ui32, TMap<int, ui32>> empty;
FeaturesPerfectHash.swap(empty);
HasHashInRam = false;
}
Expand All @@ -70,8 +70,8 @@ namespace NCatboostCuda {
friend class TCatFeaturesPerfectHashHelper;
private:
TTempFile StorageTempFile;
ymap<ui32, ui32> CatFeatureUniqueValues;
mutable ymap<ui32, ymap<int, ui32>> FeaturesPerfectHash;
TMap<ui32, ui32> CatFeatureUniqueValues;
mutable TMap<ui32, TMap<int, ui32>> FeaturesPerfectHash;
mutable bool HasHashInRam = true;
};
}
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/data/cat_feature_perfect_hash_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace NCatboostCuda
const ui32 featureId = FeaturesManager.GetFeatureManagerIdForCatFeature(dataProviderId);
auto& featuresHash = FeaturesManager.CatFeaturesPerfectHash;

ymap<int, ui32> binarization;
TMap<int, ui32> binarization;
{
TGuard<TAdaptiveLock> guard(UpdateLock);
if (!featuresHash.HasHashInRam)
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/data/data_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ namespace NCatboostCuda
TVector<float> Weights;
TVector<TVector<float>> Baseline;

ymap<ui32, ui32> IndicesToLocalIndicesRemap;
TMap<ui32, ui32> IndicesToLocalIndicesRemap;

void BuildIndicesRemap()
{
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/gpu_data/binarized_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ namespace NCatboostCuda

private:
TVector<ui32> FeatureIds;
ymap<ui32, ui32> LocalFeatureIndex;
TMap<ui32, ui32> LocalFeatureIndex;
TCudaBuffer<ui32, TCompressedIndexMapping> CompressedIndex;
//features
TCudaBuffer<TCFeature, TFeaturesMapping> Grid;
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/gpu_data/cat_features_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ namespace NCatboostCuda
private:
TVector<TCompressedCatFeatureVec> CompressedCatIndex;
TVector<TVector<ui32>> DeviceFeatures;
ymap<ui32, TCatFeature> Features;
TMap<ui32, TCatFeature> Features;
const TDataProvider* DataProvider = nullptr;

template<NCudaLib::EPtrType Type>
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/gpu_data/ut/test_bin_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ SIMPLE_UNIT_TEST_SUITE(BinBuilderTest) {

inline TTreeCtrSplit BuildTreeCtrSplitCpu(const TFeatureTensor& featureTensor) {
TTreeCtrSplit ctrSplit;
ymap<ui64, ui32> uniqueBins;
TMap<ui64, ui32> uniqueBins;

const size_t sampleCount = DataProvider.GetSampleCount();
TVector<ui64> keys(sampleCount, 0);
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/gpu_data/ut/test_binarization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ SIMPLE_UNIT_TEST_SUITE(BinarizationsTests) {
TVector<ui32> indices;
permutation.FillOrder(indices);

ymap<ui32, TArray2D<float>> ctrsCache;
TMap<ui32, TArray2D<float>> ctrsCache;

for (ui32 dev = 0; dev < GetDeviceCount(); ++dev) {
TSlice featuresSlice = featuresMapping.DeviceSlice(dev);
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/gpu_data/ut/test_data_provider_load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ SIMPLE_UNIT_TEST_SUITE(TDataProviderTest) {
}

{
ymap<ui32, ui32> gidsBins;
TMap<ui32, ui32> gidsBins;

for (size_t i = 0; i < pool.Gids.size(); ++i) {
if (gidsBins.count(pool.Gids[i]) == 0) {
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/methods/boosting.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ namespace NCatboostCuda
{
if (docCount < Config.MinFoldSize)
{
return docCount / 2;
return NHelpers::CeilDivide(docCount, 50);
}
const ui32 maxFolds = 18;
const ui32 folds = IntLog2(NHelpers::CeilDivide(docCount, Config.MinFoldSize));
Expand Down
2 changes: 1 addition & 1 deletion catboost/cuda/methods/oblivious_tree_structure_searcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ namespace NCatboostCuda
CB_ENSURE(BestBin <= 255);
}

void CacheCtrBorders(const ymap<TCtr, TVector<float>>& bordersMap)
void CacheCtrBorders(const TMap<TCtr, TVector<float>>& bordersMap)
{
for (auto& entry : bordersMap)
{
Expand Down
6 changes: 3 additions & 3 deletions catboost/cuda/methods/serialization_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ namespace NCatboostCuda

};

ymap<ui32, TCtrFeature> Ctrs;
ymap<ui32, TFloatFeature> FloatFeatures;
ymap<ui32, ui32> CatFeaturesMap;
TMap<ui32, TCtrFeature> Ctrs;
TMap<ui32, TFloatFeature> FloatFeatures;
TMap<ui32, ui32> CatFeaturesMap;

Y_SAVELOAD_DEFINE(Ctrs, FloatFeatures, CatFeaturesMap);
};
Expand Down
6 changes: 3 additions & 3 deletions catboost/cuda/methods/tree_ctrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ namespace NCatboostCuda
}

private:
TVector<TCtrConfig> GetVisitOrder(const ymap<TCtrConfig, TVector<TCtrConfig>>& ctrs)
TVector<TCtrConfig> GetVisitOrder(const TMap<TCtrConfig, TVector<TCtrConfig>>& ctrs)
{
TVector<TCtrConfig> freqCtrs;
TVector<TCtrConfig> restCtrs;
Expand Down Expand Up @@ -366,7 +366,7 @@ namespace NCatboostCuda
using TCtrHelperPtr = THolder<TCalcCtrHelper<TMapping>>;
const TCtrTargets<TMapping>& Target;
const THashMap<TFeatureTensor, TVector<TCtrConfig>>& CtrConfigs;
ymap<ui32, TCtrHelperPtr> CtrHelpers;
TMap<ui32, TCtrHelperPtr> CtrHelpers;
TCtrVisitor& CtrVisitor;
};

Expand Down Expand Up @@ -1029,7 +1029,7 @@ namespace NCatboostCuda
yset<ui32> UsedPermutations;

TFeatureTensorTracker<CatFeaturesStoragePtrType> EmptyTracker;
ymap<TFeatureTensor, TFeatureTensorTracker<CatFeaturesStoragePtrType>> TensorTrackers;
TMap<TFeatureTensor, TFeatureTensorTracker<CatFeaturesStoragePtrType>> TensorTrackers;

TFeatureTensorTracker<CatFeaturesStoragePtrType> PureTreeCtrTensorTracker;
TFeatureTensor CurrentTensor;
Expand Down
4 changes: 2 additions & 2 deletions catboost/cuda/methods/tree_ctrs_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ namespace NCatboostCuda
return PermutationKey;
}

ymap<TCtr, TVector<float>> ReadBorders(const TVector<ui32>& ids) const
TMap<TCtr, TVector<float>> ReadBorders(const TVector<ui32>& ids) const
{
TVector<float> allBorders;
CtrBorders.Read(allBorders);
ymap<TCtr, TVector<float>> result;
TMap<TCtr, TVector<float>> result;

for (auto id : ids)
{
Expand Down
6 changes: 3 additions & 3 deletions catboost/cuda/tsv_to_proto_converter/pool_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ namespace NCatboostCuda
{
}

TCatFeatureColumnConverter& AddExistingBinarization(ymap<TString, ui32>&& binarization)
TCatFeatureColumnConverter& AddExistingBinarization(TMap<TString, ui32>&& binarization)
{
Binarization = std::move(binarization);
return *this;
Expand Down Expand Up @@ -353,7 +353,7 @@ namespace NCatboostCuda
}

private:
ymap<TString, ui32> Binarization;
TMap<TString, ui32> Binarization;
TVector<ui32> BinarizedData;
};

Expand Down Expand Up @@ -575,7 +575,7 @@ namespace NCatboostCuda
if (InputBinarization)
{
ReadMessage(*InputBinarization, binarization);
ymap<TString, ui32> bins;
TMap<TString, ui32> bins;
for (int i = 0; i < binarization.GetBins().size(); ++i)
{
bins[binarization.GetKeys(i)] = binarization.GetBins().Get(i);
Expand Down
2 changes: 1 addition & 1 deletion catboost/libs/algo/ctr_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void TCtrHelper::Init(const NCatboostOptions::TCatFeatureParams& catFeatureParam
using TCtrsDescription = TVector<NCatboostOptions::TCtrDescription>;
const TCtrsDescription& treeCtrs = catFeatureParams.CombinationCtrs;
const TCtrsDescription& simpleCtrs = catFeatureParams.SimpleCtrs;
const ymap<ui32, TCtrsDescription>& perFeatureCtrs = catFeatureParams.PerFeatureCtrs;
const TMap<ui32, TCtrsDescription>& perFeatureCtrs = catFeatureParams.PerFeatureCtrs;

THashMap<NCatboostOptions::TBinarizationOptions, ui32> targetClassifierIds;
{
Expand Down
2 changes: 1 addition & 1 deletion catboost/libs/algo/ctr_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ class TCtrHelper {
TVector<TTargetClassifier> TargetClassifiers;

TVector<TCtrInfo> SimpleCtrs;
ymap<int, TVector<TCtrInfo>> PerFeatureCtrs;
TMap<int, TVector<TCtrInfo>> PerFeatureCtrs;
TVector<TCtrInfo> TreeCtrs;
};
8 changes: 4 additions & 4 deletions catboost/libs/logging/profile_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

struct TProfileInfoData {
TProfileInfoData() = default;
TProfileInfoData(const ymap<TString, double>& operationToTimeInAllIterations,
TProfileInfoData(const TMap<TString, double>& operationToTimeInAllIterations,
const TVector<TVector<ui64>>& timeLeftHistory, int passedIterations,
int badIterations, double passedTime)
: OperationToTimeInAllIterations(operationToTimeInAllIterations)
Expand All @@ -28,7 +28,7 @@ struct TProfileInfoData {
::LoadMany(s, OperationToTimeInAllIterations, TimeLeftHistory, PassedIterations, BadIterations, PassedTime);
}

ymap<TString, double> OperationToTimeInAllIterations;
TMap<TString, double> OperationToTimeInAllIterations;
TVector<TVector<ui64>> TimeLeftHistory;
int PassedIterations;
int BadIterations;
Expand Down Expand Up @@ -155,8 +155,8 @@ class TProfileInfo {

private:
static constexpr int MAX_TIME_RATIO = 100;
ymap<TString, double> OperationToTime;
ymap<TString, double> OperationToTimeInAllIterations;
TMap<TString, double> OperationToTime;
TMap<TString, double> OperationToTimeInAllIterations;
TVector<TVector<ui64>> TimeLeftHistory;
THPTimer Timer;
int PassedIterations;
Expand Down
Loading

0 comments on commit c0df0a8

Please sign in to comment.