Skip to content

Commit

Permalink
Fix compilation for larger UNITY_BUILD_BATCH_SIZE (#985)
Browse files Browse the repository at this point in the history
Signed-off-by: Valerii Fediunin <valery.fedyunin@abbyy.com>
  • Loading branch information
Valeriy Fedyunin authored Oct 13, 2023
1 parent 531733b commit 44c2d9f
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 47 deletions.
24 changes: 12 additions & 12 deletions NeoML/src/TraditionalML/BytePairEncoderTrainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ limitations under the License.

namespace NeoML {
// Start-of-Word token for the internal dictionary: a slash followed by the single byte 0xFF.
static const CString BowTokenStr( "/\xFF" );
static const CString BpeBowTokenStr( "/\xFF" );
// End-of-Word token for the internal dictionary: a backslash followed by the single byte 0xFF.
static const CString EowTokenStr( "\\\xFF" );
static const CString BpeEowTokenStr( "\\\xFF" );
// SentencePiece special token: the three bytes E2 96 81, i.e. the UTF-8 encoding of U+2581.
static const CString SpSpaceStr( "\xE2\x96\x81" );
static const CString BpeSpSpaceStr( "\xE2\x96\x81" );

//--------------------

Expand Down Expand Up @@ -93,21 +93,21 @@ CBpeTrainer::CBpeTrainer( int vocabSize, CSubwordEncoderTrainer::TBorderHandling
switch( borderHandling ) {
case TBorderHandling::EndOfWord:
eowToken = vocabulary.Size();
vocabulary.Add( { EowTokenStr, false } );
vocabulary.Add( { BpeEowTokenStr, false } );
break;
case TBorderHandling::BeginOfWord:
bowToken = vocabulary.Size();
vocabulary.Add( { BowTokenStr, false } );
vocabulary.Add( { BpeBowTokenStr, false } );
break;
case TBorderHandling::SentencePiece:
bowToken = vocabulary.Size();
vocabulary.Add( { SpSpaceStr, false } );
vocabulary.Add( { BpeSpSpaceStr, false } );
break;
case TBorderHandling::BeginAndEndOfWord:
bowToken = vocabulary.Size();
vocabulary.Add( { BowTokenStr, false } );
vocabulary.Add( { BpeBowTokenStr, false } );
eowToken = vocabulary.Size();
vocabulary.Add( { EowTokenStr, false } );
vocabulary.Add( { BpeEowTokenStr, false } );
break;
case TBorderHandling::None:
break;
Expand Down Expand Up @@ -396,14 +396,14 @@ CPtr<IBytePairEncoder> CBpeTrainer::createEncoder()

switch( borderHandling ) {
case TBorderHandling::EndOfWord:
params.EndOfWordToken = EowTokenStr;
params.EndOfWordToken = BpeEowTokenStr;
break;
case TBorderHandling::BeginOfWord:
params.StartOfWordToken = BowTokenStr;
params.StartOfWordToken = BpeBowTokenStr;
break;
case TBorderHandling::BeginAndEndOfWord:
params.EndOfWordToken = EowTokenStr;
params.StartOfWordToken = BowTokenStr;
params.EndOfWordToken = BpeEowTokenStr;
params.StartOfWordToken = BpeBowTokenStr;
break;
case TBorderHandling::SentencePiece:
// SentencePiece treats the space as a normal symbol. It should be inserted by the user.
Expand Down
19 changes: 10 additions & 9 deletions NeoML/src/TraditionalML/UnigramTrainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ limitations under the License.

namespace NeoML {
// Start-of-Word token for the internal dictionary: a slash followed by the single byte 0xFF.
static const CString BowTokenStr( "/\xFF" );
static const CString UnigramBowTokenStr( "/\xFF" );
// End-of-Word token for the internal dictionary: a backslash followed by the single byte 0xFF.
static const CString EowTokenStr( "\\\xFF" );
static const CString UnigramEowTokenStr( "\\\xFF" );
// SentencePiece special token: the three bytes E2 96 81, i.e. the UTF-8 encoding of U+2581.
static const CString SpSpaceStr( "\xE2\x96\x81" );
static const CString UnigramSpSpaceStr( "\xE2\x96\x81" );

//----------

Expand Down Expand Up @@ -91,9 +91,10 @@ CUnigramTrainer::CUnigramTrainer( int vocabSize, TBorderHandling b, bool useByte
desiredVocabSize( vocabSize )
{
const bool addBow = b == TBorderHandling::BeginOfWord || b == TBorderHandling::BeginAndEndOfWord;
params.StartOfWordToken = addBow ? BowTokenStr : ( b == TBorderHandling::SentencePiece ? SpSpaceStr : "" );
params.StartOfWordToken = addBow ? UnigramBowTokenStr
: ( b == TBorderHandling::SentencePiece ? UnigramSpSpaceStr : "" );
const bool addEow = b == TBorderHandling::EndOfWord || b == TBorderHandling::BeginAndEndOfWord;
params.EndOfWordToken = addEow ? EowTokenStr : "";
params.EndOfWordToken = addEow ? UnigramEowTokenStr : "";
params.UseRawBytes = useByteBpe;
params.UnknownTokenId = unknownTokenId;

Expand Down Expand Up @@ -125,7 +126,7 @@ CPtr<IUnigramEncoder> CUnigramTrainer::Train( const CWordDictionary& frequencyDi
addChars( resultVocab );

CPtr<CUnigramEncoder> encoder = new CUnigramEncoder;
if( params.EndOfWordToken == SpSpaceStr ) {
if( params.EndOfWordToken == UnigramSpSpaceStr ) {
params.EndOfWordToken.Empty();
}
encoder->Initialize( resultVocab, params );
Expand All @@ -139,13 +140,13 @@ int CUnigramTrainer::getTokenLength( const CString& str, int pos ) const
}

if( !params.EndOfWordToken.IsEmpty() &&
str.CompareSubstr( pos, EowTokenStr, EowTokenStr.Length() ) == 0 )
str.CompareSubstr( pos, UnigramEowTokenStr, UnigramEowTokenStr.Length() ) == 0 )
{
return EowTokenStr.Length();
return UnigramEowTokenStr.Length();
}

if( !params.StartOfWordToken.IsEmpty() &&
str.CompareSubstr( pos, params.StartOfWordToken, params.StartOfWordToken.Length() ) == 0 )
str.CompareSubstr( pos, params.StartOfWordToken, params.StartOfWordToken.Length() ) == 0 )
{
return params.StartOfWordToken.Length();
}
Expand Down
15 changes: 15 additions & 0 deletions NeoMathEngine/test/src/common/TestFixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,21 @@ inline bool FloatEq( float val1, float val2, float precision = 1e-05 )
#define FLT_MIN_LOG -87.33654474f
#define FLT_MAX_LOG 88.f

// Exponent with a clamped argument range:
//   f < FLT_MIN_LOG -> 0
//   f > FLT_MAX_LOG -> FLT_MAX
//   otherwise       -> expf( f )
// A NaN argument fails both comparisons and is therefore forwarded to expf().
inline float ExponentFunc(float f)
{
	const bool belowRange = f < FLT_MIN_LOG;
	const bool aboveRange = f > FLT_MAX_LOG;
	return belowRange ? 0.f : ( aboveRange ? FLT_MAX : expf(f) );
}

//------------------------------------------------------------------------------------------------------------

#define CARRAY_WRAPPER(TYPE, arr) CBufferWrapper<TYPE>( MathEngine(), ( arr.data() ), ( static_cast<int>( arr.size() ) ) )
#define CARRAY_FLOAT_WRAPPER(arr) CARRAY_WRAPPER(float, arr)
#define CARRAY_INT_WRAPPER(arr) CARRAY_WRAPPER(int, arr)
Expand Down
13 changes: 0 additions & 13 deletions NeoMathEngine/test/src/inference/VectorExpTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,6 @@ limitations under the License.
using namespace NeoML;
using namespace NeoMLTest;

// Exponent with a clamped argument range: 0 below FLT_MIN_LOG, FLT_MAX above FLT_MAX_LOG,
// expf( f ) for everything else (a NaN argument fails both checks and reaches expf()).
inline static float ExponentFunc(float f)
{
	const bool belowRange = f < FLT_MIN_LOG;
	const bool aboveRange = f > FLT_MAX_LOG;
	return belowRange ? 0.f : ( aboveRange ? FLT_MAX : expf(f) );
}

static void vectorExpImpl( const CTestParams& params, int seed )
{
CRandom random( seed );
Expand Down
13 changes: 0 additions & 13 deletions NeoMathEngine/test/src/inference/VectorTanhDiffTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,6 @@ limitations under the License.
using namespace NeoML;
using namespace NeoMLTest;

// Exponent with a clamped argument range.
// The result starts at 0 (the value for arguments below FLT_MIN_LOG) and is replaced by
// FLT_MAX for arguments above FLT_MAX_LOG, or by expf( f ) for arguments that are not below
// FLT_MIN_LOG (this includes NaN, for which every comparison is false).
inline static float ExponentFunc(float f)
{
	float result = 0;
	if (f > FLT_MAX_LOG) {
		result = FLT_MAX;
	} else if (!(f < FLT_MIN_LOG)) {
		result = expf(f);
	}
	return result;
}

static void vectorTanhDiffImpl( const CTestParams& params, int seed )
{
CRandom random( seed );
Expand Down

0 comments on commit 44c2d9f

Please sign in to comment.