Skip to content

Commit

Permalink
String Validation Methods
Browse files Browse the repository at this point in the history
  • Loading branch information
harshsingh-24 committed Apr 24, 2023
1 parent baf30f7 commit a2b8249
Show file tree
Hide file tree
Showing 64 changed files with 1,493 additions and 1,229 deletions.
106 changes: 106 additions & 0 deletions integration_tests/test_str_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,108 @@ def partition():
seperator = "apple"
assert s.partition(seperator) == ("rendezvous 5", "", "")

def islower():
    # Checks str.islower() on string literals first, then on a str variable.

    # Literal receivers
    assert not "".islower()
    assert not "APPLE".islower()
    assert not "4432632479".islower()
    assert "%#$#$#32a".islower()
    assert "apple".islower()
    assert "apple is a fruit".islower()

    # Variable receiver, reassigned before each check
    text: str = ""
    assert not text.islower()
    text = "APPLE"
    assert not text.islower()
    text = "238734587"
    assert not text.islower()
    text = "%#$#$#32a"
    assert text.islower()
    text = "apple"
    assert text.islower()
    text = "apple is a fruit"
    assert text.islower()

def isupper():
    # Checks str.isupper() on string literals first, then on a str variable.

    # Literal receivers
    assert not "".isupper()
    assert not "apple".isupper()
    assert not "4432632479".isupper()
    assert "%#$#$#32A".isupper()
    assert "APPLE".isupper()
    assert "APPLE IS A FRUIT".isupper()

    # Variable receiver, reassigned before each check
    text: str = ""
    assert not text.isupper()
    text = "apple"
    assert not text.isupper()
    text = "238734587"
    assert not text.isupper()
    text = "%#$#$#32A"
    assert text.isupper()
    text = "APPLE"
    assert text.isupper()
    text = "APPLE IS A FRUIT"
    assert text.isupper()

def isdecimal():
    # Checks str.isdecimal() on string literals first, then on a str variable.
    # Signs, decimal points and spaces all make the result False.

    # Literal receivers
    assert not "".isdecimal()
    assert not "apple".isdecimal()
    assert "4432632479".isdecimal()
    assert not "%#$#$#32A".isdecimal()
    assert not "1.25".isdecimal()
    assert not "-325".isdecimal()
    assert not "12 35".isdecimal()

    # Variable receiver, reassigned before each check
    text: str = ""
    assert not text.isdecimal()
    text = "apple"
    assert not text.isdecimal()
    text = "238734587"
    assert text.isdecimal()
    text = "%#$#$#32A"
    assert not text.isdecimal()
    text = "1.35"
    assert not text.isdecimal()
    text = "-42556"
    assert not text.isdecimal()
    text = "12 34"
    assert not text.isdecimal()

def isascii():
    # Checks str.isascii() on string literals first, then on a str variable.
    # The empty string and control characters count as ASCII.

    # Literal receivers
    assert "".isascii()
    assert " ".isascii()
    assert "Hello, World123!".isascii()
    assert not "Hëllö, Wörld!".isascii()
    assert not "This is a test string with some non-ASCII characters: 🚀".isascii()
    assert "\t\n\r".isascii()
    assert "12 35".isascii()

    # Variable receiver, reassigned before each check
    text: str = ""
    assert text.isascii()
    text = " "
    assert text.isascii()
    text = "Hello, World!"
    assert text.isascii()
    text = "Hëllö, Wörld!"
    assert not text.isascii()
    text = "This is a test string with some non-ASCII characters: 🚀"
    assert not text.isascii()
    text = "\t\n\r"
    assert text.isascii()
    text = "123 45 6"
    assert text.isascii()

def check():
capitalize()
lower()
Expand All @@ -192,5 +294,9 @@ def check():
startswith()
endswith()
partition()
islower()
isupper()
isdecimal()
isascii()

check()
111 changes: 111 additions & 0 deletions src/lpython/semantics/python_ast_to_asr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6048,6 +6048,24 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
fn_args.push_back(al, str);
fn_args.push_back(al, seperator);

} else if(attr_name.size() > 2 && attr_name[0] == 'i' && attr_name[1] == 's') {
/*
String Validation Methods i.e all "is" based functions are handled here
*/
std::vector<std::string> validation_methods{"lower", "upper", "decimal", "ascii"}; // Database of validation methods supported
std::string method_name = attr_name.substr(2);

if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) {
throw SemanticError("String method not implemented: " + attr_name, loc);
}
if (args.size() != 0) {
throw SemanticError("str." + attr_name + "() takes no arguments", loc);
}
fn_call_name = "_lpython_str_" + attr_name;
ASR::call_arg_t arg;
arg.loc = loc;
arg.m_value = s_var;
fn_args.push_back(al, arg);
} else {
throw SemanticError("String method not implemented: " + attr_name,
loc);
Expand Down Expand Up @@ -6402,6 +6420,99 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
}
create_partition(loc, s_var, arg_seperator, arg_seperator_type);
return;
} else if (attr_name.size() > 2 && attr_name[0] == 'i' && attr_name[1] == 's') {
/*
String validation methods ("is*") applied to a string whose characters are
available here as s_var: the answer is computed directly and emitted as a
LogicalConstant.
* Supported methods: islower, isupper, isdecimal, isascii (the specification
of each one is given at its own branch below)
* The checks only recognise ASCII letters and digits currently
* TODO: We can support other characters from Unicode Library
*/
std::vector<std::string> validation_methods{"lower", "upper", "decimal", "ascii"}; // Database of validation methods supported
std::string method_name = attr_name.substr(2);
if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) {
throw SemanticError("String method not implemented: " + attr_name, loc);
}
if (args.size() != 0) {
throw SemanticError("str." + attr_name + "() takes no arguments", loc);
}

if(attr_name == "islower") {
/*
* Specification:
Return True if all cased characters in the string are lowercase and there is at least one cased character, False otherwise.
*/
bool is_cased_present = false;
bool is_lower = true;
for (auto &i : s_var) {
if ((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z')) {
is_cased_present = true;
if(!(i >= 'a' && i <= 'z')) {
is_lower = false;
break;
}
}
}
is_lower = is_lower && is_cased_present;
tmp = ASR::make_LogicalConstant_t(al, loc, is_lower,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4, nullptr, 0)));
return;
} else if(attr_name == "isupper") {
/*
* Specification:
Return True if all cased characters in the string are uppercase and there is at least one cased character, False otherwise.
*/
bool is_cased_present = false;
bool is_lower = true;
for (auto &i : s_var) {
if ((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z')) {
is_cased_present = true;
if(!(i >= 'A' && i <= 'Z')) {
is_lower = false;
break;
}
}
}
is_lower = is_lower && is_cased_present;
tmp = ASR::make_LogicalConstant_t(al, loc, is_lower,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4, nullptr, 0)));
return;
} else if(attr_name == "isdecimal") {
/*
* Specification:
Return True if all characters in the string are decimal characters and there is at least one character, False otherwise.
*/
bool is_decimal = (s_var.size() != 0);
for(auto &i: s_var) {
if(i < '0' || i > '9') {
is_decimal = false;
break;
}
}
tmp = ASR::make_LogicalConstant_t(al, loc, is_decimal,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4, nullptr, 0)));
return;
} else if(attr_name == "isascii") {
/*
* Specification -
Return True if the string is empty or all characters in the string are ASCII, False otherwise.
ASCII characters have code points in the range U+0000-U+007F.
*/
bool is_ascii = true;
for(char i: s_var) {
if (static_cast<unsigned int>(i) > 127) {
is_ascii = false;
break;
}
}
tmp = ASR::make_LogicalConstant_t(al, loc, is_ascii,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4, nullptr, 0)));
return;
} else {
throw SemanticError("'str' object has no attribute '" + attr_name + "'", loc);
}
} else {
throw SemanticError("'str' object has no attribute '" + attr_name + "'",
loc);
Expand Down
6 changes: 5 additions & 1 deletion src/lpython/semantics/python_comptime_eval.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ struct PythonIntrinsicProcedures {
{"_lpython_str_swapcase", {m_builtin, &not_implemented}},
{"_lpython_str_startswith", {m_builtin, &not_implemented}},
{"_lpython_str_endswith", {m_builtin, &not_implemented}},
{"_lpython_str_partition", {m_builtin, &not_implemented}}
{"_lpython_str_partition", {m_builtin, &not_implemented}},
{"_lpython_str_islower", {m_builtin, &not_implemented}},
{"_lpython_str_isupper", {m_builtin, &not_implemented}},
{"_lpython_str_isdecimal", {m_builtin, &not_implemented}},
{"_lpython_str_isascii", {m_builtin, &not_implemented}}
};
}

Expand Down
43 changes: 43 additions & 0 deletions src/runtime/lpython_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,49 @@ def _lpython_str_partition(s:str, sep: str) -> tuple[str, str, str]:
res = (s[0:ind], sep, s[ind+len(sep): len(s)])
return res

@overload
def _lpython_str_islower(s: str) -> bool:
is_cased_present: bool
is_cased_present = False
i:str
for i in s:
if (ord(i) >= 97 and ord(i) <= 122) or (ord(i) >= 65 and ord(i) <= 90): # Implies it is a cased letter
is_cased_present = True
if not(ord(i) >= 97 and ord(i) <= 122): # Not lowercase
return False
return is_cased_present

@overload
def _lpython_str_isupper(s: str) -> bool:
is_cased_present: bool
is_cased_present = False
i:str
for i in s:
if (ord(i) >= 97 and ord(i) <= 122) or (ord(i) >= 65 and ord(i) <= 90): # Implies it is a cased letter
is_cased_present = True
if not(ord(i) >= 65 and ord(i) <= 90): # Not lowercase
return False
return is_cased_present

@overload
def _lpython_str_isdecimal(s: str) -> bool:
if len(s) == 0:
return False
i:str
for i in s:
if (ord(i) < 48 or ord(i) > 57): # Implies it is not a digit
return False
return True

@overload
def _lpython_str_isascii(s: str) -> bool:
if(len(s) == 0):
return True
i: str
for i in s:
if ord(i) < 0 or ord(i) > 127:
return False
return True

def list(s: str) -> list[str]:
l: list[str] = []
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/asr-array_01_decl-f955627.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "asr-array_01_decl-f955627.stdout",
"stdout_hash": "39595a55fb5e863bd95ffde37cacead64fd9aaaff3581e74a163abaf",
"stdout_hash": "c446111843674f99b678a4719a280095d6dcd5426250fd2634b3a1ef",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
Loading

0 comments on commit a2b8249

Please sign in to comment.