Skip to content

Commit

Permalink
Merge pull request Alfanous-team#372 from Ahimta/master
Browse files Browse the repository at this point in the history
More tests and faster implementation on PyArabic/Araby_statistics
  • Loading branch information
assem-ch committed Jan 29, 2014
2 parents 266074b + f097085 commit 89a8928
Showing 1 changed file with 55 additions and 10 deletions.
65 changes: 55 additions & 10 deletions src/alfanous-labs/PyArabic/Araby_Statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

# Matches one run of non-whitespace characters; words() counts these matches.
# Raw string so the backslash reaches the regex engine without relying on
# Python passing an unknown string escape through unchanged.
word_pattern = re.compile( r"\S+" )
# Matches the letter sequence that every entry of GWORDS_FORMS contains.
gword_pattern = re.compile( u"لله" )
# Word forms that gwords() checks candidate words against.
GWORDS_FORMS = set( [u"أبالله", u"وتالله", u"بالله", u"تالله", u"والله", u"الله",
                     u"ولله", u"اللهم", u"آلله", u"فلله", u"لله", u"فالله", ] )

araby = araby()

Expand All @@ -26,6 +27,21 @@
# Shared sample inputs for the doctests in this module: 'text' is the string
# the counting functions are run against, and 'letter' is the single letter
# passed to letter_count() together with that text.
# NOTE: the expected doctest counts depend on the exact characters below.
TEST_FIXTURES = {'text': u" اللّهم يضلله يً ْيسئء سبي شبيشيش شسيشسي",
'letter': u"ش"}


def count( f, iterable ):
    ''' ((object) -> boolean, iterable) -> int
    Return the number of elements in the given iterable for which the
    function f returns a true value.

    The iterable is walked exactly once, so one-shot iterators such as
    generators are accepted as well as sequences and strings.

    >>> count( lambda x: x > 1, [1, 2, 3] )
    2
    >>> count( lambda x: x, [] )
    0
    '''
    # Summing ones over a generator replaces the manual accumulator, whose
    # local variable also shadowed this function's own name.
    return sum( 1 for elt in iterable if f( elt ) )


def letters( text ):
''' (string) -> int
Return the number of arabic letters in the given text.
Expand All @@ -43,8 +59,8 @@ def letters( text ):
>>> letters( TEST_FIXTURES['text'] )
30
'''
return count( lambda char: char in araby.LETTERS, text )

return len( [char for char in text if char in araby.LETTERS] )

def diacritics( text ):
''' (string) -> int
Expand All @@ -62,8 +78,8 @@ def diacritics( text ):
>>> diacritics( TEST_FIXTURES['text'] )
3
'''
return count( lambda char: char in araby.TASHKEEL, text )

return len( [char for char in text if char in araby.TASHKEEL] )

def letter_count( text, letter ):
''' (string, string) -> int
Expand All @@ -75,13 +91,14 @@ def letter_count( text, letter ):
0
>>> letter_count( '', 'abc' )
0
>>> letter_count( araby.ALEF + 'a' + araby.ALEF + 'b' + araby.ALEF, araby.ALEF )
>>> letter_count( araby.ALEF + 'a' + araby.ALEF + 'b' + araby.ALEF,\
araby.ALEF )
3
>>> letter_count( TEST_FIXTURES['text'], TEST_FIXTURES['letter'] )
5
'''
return count( lambda char: char == letter, text )

return len( [char for char in text if char == letter] )

def hamza_count( text ):
''' (string) -> int
Expand All @@ -94,13 +111,13 @@ def hamza_count( text ):
0
>>> hamza_count( araby.HAMZA )
1
>>> hamza_count( araby.HAMZA + 'ab' + araby.ALEF + araby.WAW_HAMZA + araby.YEH_HAMZA )
>>> hamza_count( araby.HAMZA + 'ab' + araby.ALEF + araby.WAW_HAMZA +\
araby.YEH_HAMZA )
3
>>> hamza_count( TEST_FIXTURES['text'] )
2
'''

return len( [char for char in text if char in araby.HAMZAT] )
return count( lambda char: char in araby.HAMZAT, text )


def words( text ):
Expand All @@ -109,6 +126,8 @@ def words( text ):
>>> words( '' )
0
>>> words( ' \\n\\t\\r' )
0
>>> words( araby.ALEF )
1
>>> words( 'ab c' + araby.ALEF + 'd' )
Expand All @@ -121,17 +140,43 @@ def words( text ):

return len( word_pattern.findall( text ) )


def gwords( text ):
    ''' (string) -> int
    Return the number of words in the given text that are one of the forms
    listed in GWORDS_FORMS.

    The text is first passed through araby.stripTashkeel, then split into
    whitespace-delimited words with word_pattern; every word that is a
    member of GWORDS_FORMS counts once per occurrence.

    >>> gwords( '' )
    0
    >>> gwords( ' abc ' )
    0
    '''
    # Compare whole words against the known forms. The previous version
    # intersected the set of gword_pattern matches with GWORDS_FORMS, but
    # those matches are always the pattern text itself, so the result could
    # never exceed 1.
    stripped = araby.stripTashkeel( text )
    return sum( 1 for word in word_pattern.findall( stripped )
                if word in GWORDS_FORMS )


def sunletters( text ):
    ''' (string) -> int
    Return the number of occurrences of sun letters in the given text.

    A character is counted when it is contained in araby.SUN.

    >>> sunletters( '' )
    0
    >>> sunletters( TEST_FIXTURES['text'] )
    14
    '''
    # Single return placed after the docstring, so the docstring (and its
    # doctests) is the first statement of the function.
    return count( lambda char: char in araby.SUN, text )


def moonletters( text ):
    ''' (string) -> int
    Return the number of occurrences of moon letters in the given text.

    A character is counted when it is contained in araby.MOON.

    >>> moonletters( '' )
    0
    >>> moonletters( TEST_FIXTURES['text'] )
    15
    '''
    # Single return placed after the docstring, so the docstring (and its
    # doctests) is the first statement of the function.
    return count( lambda char: char in araby.MOON, text )


if __name__ == "__main__":
Expand Down

0 comments on commit 89a8928

Please sign in to comment.