From 2339377128f2ab889aff13a11d4db99af2c9f789 Mon Sep 17 00:00:00 2001
From: Sudarshan
Date: Mon, 3 Apr 2017 00:54:14 +0530
Subject: [PATCH 1/4] Fixed absolute addressing

---
 Lexrank/summa/pagerank_weighted.pyc | Bin 3656 -> 3656 bytes
 Lexrank/summa/textrank.py           |   2 +-
 Lexrank/summa/textrank.pyc          | Bin 2836 -> 2573 bytes
 3 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lexrank/summa/pagerank_weighted.pyc b/Lexrank/summa/pagerank_weighted.pyc
index 6db70016dc9e39afa15641edd0fef5dcb9fc4427..543674e3a0e17a9af12d0d5af96b1fae5f5f1a3a 100644
Binary files a/Lexrank/summa/pagerank_weighted.pyc and b/Lexrank/summa/pagerank_weighted.pyc differ

From 143be5d5c6aa748bdd57844ae9c838734ae20314 Mon Sep 17 00:00:00 2001
From: Sudarshan
Date: Mon, 3 Apr 2017 01:55:57 +0530
Subject: [PATCH 2/4] Boochain perfectly alright!

---
 LexChain/Boochain.py         | 344 ++++++++++++++++++-----------------
 LexChain/Boochain.pyc        | Bin 0 -> 5567 bytes
 Lexrank/summa/summarizer.py  |   4 +-
 Lexrank/summa/summarizer.pyc | Bin 4362 -> 4342 bytes
 Lexrank/summa/textrank.py    |   5 +-
 Lexrank/summa/textrank.pyc   | Bin 2573 -> 2573 bytes
 6 files changed, 185 insertions(+), 168 deletions(-)
 create mode 100644 LexChain/Boochain.pyc

diff --git a/LexChain/Boochain.py b/LexChain/Boochain.py
index a33f1e1..2073200 100644
--- a/LexChain/Boochain.py
+++ b/LexChain/Boochain.py
@@ -6,172 +6,186 @@ import sys
 reload(sys)
 sys.setdefaultencoding('utf8')
+sys.path.append('../Lexrank')
 
-threshold = 0.6  #treshold for wup
-jcnTreshold = 0.09  #jcn
-pathTeshold = 0.1  #path
-brown_ic = wordnet_ic.ic('ic-brown.dat')  #load the brown corpus
-lexical_chains = []  #empty list to hold all the chains
-dictionary = {}  #empty dictionart to hold the count of each word encountered
-
-
-def findWholeWord(w):
-    return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search
-
-#class Chain
-class Chain():
-    def __init__(self, words, senses, count = 0):
-        self.words = set(words)
-        self.senses = set(senses)
-        dictionary[words[0]] = 1  #initialize counter
-
-    def addWord(self, word):
-
-        if(len(self.words.intersection([word])) > 0):
-            dictionary[word] += 1
-        else:
-            dictionary[word] = 1
-
-        self.words.add(word)
-
-
-    def addSense(self, sense):
-        self.senses.add(sense)
-
-    def getWords(self):
-        return self.words
-
-    def getSenses(self):
-        return self.getSenses
-
-    def incCount(self):
-        self.count += 1
-
-    def setScore(self, sc):
-        self.score = sc
-
-    def mfword(self):
-        maxfreq = 0
-        for word in self.getWords():
-            if dictionary[word] > maxfreq:
-                maxword = word
-                maxfreq = dictionary[word]
-        return maxword
-
-
-def add_word(word):
-    maximum = 0
-    maxJCN = 0
-    flag = 0
-    for chain in lexical_chains:  #for all chains that are present
-        for synset in wn.synsets(word):  #for all synsets of current word
-            for sense in chain.senses:  #for all senses of the current word in current element of the current chain
-                similarity = sense.wup_similarity(synset)  #using wup_similarity
-
-                if(similarity >= maximum):
-                    if similarity >= threshold:
-                        #print word, synset, sense, sense.jcn_similarity(synset, brown_ic)
-                        JCN = sense.jcn_similarity(synset, brown_ic)  #using jcn_similarity
-                        if JCN >= jcnTreshold:
-                            if sense.path_similarity(synset) >= 0.2:  #using path similarity
-                                if JCN >= maxJCN:
-                                    maximum = similarity
-                                    maxJCN = JCN
-                                    maxChain = chain
-                                    flag = 1
-    if flag == 1:
-        maxChain.addWord(word)
-        maxChain.addSense(synset)
-        return
-
-    lexical_chains.append(Chain([word], wn.synsets(word)))
-
-def count_words(summary):
-    count = 0
-    for line in summary:
-        count = count + len(line.split(' '))
-    return count
-#fileName = raw_input("Enter file path + name, if file name is 'nlp.txt', type 'nlp' \n \n")
-#n = raw_input("Enter number of sentences in summary.\n")
-word_count=50
-fileName = "amazon.txt"
-print ("\n\n")
-#fileName = "nlp.txt"
-File = open(fileName)  #open file
-lines = File.read()  #read all lines
-#dec_lines = [line.decode('utf-8') for line in lines]
-
-line_list = lines.split('. ')
-
-
-is_noun = lambda x: True if (pos == 'NN' or pos == 'NNP' or pos == 'NNS' or pos == 'NNPS') else False
-nouns = [word for (word, pos) in nltk.pos_tag(nltk.word_tokenize(lines)) if is_noun(pos)]  #extract all nouns
-
-for word in nouns:
-    add_word(word)
-
-#print all chains
-for chain in lexical_chains:
-    chain_length = 0
-    dis_word = 0
-    for word in chain.getWords():
-        #print str(word + "(" + str(dictionary[word]) + ")") + ',',
-        chain_length = chain_length + dictionary[word]
-        dis_word = dis_word + 1
-    #print 'Length =' + str(chain_length)
-    hom = 1 - (dis_word*1.0/chain_length)
-    #print 'Homogeneity =' + str(hom)
-    score = 1.0*chain_length*hom
-    #print 'Score =' + str(score)
-    chain.setScore(score)
-
-print 'Sorted start '
-lexical_chains.sort(key=lambda x: x.score, reverse=True)
-
-for chain in lexical_chains:
-    if(chain.score>0.0):
-        for word in chain.getWords():
-            print str(word + "(" + str(dictionary[word]) + ")") + ',',
-        print 'Score=' + str(chain.score)
-
-summary = []
-line_flags = []
-line_score=[]
-
-for line in line_list:
-    line_flags.append(0)
-    line_score.append(0)
-
-for chain in lexical_chains:
-    bigword = chain.mfword()
-    chain_score = chain.score
-    print '\nMF word ', bigword
-    for i in range(len(line_list)):
-        line=line_list[i]
-        if findWholeWord(bigword)(line)!=None:
-            #((line.find(' '+str(bigword)+' ')!=-1) or (line.find(' '+str(bigword)+'.')!=-1)):
-            if line_flags[i]==0:
-                #summary.append(line)
-                #print 'i ', count_words(summary)
-                line_flags[i] = 1
-                line_score[i] = chain_score
-                #print 'line_score ', line_score
-                #print 'line_flags ', line_flags
-
-                break
-            elif line_flags[i]==1:
-                line_score[i] = line_score[i] + chain.score
-                #print '\nline_score ', line_score
-                #print 'line_flags ', line_flags
-
+from summa.preprocessing.textcleaner import clean_text_by_sentences as clean
-'''
-    if(count_words(summary)>word_count):
-        break
-'''
-print len(summary)
-print line_score
+def LexicalChain(fileName="amazon.txt", verbose=0):
-final_summary = ' '.join(summary)
-#print final_summary
\ No newline at end of file
+
+    def findWholeWord(w):
+        return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search
+
+    #class Chain
+    class Chain():
+        def __init__(self, words, senses, count = 0):
+            self.words = set(words)
+            self.senses = set(senses)
+            dictionary[words[0]] = 1  #initialize counter
+
+        def addWord(self, word):
+
+            if(len(self.words.intersection([word])) > 0):
+                dictionary[word] += 1
+            else:
+                dictionary[word] = 1
+
+            self.words.add(word)
+
+
+        def addSense(self, sense):
+            self.senses.add(sense)
+
+        def getWords(self):
+            return self.words
+
+        def getSenses(self):
+            return self.getSenses
+
+        def incCount(self):
+            self.count += 1
+
+        def setScore(self, sc):
+            self.score = sc
+
+        def mfword(self):
+            maxfreq = 0
+            for word in self.getWords():
+                if dictionary[word] > maxfreq:
+                    maxword = word
+                    maxfreq = dictionary[word]
+            return maxword
+
+    def add_word(word):
+        maximum = 0
+        maxJCN = 0
+        flag = 0
+        for chain in lexical_chains:  #for all chains that are present
+            for synset in wn.synsets(word):  #for all synsets of current word
+                for sense in chain.senses:  #for all senses of the current word in current element of the current chain
+                    similarity = sense.wup_similarity(synset)  #using wup_similarity
+
+                    if(similarity >= maximum):
+                        if similarity >= threshold:
+                            #print word, synset, sense, sense.jcn_similarity(synset, brown_ic)
+                            JCN = sense.jcn_similarity(synset, brown_ic)  #using jcn_similarity
+                            if JCN >= jcnTreshold:
+                                if sense.path_similarity(synset) >= 0.2:  #using path similarity
+                                    if JCN >= maxJCN:
+                                        maximum = similarity
+                                        maxJCN = JCN
+                                        maxChain = chain
+                                        flag = 1
+        if flag == 1:
+            maxChain.addWord(word)
+            maxChain.addSense(synset)
+            return
+
+        lexical_chains.append(Chain([word], wn.synsets(word)))
+
+
+    def count_words(summary):
+        count = 0
+        for line in summary:
+            count = count + len(line.split(' '))
+        return count
+    #fileName = raw_input("Enter file path + name, if file name is 'nlp.txt', type 'nlp' \n \n")
+    #n = raw_input("Enter number of sentences in summary.\n")
+
+    #fileName = "nlp.txt"
+    threshold = 0.6  #treshold for wup
+    jcnTreshold = 0.09  #jcn
+    pathTeshold = 0.1  #path
+    brown_ic = wordnet_ic.ic('ic-brown.dat')  #load the brown corpus
+    lexical_chains = []  #empty list to hold all the chains
+    dictionary = {}  #empty dictionart to hold the count of each word encountered
+    word_count=50
+    File = open(fileName)  #open file
+    lines = File.read()  #read all lines
+    #dec_lines = [line.decode('utf-8') for line in lines]
+
+    line_list = lines.split('. ')
+    clean_lines = clean(lines)
+
+    is_noun = lambda x: True if (pos == 'NN' or pos == 'NNP' or pos == 'NNS' or pos == 'NNPS') else False
+    nouns = [word for (word, pos) in nltk.pos_tag(nltk.word_tokenize(lines)) if is_noun(pos)]  #extract all nouns
+
+
+    for word in nouns:
+        add_word(word)
+
+    #print all chains
+    for chain in lexical_chains:
+        chain_length = 0
+        dis_word = 0
+        for word in chain.getWords():
+            #print str(word + "(" + str(dictionary[word]) + ")") + ',',
+            chain_length = chain_length + dictionary[word]
+            dis_word = dis_word + 1
+        #print 'Length =' + str(chain_length)
+        hom = 1 - (dis_word*1.0/chain_length)
+        #print 'Homogeneity =' + str(hom)
+        score = 1.0*chain_length*hom
+        #print 'Score =' + str(score)
+        chain.setScore(score)
+
+    #print 'Sorted start '
+    lexical_chains.sort(key=lambda x: x.score, reverse=True)
+
+    if verbose==1:
+        for chain in lexical_chains:
+            if(chain.score>0.0):
+                for word in chain.getWords():
+                    print str(word + "(" + str(dictionary[word]) + ")") + ',',
+                print 'Score=' + str(chain.score)
+
+    summary = []
+    line_flags = []
+    line_score=[]
+
+    for line in line_list:
+        line_flags.append(0)
+        line_score.append(0)
+
+    for chain in lexical_chains:
+        if chain.score>0.0:
+            bigword = chain.mfword()
+            chain_score = chain.score
+            #print '\nMF word ', bigword
+            for i in range(len(line_list)):
+                line=line_list[i]
+                if findWholeWord(bigword)(line)!=None:
+                    #((line.find(' '+str(bigword)+' ')!=-1) or (line.find(' '+str(bigword)+'.')!=-1)):
+                    if line_flags[i]==0:
+                        #summary.append(line)
+                        #print 'i ', count_words(summary)
+                        line_flags[i] = 1
+                        line_score[i] = chain_score
+                        #print 'line_score ', line_score
+                        #print 'line_flags ', line_flags
+
+                        break
+                    elif line_flags[i]==1:
+                        line_score[i] = line_score[i] + chain.score
+                        #print '\nline_score ', line_score
+                        #print 'line_flags ', line_flags
+
+
+    '''
+    if(count_words(summary)>word_count):
+        break
+
+    '''
+
+    namscores = dict(zip([sentence.token for sentence in clean_lines],line_score))
+
+    #print namscores
+    #print len(summary)
+    #print line_score
+
+    #final_summary = ' '.join(summary)
+    #print final_summary
+    return namscores
+
+print LexicalChain(verbose=1)
\ No newline at end of file
diff --git a/LexChain/Boochain.pyc b/LexChain/Boochain.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d81da96f83fe45ebe9e08c875c8f1862284e66f8
Binary files /dev/null and b/LexChain/Boochain.pyc differ
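A note on the chain-building step in Boochain.py above: add_word() attaches a noun to an existing chain only when one of its WordNet synsets clears three similarity gates against a sense already in the chain (Wu-Palmer >= 0.6, Jiang-Conrath >= 0.09 computed with the Brown information-content file, and path similarity >= 0.2), preferring the candidate with the highest JCN score; otherwise the noun seeds a new chain. Below is a minimal sketch of that gating in Python 2 with NLTK; only the thresholds come from the patch, the function name and example words are illustrative, and the 'wordnet' and 'wordnet_ic' NLTK data packages must already be installed.

    from nltk.corpus import wordnet as wn
    from nltk.corpus import wordnet_ic

    brown_ic = wordnet_ic.ic('ic-brown.dat')   # same IC file the patch loads

    def best_link(word, chain_senses, wup_t=0.6, jcn_t=0.09, path_t=0.2):
        # Return (new_sense, chain_sense, jcn) for the strongest link that clears
        # all three thresholds, or None if the word should start a new chain.
        best = None
        for synset in wn.synsets(word, pos=wn.NOUN):
            for sense in chain_senses:
                wup = sense.wup_similarity(synset)
                path = sense.path_similarity(synset)
                if wup is None or path is None:
                    continue
                if wup < wup_t or path < path_t:
                    continue
                jcn = sense.jcn_similarity(synset, brown_ic)
                if jcn >= jcn_t and (best is None or jcn > best[2]):
                    best = (synset, sense, jcn)
        return best

    # 'car' should attach to a chain seeded by the senses of 'automobile'.
    print best_link('car', wn.synsets('automobile', pos=wn.NOUN))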
diff --git a/Lexrank/summa/textrank.py b/Lexrank/summa/textrank.py
index eba99a2..2f7189a 100644
--- a/Lexrank/summa/textrank.py
+++ b/Lexrank/summa/textrank.py
@@ -2,11 +2,13 @@
 import sys
 sys.path.append('../')
-
 import sys, getopt
 from summarizer import summarize
 from keywords import keywords
 
+#sys.path.append('../../LexChain')
+#from Boochain import LexicalChain
+
 # Types of summarization
 SENTENCE = 0
 WORD = 1
@@ -59,6 +61,7 @@ def usage():
 
 
 def textrank(text, summarize_by=SENTENCE, ratio=0.2, words=None):
+    #namscores = LexicalChain()
     if summarize_by == SENTENCE:
         return summarize(text, ratio, words)
     else:
diff --git a/Lexrank/summa/textrank.pyc b/Lexrank/summa/textrank.pyc
index d9481724a1a11d14c36bba76ec3c8a3851ec916e..3b99747bb257531eafa9f45b7f4edfb5e79606b3 100644
Binary files a/Lexrank/summa/textrank.pyc and b/Lexrank/summa/textrank.pyc differ

From: Sudarshan
Date: Mon, 3 Apr 2017 02:02:28 +0530
Subject: [PATCH 3/4] Namma scores are entered into textrank

---
 LexChain/Boochain.py       |   2 +-
 LexChain/Boochain.pyc      | Bin 5567 -> 5457 bytes
 Lexrank/summa/textrank.py  |  11 ++++++-----
 Lexrank/summa/textrank.pyc | Bin 2573 -> 2732 bytes
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/LexChain/Boochain.py b/LexChain/Boochain.py
index 2073200..5c1500b 100644
--- a/LexChain/Boochain.py
+++ b/LexChain/Boochain.py
@@ -188,4 +188,4 @@ def count_words(summary):
     #print final_summary
     return namscores
 
-print LexicalChain(verbose=1)
\ No newline at end of file
+#print LexicalChain(verbose=1)
\ No newline at end of file
diff --git a/LexChain/Boochain.pyc b/LexChain/Boochain.pyc
index d81da96f83fe45ebe9e08c875c8f1862284e66f8..bc5593208a097e41fcab93e0c6c6c3a2d1bdf66d 100644
Binary files a/LexChain/Boochain.pyc and b/LexChain/Boochain.pyc differ
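With the module-level print call commented out above, Boochain.py can now be imported without side effects. LexicalChain() reads the input file, builds and scores the chains, and returns a dict mapping each cleaned sentence token to its lexical-chain score, which is what the textrank.py change below consumes. A usage sketch (the path and input file are illustrative, and the NLTK data used by the chainer must be available):

    import sys
    sys.path.append('../../LexChain')      # mirror of the path added in textrank.py

    from Boochain import LexicalChain

    # {sentence_token: score} for every sentence of the file
    namscores = LexicalChain(fileName="amazon.txt", verbose=0)

    # peek at the five highest-scoring sentences
    for token, score in sorted(namscores.items(), key=lambda kv: kv[1], reverse=True)[:5]:
        print score, token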
diff --git a/Lexrank/summa/textrank.py b/Lexrank/summa/textrank.py
index 2f7189a..2afff29 100644
--- a/Lexrank/summa/textrank.py
+++ b/Lexrank/summa/textrank.py
@@ -6,8 +6,8 @@
 from summarizer import summarize
 from keywords import keywords
 
-#sys.path.append('../../LexChain')
-#from Boochain import LexicalChain
+sys.path.append('../../LexChain')
+from Boochain import LexicalChain
 
 # Types of summarization
 SENTENCE = 0
@@ -60,8 +60,9 @@ def usage():
     print help_text
 
 
-def textrank(text, summarize_by=SENTENCE, ratio=0.2, words=None):
-    #namscores = LexicalChain()
+def textrank(text, path, summarize_by=SENTENCE, ratio=0.2, words=None):
+    namscores = LexicalChain(fileName=path)
+    print namscores
     if summarize_by == SENTENCE:
         return summarize(text, ratio, words)
     else:
@@ -74,7 +75,7 @@ def main():
     with open(path) as file:
         text = file.read()
 
-    print textrank(text, summarize_by, ratio, words)
+    print textrank(text, path, summarize_by, ratio, words)
 
 
 if __name__ == "__main__":
diff --git a/Lexrank/summa/textrank.pyc b/Lexrank/summa/textrank.pyc
index 3b99747bb257531eafa9f45b7f4edfb5e79606b3..1dda07a241e3ad6f33ee1604e52ea7f1f2eea360 100644
Binary files a/Lexrank/summa/textrank.pyc and b/Lexrank/summa/textrank.pyc differ

From 1cb5ddca63a03f10200f46cdff8f87d5ec6c95b8 Mon Sep 17 00:00:00 2001
From: Sudarshan
Date: Mon, 3 Apr 2017 02:50:52 +0530
Subject: [PATCH 4/4] Completed code. Cleanup yet to be done.

---
 LexChain/Boochain.py                |  57 ++++++++++++++++------------
 LexChain/Boochain.pyc               | Bin 5457 -> 5573 bytes
 Lexrank/amazon.txt                  |  27 ++++++++++++-
 Lexrank/summa/pagerank_weighted.py  |  16 +++++++-
 Lexrank/summa/pagerank_weighted.pyc | Bin 3656 -> 3691 bytes
 Lexrank/summa/summarizer.py         |   8 ++--
 Lexrank/summa/summarizer.pyc        | Bin 4342 -> 4337 bytes
 Lexrank/summa/textrank.py           |   4 +-
 Lexrank/summa/textrank.pyc          | Bin 2732 -> 2728 bytes
 9 files changed, 80 insertions(+), 32 deletions(-)

diff --git a/LexChain/Boochain.py b/LexChain/Boochain.py
index 5c1500b..41ceeb3 100644
--- a/LexChain/Boochain.py
+++ b/LexChain/Boochain.py
@@ -104,10 +104,10 @@ def count_words(summary):
     File = open(fileName)  #open file
     lines = File.read()  #read all lines
     #dec_lines = [line.decode('utf-8') for line in lines]
+    #print [clean_line.token for clean_line in clean_lines]
 
-    line_list = lines.split('. ')
     clean_lines = clean(lines)
-
+    line_list = [clean_line.text for clean_line in clean_lines]
     is_noun = lambda x: True if (pos == 'NN' or pos == 'NNP' or pos == 'NNS' or pos == 'NNPS') else False
     nouns = [word for (word, pos) in nltk.pos_tag(nltk.word_tokenize(lines)) if is_noun(pos)]  #extract all nouns
 
@@ -149,34 +149,43 @@ def count_words(summary):
         line_score.append(0)
 
     for chain in lexical_chains:
-        if chain.score>0.0:
-            bigword = chain.mfword()
-            chain_score = chain.score
-            #print '\nMF word ', bigword
-            for i in range(len(line_list)):
-                line=line_list[i]
-                if findWholeWord(bigword)(line)!=None:
-                    #((line.find(' '+str(bigword)+' ')!=-1) or (line.find(' '+str(bigword)+'.')!=-1)):
-                    if line_flags[i]==0:
-                        #summary.append(line)
-                        #print 'i ', count_words(summary)
-                        line_flags[i] = 1
-                        line_score[i] = chain_score
-                        #print 'line_score ', line_score
-                        #print 'line_flags ', line_flags
-
-                        break
-                    elif line_flags[i]==1:
-                        line_score[i] = line_score[i] + chain.score
-                        #print '\nline_score ', line_score
-                        #print 'line_flags ', line_flags
-
+
+        bigword = chain.mfword()
+        chain_score = chain.score
+        #print '\nMF word ', bigword
+        for i in range(len(line_list)):
+            line=line_list[i]
+            if findWholeWord(bigword)(line)!=None:
+                #((line.find(' '+str(bigword)+' ')!=-1) or (line.find(' '+str(bigword)+'.')!=-1)):
+                if line_flags[i]==0:
+                    #summary.append(line)
+                    #print 'i ', count_words(summary)
+                    line_flags[i] = 1
+                    line_score[i] = chain_score
+                    #print 'line_score ', line_score
+                    #print 'line_flags ', line_flags
+
+                    break
+                #elif line_flags[i]==1:
+                    #line_score[i] = line_score[i] + chain.score
+                    #print '\nline_score ', line_score
+                    #print 'line_flags ', line_flags
+
 
     '''
     if(count_words(summary)>word_count):
         break
 
     '''
+    tot_score = 0
+    for i in range(len(line_score)):
+        line_score[i] = line_score[i]+1
+
+    for score in line_score:
+        tot_score = tot_score + score
+
+    for i in range(len(line_score)):
+        line_score[i] = line_score[i]/tot_score
 
     namscores = dict(zip([sentence.token for sentence in clean_lines],line_score))
 
diff --git a/LexChain/Boochain.pyc b/LexChain/Boochain.pyc
index bc5593208a097e41fcab93e0c6c6c3a2d1bdf66d..74740219a7efa5cee1575e5c3f787a42e9f6c189 100644
Binary files a/LexChain/Boochain.pyc and b/LexChain/Boochain.pyc differ
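The Boochain.py hunk above drops the score gate on chains and the elif accumulation, then turns the raw per-sentence scores into a normalised distribution: every entry gets add-one smoothing and is divided by the new total. A small worked example of that arithmetic (the numbers are made up):

    # raw chain scores assigned to three sentences
    line_score = [4.0, 0.0, 2.0]

    # add-one smoothing, as in the patch, so no sentence ends up with weight 0
    line_score = [s + 1 for s in line_score]           # [5.0, 1.0, 3.0]

    # normalise by the total so the weights sum to 1
    tot_score = sum(line_score)                         # 9.0
    line_score = [s / tot_score for s in line_score]   # [0.555..., 0.111..., 0.333...]

Note that the division runs under Python 2; the entries are floats as soon as any chain score is assigned, but if every entry stayed an integer the division would truncate to 0.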
diff --git a/Lexrank/summa/summarizer.py b/Lexrank/summa/summarizer.py
index 1aa2d62..3eac92f 100644
--- a/Lexrank/summa/summarizer.py
+++ b/Lexrank/summa/summarizer.py
@@ -88,11 +88,11 @@ def _extract_most_important_sentences(sentences, ratio, words):
     return _get_sentences_with_word_count(sentences, words)
 
 
-def summarize(text, ratio=0.2, words=None, language="english", split=False, scores=False):
+def summarize(text, namscores, ratio=0.2, words=None, language="english", split=False, scores=False):
     # Gets a list of processed sentences.
     sentences = _clean_text_by_sentences(text, language)
 
-    namscores=[]
+    #print namscores
 
     # Creates the graph and calculates the similarity coefficient for every pair of nodes.
     graph = _build_graph([sentence.token for sentence in sentences])
@@ -102,7 +102,9 @@ def summarize(text, ratio=0.2, words=None, language="english", split=False, scor
     _remove_unreachable_nodes(graph)
 
     # Ranks the tokens using the PageRank algorithm. Returns dict of sentence -> score
-    pagerank_scores = _pagerank(graph)
+    pagerank_scores = _pagerank(graph, namscores)
+
+    #print pagerank_scores
 
     # Adds the summa scores to the sentence objects.
     _add_scores_to_sentences(sentences, pagerank_scores)
diff --git a/Lexrank/summa/summarizer.pyc b/Lexrank/summa/summarizer.pyc
index 1bdcb25a850ae417ecf7dc97d3b69725c175ea34..44621da7f6bda700cd7960d11dd7cf3b5ba51397 100644
Binary files a/Lexrank/summa/summarizer.pyc and b/Lexrank/summa/summarizer.pyc differ
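Taken together, the last two patches wire the lexical-chain scores into the TextRank pipeline: textrank() builds namscores via LexicalChain(), and summarize() now accepts them and forwards them to _pagerank(graph, namscores), with the actual weighting handled in pagerank_weighted.py. A sketch of the resulting call chain, assuming it is driven the same way textrank.py's main() does (the file path is illustrative and the summa modules are imported from Lexrank/summa):

    import sys
    sys.path.append('../../LexChain')

    from Boochain import LexicalChain
    from summarizer import summarize

    path = "amazon.txt"                    # illustrative input document
    with open(path) as f:
        text = f.read()

    # per-sentence lexical-chain weights, normalised to sum to 1
    namscores = LexicalChain(fileName=path)

    # summarize() passes namscores on to _pagerank(graph, namscores);
    # how the two score sets are blended lives in pagerank_weighted.py
    print summarize(text, namscores, ratio=0.2)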