Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
hakiour committed Oct 6, 2019
2 parents 1498806 + e6119fd commit 0059ed4
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 9 deletions.
12 changes: 6 additions & 6 deletions linkDownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,21 @@ def urlToList(url, fileName, dominio):


def readLinks():

rutaGoogle = 'http://www.google.com/search?btnG=1&q=site%3A'
datosForm = cgi.FieldStorage()

if datosForm:
print("adasd")
url = datosForm["url"]
url = datosForm["url"].value
arrayUrl = (url.split("//"))
if len(arrayUrl) == 2:
url = arrayUrl[1]
else:
print('Error url')
return 0

tema = datosForm["tema"]
print(url)

tema = datosForm["tema"].value

pagina = requests.get(rutaGoogle + url)
tree = html.fromstring(pagina.content)
Expand All @@ -83,12 +83,12 @@ def readLinks():
urlToList(link, "test"+str(i), url)
i+=1


else:
print('Error input')

#url = "https://www.elmundo.es/pais-vasco/2019/07/10/5d25f9af21efa0c0578b456f.html"
#urlToList(url,file)

#readLinks()
print("asd")
readLinks()
#map reduce para contar palabras
6 changes: 3 additions & 3 deletions web/assets/js/analizador.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ function ejecutarAnalisis(){

mostrarLoader();

iniciarProceso(dominio, tematica)
iniciarProceso(dominio.value, tematica.value)
}

function mostrarAlerta(mensaje){
Expand All @@ -27,9 +27,9 @@ function mostrarLoader(){

function iniciarProceso(dominio, tematica) {
$.ajax({
url: "./linkDownload.py",
url: "../linkDownload.py",
type: "POST",
data:{ "url":dominio, "tema": tematica},
data: {"url": dominio, "tema": tematica},
success: function (response) {
alert(response)
},
Expand Down
47 changes: 47 additions & 0 deletions web/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!C:/Users/sergi/AppData/Local/Programs/Python/Python37-32/python.exe
print("Content-Type: text/html\n")
from urllib.request import Request, urlopen
import re
import cgi
from bs4 import BeautifulSoup
import json
import sys
from lxml import html
import requests


# Open a web page and acquire its text — sample inputs for manual testing.
url = "https://www.freecodecamp.org/news/my-first-python-project-converting-a-disorganized-text-file-into-a-neatly-structured-csv-file-21f4c6af502d"
# NOTE(review): `file` appears unused in this file — presumably carried over
# from linkDownload.py; confirm before removing.
file = 'test'


def urlToList(url, fileName):
    """Fetch *url*, extract the human-visible body text, and dump it as a
    JSON word list to ``fileName + '.json'``.

    The output file contains ``{"lista": [word, ...]}`` with every word
    lower-cased.  Returns True on success; network or parse errors propagate
    to the caller.
    """
    # Spoof a browser User-Agent so sites that block "Python-urllib" respond.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    webpage = urlopen(req).read()
    soup = BeautifulSoup(webpage, features="lxml")

    # Remove script and style elements so only visible text remains.
    for script in soup(["script", "style"]):
        script.extract()

    # Extract only the text of the document body.
    text = soup.body.get_text()

    # Split into stripped lines, break multi-headline lines apart, and
    # re-join the non-empty chunks one per line.
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))  # break multi-headlines into a line each
    # BUG FIX: the original joined with the literal string '/n', which glued
    # neighbouring chunks into single tokens such as 'word/nword' once
    # text.split() ran below.  A real newline keeps words separated.
    text = '\n'.join(chunk for chunk in chunks if chunk)  # drop blank lines

    # Work in lower case throughout to avoid case-sensitive search misses.
    text = text.lower()
    x = {
        "lista": text.split()
    }

    # Persist the word list.  Force UTF-8 explicitly: with ensure_ascii=False
    # the platform default encoding (e.g. cp1252 on the Windows host this
    # script targets) could raise UnicodeEncodeError on non-ASCII words.
    with open(fileName + '.json', 'w', encoding='utf-8') as filehandle:
        json.dump(x, filehandle, ensure_ascii=False)

    return True

# NOTE(review): looks like a leftover debug print — because of the CGI
# Content-Type header emitted at the top of this script, this line ends up
# in the HTTP response body.  Confirm it is intentional.
print("test.com")

0 comments on commit 0059ed4

Please sign in to comment.