Skip to content

Commit

Permalink
Scan for XML files when generating the docset index
Browse files Browse the repository at this point in the history
generate_index.py scanned a fixed range of xeps.  The code has been
reworked slightly to scan the xeps dir for the right XML files, and
generate records for every one.

Test-Information:
Checked that it generated SQL for every XEP in xeps
  • Loading branch information
intosi committed Nov 5, 2016
1 parent 5f128b5 commit 8078601
Showing 1 changed file with 32 additions and 23 deletions.
55 changes: 32 additions & 23 deletions generate_index.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import os.path
import re
import xml.dom.minidom

# Emit the SQL that creates the searchIndex table when it does not exist yet.
# NOTE(review): as shown here the statement declares only the `id` column,
# while the index and INSERT statements below use name/type/path -- confirm
# those columns are added by lines not visible in this view.
print 'CREATE TABLE IF NOT EXISTS searchIndex(id INTEGER PRIMARY KEY);'
Expand All @@ -6,27 +9,33 @@
# Emit the SQL for a unique index named `anchor` over (name, type, path).
# The INSERT OR IGNORE statements written by putSQL presumably rely on this
# index so that a repeated (name, type, path) triple is skipped.
print 'CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);'

def putSQL(name, type, path):
    """Print one INSERT OR IGNORE statement for the searchIndex table.

    name -- the text a user searches for (XEP number, title or short name)
    type -- the entry type stored alongside the name
    path -- the document path the entry points to

    Each value is embedded in a single-quoted SQL string literal, so any
    single quote inside it is doubled; without that a title containing an
    apostrophe would terminate the literal early and break the statement.
    """
    def quote(value):
        # "%s" % (value,) keeps unicode values as they are on Python 2
        return ("%s" % (value,)).replace("'", "''")

    # Parenthesised single-argument print behaves the same on Python 2 and 3
    print("INSERT OR IGNORE INTO searchIndex('name', 'type', 'path') VALUES ('%s', '%s', '%s');" % (quote(name), quote(type), quote(path)))

# NOTE(review): this appears to be the pre-change version of the loop that the
# commit message describes ("scanned a fixed range of xeps"); indentation is
# not preserved in this view, so the nesting below has to be inferred.
# Visits XEP numbers 1 through 379 and emits index records for each one.
for i in range(1,380):
# Document path recorded for every entry of this XEP (zero-padded to 4 digits)
path = "xep-%04d.html" % i
type = 'Extension'
# Two lookup names per XEP: unpadded ("xep45") and zero-padded ("xep0045")
name = "xep%d" % i
putSQL(name, type, path)
name = "xep%04d" % i
putSQL(name, type, path)
# Parse the XEP's XML source; presumably every number in the range must have
# a matching file under xeps/ for this to succeed -- confirm.
dom = xml.dom.minidom.parse("xeps/xep-%04d.xml" % i)
header = dom.getElementsByTagName('header')
# `title` starts as the first <title> element inside the first <header> and is
# rebound to the data of a text-node child when one is found
title = header[0].getElementsByTagName('title')[0]
for child in title.childNodes:
if child.nodeType == child.TEXT_NODE:
title = child.data
# "N/A" is treated as a placeholder title and gets no record
if title != "N/A":
putSQL(title, type, path)
# <shortname> is optional: the list may be empty
shortname = header[0].getElementsByTagName('shortname')
if len(shortname) > 0:
for child in shortname[0].childNodes:
if child.nodeType == child.TEXT_NODE:
shortname = child.data
# 'NOT_YET_ASSIGNED' is treated as a placeholder short name and gets no record
if shortname != 'NOT_YET_ASSIGNED':
putSQL(shortname, type, path)
# XEP sources are named like "xep-0045.xml"; the group captures the number
# without its zero padding.  The dot is escaped and the pattern is anchored
# at the end so that names such as "xep-0045.xml~" or "xep-0045-xml" are not
# mistaken for XEP sources.
xepre = re.compile(r'xep-0*([1-9][0-9]*)\.xml$')

# Every record written by this script uses the same entry type.
ENTRY_TYPE = 'Extension'


def _last_text(element):
    """Return the data of the last direct text-node child, or None."""
    text = None
    for child in element.childNodes:
        if child.nodeType == child.TEXT_NODE:
            text = child.data
    return text


for root, _dirs, files in os.walk('xeps'):
    # Sorted so the generated SQL comes out in the same order on every run
    for filename in sorted(files):
        match = xepre.match(filename)
        if not match:
            continue

        i = int(match.group(1))
        # Records must point at the rendered HTML page, not at the XML source
        # file that was found on disk.
        path = "xep-%04d.html" % i

        # Two lookup names per XEP: unpadded ("xep45") and padded ("xep0045")
        putSQL("xep%d" % i, ENTRY_TYPE, path)
        putSQL("xep%04d" % i, ENTRY_TYPE, path)

        dom = xml.dom.minidom.parse(os.path.join(root, filename))
        headers = dom.getElementsByTagName('header')
        if not headers:
            continue
        header = headers[0]

        # Title record; "N/A" is a placeholder and an element without any
        # text has nothing worth indexing.
        titles = header.getElementsByTagName('title')
        if titles:
            title = _last_text(titles[0])
            if title is not None and title != "N/A":
                putSQL(title, ENTRY_TYPE, path)

        # Short-name record; <shortname> is optional and 'NOT_YET_ASSIGNED'
        # is a placeholder.
        shortnames = header.getElementsByTagName('shortname')
        if shortnames:
            shortname = _last_text(shortnames[0])
            if shortname is not None and shortname != 'NOT_YET_ASSIGNED':
                putSQL(shortname, ENTRY_TYPE, path)

0 comments on commit 8078601

Please sign in to comment.