1 | #!/usr/bin/env /usr/local/Python/bin/python2.1 |
---|
2 | """ |
---|
3 | Decoder for bibliographic data, BibTeX |
---|
4 | Usage: python bibtex2xml.py bibfile.bib > bibfile.xml |
---|
5 | |
---|
6 | v.8 |
---|
7 | (c)2002-06-23 Vidar Bronken Gundersen |
---|
8 | http://bibtexml.sf.net/ |
---|
9 | Reuse approved as long as this notification is kept. |
---|
10 | Licence: GPL. |
---|
11 | |
---|
12 | Contributions/thanks to: |
---|
13 | Egon Willighagen, http://sf.net/projects/jreferences/ |
---|
14 | Richard Mahoney (for providing a test case) |
---|
15 | |
---|
Edited by Sara Sprenkle to be more robust and handle more bibtex features.  (c) 2003-01-15
---|
17 | 1. Changed bibtex: tags to bibxml: tags. |
---|
18 | 2. Use xmlns:bibxml="http://bibtexml.sf.net/" |
---|
19 | 3. Allow spaces between @type and first { |
---|
20 | 4. "author" fields with multiple authors split by " and " |
---|
21 | are put in separate xml "bibxml:author" tags. |
---|
22 | 5. Option for Titles: words are capitalized |
---|
23 | only if first letter in title or capitalized inside braces |
---|
24 | 6. Removes braces from within field values |
---|
25 | 7. Ignores comments in bibtex file (including @comment{ or % ) |
---|
26 | 8. Replaces some special latex tags, e.g., replaces ~ with ' ' |
---|
27 | 9. Handles bibtex @string abbreviations |
---|
28 | --> includes bibtex's default abbreviations for months |
---|
29 | --> does concatenation of abbr # " more " and " more " # abbr |
---|
30 | 10. Handles @type( ... ) or @type{ ... } |
---|
31 | 11. The keywords field is split on , or ; and put into separate xml |
---|
32 | "bibxml:keywords" tags |
---|
33 | 12. Ignores @preamble |
---|
34 | |
---|
35 | Known Limitations |
---|
36 | 1. Does not transform Latex encoding like math mode and special latex symbols. |
---|
37 | 2. Does not parse author fields into first and last names. |
---|
38 | E.g., It does not do anything special to an author whose name is in the form LAST_NAME, FIRST_NAME |
---|
39 | In "author" tag, will show up as <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author> |
---|
40 | 3. Does not handle "crossref" fields other than to print <bibxml:crossref>...</bibxml:crossref> |
---|
41 | 4. Does not inform user of the input's format errors. You just won't be able to |
---|
42 | transform the file later with XSL |
---|
43 | |
---|
44 | You will have to manually edit the XML output if you need to handle |
---|
45 | these (and unknown) limitations. |
---|
46 | |
---|
47 | """ |
---|
48 | |
---|
49 | import string, re |
---|
50 | |
---|
# set of valid name characters
# (raw strings throughout: '\w', '\s', '\\' etc. in plain literals rely on
# unrecognized-escape fallback, and the old '\\\url{...}' literal is a hard
# SyntaxError on Python 3 because '\u' starts a unicode escape)
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# matches \url{...} latex markup
url_rex = re.compile(r'\\url\{([^}]*)\}')
---|
74 | |
---|
75 | |
---|
76 | # |
---|
# replace \url{...} latex markup with HTML anchor tags
---|
78 | # |
---|
def transformurls(str):
    """Rewrite every \\url{...} occurrence in *str* as an HTML <a> link."""
    anchor = r'<a href="\1">\1</a>'
    return url_rex.sub(anchor, str)
---|
81 | |
---|
82 | # |
---|
83 | # return the string parameter without braces |
---|
84 | # |
---|
def removebraces(str):
    """Return *str* with all curly braces stripped out."""
    without = rembraces_rex.sub('', str)
    return without
---|
87 | |
---|
88 | # |
---|
89 | # latex-specific replacements |
---|
90 | # (do this after braces were removed) |
---|
91 | # |
---|
def latexreplacements(line):
    """Replace common latex markup in *line* with plain text.

    Handles the non-breaking space '~' and the accented-letter commands
    \\'x, \\"x and \\H x for both lower- and upper-case vowels.
    (Do this after braces were removed.)
    """
    # (latex markup, replacement) pairs, applied in order.
    # string.replace() from the old ``string`` module was removed in
    # Python 3, so str.replace() is used instead.
    replacements = (
        ('~', ' '),
        ("\\'a", 'á'), ('\\"a', 'ä'),
        ("\\'e", 'é'), ('\\"e', 'ë'),
        ("\\'i", 'í'), ('\\"i', 'ï'),
        ("\\'o", 'ó'), ('\\"o', 'ö'),
        ("\\'u", 'ú'), ('\\"u', 'ü'),
        ('\\H o', 'õ'),
        ('\\H u', 'ü'),  # ũ does not exist
        ("\\'A", 'Á'), ('\\"A', 'Ä'),
        ("\\'E", 'É'), ('\\"E', 'Ë'),
        ("\\'I", 'Í'), ('\\"I', 'Ï'),
        ("\\'O", 'Ó'), ('\\"O', 'Ö'),
        ("\\'U", 'Ú'), ('\\"U', 'Ü'),
        ('\\H O', 'Õ'),
        ('\\H U', 'Ü'),  # Ũ does not exist
    )
    for latex, char in replacements:
        line = line.replace(latex, char)

    return line
---|
120 | |
---|
121 | # |
---|
# copy characters from a string decoding html expressions (&xyz;)
---|
123 | # |
---|
def copychars(str, ifrom, count):
    """Copy up to *count* letters from *str*, starting at index *ifrom*.

    Only a-z/A-Z characters are copied.  An html entity '&xyz;' contributes
    the single character right after the '&'; the rest of the entity is
    skipped.
    """
    result = ''
    i = ifrom
    c = 0
    html_spec = False
    while (i < len(str)) and (c < count):
        if str[i] == '&':
            html_spec = True
            if i + 1 < len(str):
                result += str[i+1]
                c += 1
            # bug fix: always advance past the '&'.  The original only
            # advanced when another character followed, so a trailing '&'
            # left i unchanged and the loop never terminated.
            i += 2
        else:
            if not html_spec:
                if ('A' <= str[i] <= 'Z') or ('a' <= str[i] <= 'z'):
                    result += str[i]
                    c += 1
            elif str[i] == ';':
                # end of the html entity being skipped
                html_spec = False
            i += 1

    return result
---|
147 | |
---|
148 | |
---|
#
# Handle a list of authors (separated by 'and').
# It gives back an array of the following values:
#  - num: the number of authors,
#  - list: the list of the author names,
#  - text: the bibtex text (separated by commas and/or 'and')
#  - abbrev: abbreviation that can be used to indicate the
#    bibliography entries
#
---|
def bibtexauthor(data):
    """Parse a bibtex author field.

    Returns a dict with:
      num    -- number of authors
      list   -- cleaned-up author names in "First Last" order
      text   -- the names joined with commas and/or 'and'
      abbrev -- initials usable to label the bibliography entry
    """
    names = author_rex.split(data)
    cleaned = []
    for name in names:
        # general transformations
        name = latexreplacements(removebraces(name.strip()))
        # transform "Xyz, A. B." into "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma+1:].strip() + ' ' + name[:comma].strip()
        cleaned.append(name)

    num = len(cleaned)

    # join with '#', then turn the final separator into 'and' and the
    # remaining ones into commas
    joined = '#'.join(cleaned)
    if num > 1:
        cut = joined.rfind('#')
        if num == 2:
            glue = ' and '
        else:
            glue = ', and '
        joined = joined[:cut] + glue + joined[cut+1:]
        joined = joined.replace('#', ', ')

    # initials: three letters for a single author, one per author otherwise,
    # taken from each author's last word (the surname)
    abbrev = ''
    for name in cleaned:
        if num == 1:
            take = 3
        else:
            take = 1
        abbrev += copychars(name, name.rfind(' ') + 1, take)

    return {'list': cleaned, 'num': num, 'text': joined, 'abbrev': abbrev}
---|
191 | |
---|
192 | |
---|
193 | # |
---|
194 | # data = title string |
---|
195 | # @return the capitalized title (first letter is capitalized), rest are capitalized |
---|
196 | # only if capitalized inside braces |
---|
197 | # |
---|
def capitalizetitle(data):
    """Return *data* capitalized like a sentence.

    The first word is capitalized, later words are lower-cased, and any
    phrase wrapped in {braces} keeps its original capitalization (the
    braces themselves are removed).
    """
    title_list = capitalize_rex.split(data)
    title = ''
    count = 0
    for phrase in title_list:
        # string.lstrip() was removed in Python 3; use the str method
        check = phrase.lstrip()

        # keep phrase's capitalization the same
        if check.find('{') == 0:
            title += removebraces(phrase)
        else:
            # first word --> capitalize first letter (after spaces)
            if count == 0:
                title += check.capitalize()
            else:
                title += phrase.lower()
            count = count + 1

    return title
---|
217 | |
---|
218 | |
---|
219 | # |
---|
220 | # @return the bibtex for the title |
---|
221 | # @param data --> title string |
---|
222 | # braces are removed from title |
---|
223 | # |
---|
def bibtextitle(data, entrytype):
    """Return the display title for an entry, braces removed.

    Book-like entries keep their capitalization; all others are
    sentence-capitalized via capitalizetitle().
    """
    stripped = data.strip()
    if entrytype in ('book', 'inbook'):
        return removebraces(stripped)
    return removebraces(capitalizetitle(stripped))
---|
231 | |
---|
232 | |
---|
233 | # |
---|
234 | # function to compare entry lists |
---|
235 | # |
---|
def entry_cmp(x, y):
    """Old-style comparator for entry lists: orders by the sort key x[0].

    Returns a negative, zero or positive number as x[0] is less than,
    equal to, or greater than y[0].  The builtin cmp() was removed in
    Python 3, so the boolean-subtraction idiom is used instead.
    """
    return (x[0] > y[0]) - (x[0] < y[0])
---|
238 | |
---|
239 | |
---|
240 | # |
---|
241 | # print the XML for the transformed "filecont_source" |
---|
242 | # |
---|
def bibtexdecoder(filecont_source):
    """Turn washed bibtex lines into doxygen/HTML table rows.

    *filecont_source* is a list of newline-terminated lines as produced by
    bibtexwasher().  Returns the list of output lines (one table row group
    per entry, sorted by author/year/title).

    Py3 compatibility fixes: dict.has_key() -> in, string.replace()/
    string.lower() -> str methods, list.sort(cmpfunc) -> key=.  The local
    previously named ``file`` was renamed so it no longer shadows the
    builtin, and the entity-encoding replacements are restored to their
    intended '&amp;'/'&lt;'/'&gt;' targets (they had degenerated into
    no-op identity replacements).
    """
    filecont = []
    output = []

    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')

    bracefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')

    quotefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    for line in filecont_source:
        line = line[:-1]   # drop the trailing newline

        # encode character entities for the HTML output
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            # want @<alphanumeric chars><spaces>{<spaces><any chars>,
            entrycont = {}
            entry = []
            entrytype = pubtype_rex.sub(r'\g<1>', line)
            entrytype = entrytype.lower()

        # end entry if just a }
        elif endtype_rex.match(line):
            # generate doxygen code for the entry

            # entry type related formattings
            if entrytype in ('book', 'inbook'):
                entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
                if 'author' not in entrycont:
                    # NOTE(review): assumes an 'editor' field is present
                    # whenever 'author' is missing; otherwise this raises
                    # KeyError (inherited behavior)
                    entrycont['author'] = entrycont['editor']
                    entrycont['author']['text'] += ', editors'
            elif entrytype == 'article':
                entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
            elif entrytype in ('inproceedings', 'incollection', 'conference'):
                entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
            elif entrytype == 'techreport':
                if 'type' not in entrycont:
                    entrycont['type'] = 'Technical report'
            elif entrytype == 'mastersthesis':
                entrycont['type'] = 'Master\'s thesis'
            elif entrytype == 'phdthesis':
                entrycont['type'] = 'PhD thesis'

            # (inherited: rebinding the loop variable has no effect on the
            # dict values; kept for behavioral fidelity)
            for eline in entrycont:
                if eline != '':
                    eline = latexreplacements(eline)

            if 'pages' in entrycont and (entrycont['pages'] != ''):
                entrycont['pages'] = entrycont['pages'].replace('--', '-')

            # assemble the citation text, field by field
            if 'author' in entrycont and (entrycont['author'] != ''):
                entry.append(entrycont['author']['text'] + '.')
            if 'title' in entrycont and (entrycont['title'] != ''):
                entry.append(entrycont['title'] + '.')
            if 'journal' in entrycont and (entrycont['journal'] != ''):
                entry.append(entrycont['journal'] + ',')
            if 'booktitle' in entrycont and (entrycont['booktitle'] != ''):
                entry.append('In ' + entrycont['booktitle'] + ',')
            if 'type' in entrycont and (entrycont['type'] != ''):
                eline = entrycont['type']
                if 'number' in entrycont and (entrycont['number'] != ''):
                    eline += ' ' + entrycont['number']
                eline += ','
                entry.append(eline)
            if 'institution' in entrycont and (entrycont['institution'] != ''):
                entry.append(entrycont['institution'] + ',')
            if 'publisher' in entrycont and (entrycont['publisher'] != ''):
                entry.append(entrycont['publisher'] + ',')
            if 'school' in entrycont and (entrycont['school'] != ''):
                entry.append(entrycont['school'] + ',')
            if 'address' in entrycont and (entrycont['address'] != ''):
                entry.append(entrycont['address'] + ',')
            if 'edition' in entrycont and (entrycont['edition'] != ''):
                entry.append(entrycont['edition'] + ' edition,')
            if 'howpublished' in entrycont and (entrycont['howpublished'] != ''):
                entry.append(entrycont['howpublished'] + ',')
            if 'volume' in entrycont and (entrycont['volume'] != ''):
                eline = entrycont['volume']
                if 'number' in entrycont and (entrycont['number'] != ''):
                    eline += '(' + entrycont['number'] + ')'
                if 'pages' in entrycont and (entrycont['pages'] != ''):
                    eline += ':' + entrycont['pages']
                eline += ','
                entry.append(eline)
            else:
                if 'pages' in entrycont and (entrycont['pages'] != ''):
                    entry.append('pages ' + entrycont['pages'] + ',')
            if 'year' in entrycont and (entrycont['year'] != ''):
                if 'month' in entrycont and (entrycont['month'] != ''):
                    entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
                else:
                    entry.append(entrycont['year'] + '.')
            if 'note' in entrycont and (entrycont['note'] != ''):
                entry.append(entrycont['note'] + '.')

            # generate keys for sorting and for the output
            sortkey = ''
            bibkey = ''
            if 'author' in entrycont:
                for author in entrycont['author']['list']:
                    sortkey += copychars(author, author.rfind(' ')+1, len(author))
                bibkey = entrycont['author']['abbrev']
            else:
                bibkey = 'x'
            if 'year' in entrycont:
                sortkey += entrycont['year']
                bibkey += entrycont['year'][-2:]
            if 'title' in entrycont:
                sortkey += entrycont['title']
            if 'key' in entrycont:
                sortkey = entrycont['key'] + sortkey
                bibkey = entrycont['key']
            entry.insert(0, sortkey)
            entry.insert(1, bibkey)

            # add the entry to the file contents
            filecont.append(entry)

        else:
            # field, publication info
            field = ''
            data = ''

            # field = {data} entries
            if bracedata_rex.match(line):
                field = bracefield_rex.sub(r'\g<1>', line)
                field = field.lower()
                data = bracedata_rex.sub(r'\g<2>', line)

            # field = "data" entries
            elif quotedata_rex.match(line):
                field = quotefield_rex.sub(r'\g<1>', line)
                field = field.lower()
                data = quotedata_rex.sub(r'\g<2>', line)

            # field = data entries
            elif data_rex.match(line):
                field = field_rex.sub(r'\g<1>', line)
                field = field.lower()
                data = data_rex.sub(r'\g<2>', line)

            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
                line = ''
            elif field == 'title':
                line = bibtextitle(data, entrytype)
            elif field != '':
                line = removebraces(transformurls(data.strip()))

            if line != '':
                line = latexreplacements(line)
                entrycont[field] = line

    # sort entries by their sort key (element 0); list.sort() lost its
    # cmp-function argument in Python 3
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    counttable = {}
    for entry in filecont:
        bibkey = entry[1]
        if bibkey not in keytable:
            keytable[bibkey] = 1
        else:
            keytable[bibkey] += 1

    for bibkey in keytable.keys():
        counttable[bibkey] = 0

    # generate output
    for entry in filecont:
        # generate output key from the bibtex key; duplicated keys get an
        # 'a', 'b', ... suffix
        bibkey = entry[1]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
            counttable[bibkey] += 1

        # append the entry code to the output
        output.append('<tr valign="top">\n' +
                      '<td>[' + outkey + ']</td>')
        output.append('<td>')
        output.append('\\anchor ' + outkey)
        for line in entry[2:]:
            output.append(line)
        output.append('</td>\n</tr>')
        output.append('')

    return output
---|
445 | |
---|
446 | |
---|
447 | # |
---|
448 | # return 1 iff abbr is in line but not inside braces or quotes |
---|
449 | # assumes that abbr appears only once on the line (out of braces and quotes) |
---|
450 | # |
---|
def verify_out_of_braces(line, abbr):
    """Return 1 iff *abbr* occurs in *line* outside of {...} and "...".

    Assumes that abbr appears only once on the line (out of braces and
    quotes).
    """
    # split on every brace/quote delimiter, keeping the delimiters
    phrase_split = delimiter_rex.split(line)

    # re.escape the abbreviation: it comes from user input (@string names)
    # and could otherwise inject regex metacharacters
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    open_brace = 0
    open_quote = 0

    for phrase in phrase_split:
        if phrase == "{":
            open_brace = open_brace + 1
        elif phrase == "}":
            open_brace = open_brace - 1
        elif phrase == '"':
            # quotes toggle rather than nest
            if open_quote == 1:
                open_quote = 0
            else:
                open_quote = 1
        elif abbr_rex.search(phrase):
            if open_brace == 0 and open_quote == 0:
                return 1

    return 0
---|
475 | |
---|
476 | |
---|
477 | # |
---|
478 | # a line in the form phrase1 # phrase2 # ... # phrasen |
---|
479 | # is returned as phrase1 phrase2 ... phrasen |
---|
480 | # with the correct punctuation |
---|
481 | # Bug: Doesn't always work with multiple abbreviations plugged in |
---|
482 | # |
---|
def concat_line(line):
    """Collapse a 'field = a # b # c' concatenation into a single value.

    Quote/brace delimiters at the joints are dropped so the pieces merge
    into one braced value, with the correct punctuation.
    Bug (inherited): doesn't always work with multiple abbreviations
    plugged in.
    """
    # only look at the part after the equals sign
    # (raw strings: '\g' in a plain literal is an invalid escape)
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    # local result renamed so it no longer shadows the function itself
    result = field + ' ='

    pound_split = concatsplit_rex.split(rest)

    phrase_count = 0
    length = len(pound_split)

    for phrase in pound_split:
        phrase = phrase.strip()
        if phrase_count != 0:
            # drop the opening delimiter of every piece after the first
            if phrase.startswith('"') or phrase.startswith('{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            # first piece: turn its opening quote into a brace
            phrase = phrase.replace('"', '{', 1)

        if phrase_count != length-1:
            # drop the closing delimiter of every piece but the last
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        else:
            # last piece: close the whole value with a brace
            if phrase.endswith('"'):
                phrase = phrase[:-1]
                phrase = phrase + "}"
            elif phrase.endswith('",'):
                phrase = phrase[:-2]
                phrase = phrase + "},"

        # if phrase did have \#, add the # back
        if phrase.endswith('\\'):
            phrase = phrase + "#"

        result = result + ' ' + phrase
        phrase_count = phrase_count + 1

    return result
---|
522 | |
---|
523 | |
---|
524 | # |
---|
525 | # substitute abbreviations into filecont |
---|
526 | # @param filecont_source - string of data from file |
---|
527 | # |
---|
def bibtex_replace_abbreviations(filecont_source):
    """Expand @string abbreviations (plus bibtex's builtin month names).

    @param filecont_source - string of data from file
    Returns the transformed file contents as a single string, with
    @string/@comment/@preamble blocks dropped and '#' concatenations
    collapsed via concat_line().
    """
    filecont = filecont_source.splitlines()

    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    abbr_rex = []
    total_abbr_count = 0

    front = r'\b'
    back = r'(,?)\b'

    for abbr in abbr_list:
        abbr_rex.append(re.compile(front + abbr + back, re.I))
        total_abbr_count = total_abbr_count + 1

    abbrdef_rex = re.compile(r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)',
                             re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    filecont2 = ''

    for line in filecont:
        if line == ' ' or line == '':
            continue

        # inside a dropped @string/@comment/@preamble block: skip until
        # its closing brace
        if waiting_for_end_string:
            if re.search('}', line):
                waiting_for_end_string = 0
                continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)

            if abbr_list.count(abbr) == 0:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                # string.strip() was removed in Python 3; use the method
                value_list.append(val.strip())
                abbr_rex.append(re.compile(front + abbr_list[total_abbr_count] + back, re.I))
                total_abbr_count = total_abbr_count + 1
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line):
            waiting_for_end_string = 1
            continue

        if preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        abbr_count = 0

        for x in abbr_list:

            if abbr_rex[abbr_count].search(line):
                if verify_out_of_braces(line, abbr_list[abbr_count]) == 1:
                    line = abbr_rex[abbr_count].sub(value_list[abbr_count] + r'\g<1>', line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)
            abbr_count = abbr_count + 1

        filecont2 = filecont2 + line + '\n'

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
---|
623 | |
---|
624 | # |
---|
625 | # convert @type( ... ) to @type{ ... } |
---|
626 | # |
---|
def no_outer_parens(filecont):
    """Rewrite the outer parentheses of @type( ... ) entries as braces.

    Inner parentheses (e.g. inside field values) are left untouched.
    """
    # split so that every paren/brace becomes its own list element
    pieces = re.split('([(){}])', filecont)

    depth = 0          # current '(' nesting depth
    in_at_entry = 0    # inside an @type( ... ) whose opener we converted
    expect_open = 0    # the previous piece was an @type word

    rebuilt = []
    at_rex = re.compile('@\\w*')

    for piece in pieces:
        if expect_open == 1:
            if piece == '(':
                piece = '{'
                depth = depth + 1
            else:
                # the @type word was not followed by '(' -- already braced
                in_at_entry = 0
            expect_open = 0

        if piece == '(':
            depth = depth + 1
        elif piece == ')':
            depth = depth - 1
            if in_at_entry == 1 and depth == 0:
                # this ')' closes the converted entry opener
                piece = '}'
                in_at_entry = 0
        elif at_rex.search(piece):
            in_at_entry = 1
            expect_open = 1

        rebuilt.append(piece)

    return ''.join(rebuilt)
---|
667 | |
---|
668 | |
---|
669 | # |
---|
670 | # make all whitespace into just one space |
---|
671 | # format the bibtex file into a usable form. |
---|
672 | # |
---|
def bibtexwasher(filecont_source):
    """Normalize raw bibtex lines into a one-field-per-line form.

    Collapses all whitespace, drops %-comments and blank lines, converts
    @type( ... ) to @type{ ... }, re-splits the text so each field sits on
    its own line, and expands @string abbreviations.  Returns a list of
    newline-terminated lines ready for bibtexdecoder().

    Py3 compatibility: string.strip()/string.join() were removed in
    Python 3; str methods are used instead.  Regexes use raw strings.
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    filecont = []

    # remove trailing and excessive whitespace
    # ignore comments
    for line in filecont_source:
        line = line.strip()
        line = space_rex.sub(' ', line)
        # ignore comments
        if not comment_rex.match(line) and line != '':
            filecont.append(' ' + line)

    filecont = ''.join(filecont)

    # the file is in one long string

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', r'\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', r'= \g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      r'\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', r'\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct
    open_brace_count = 0
    brace_split = re.split('([{}])', filecont)

    # rebuild filecont
    filecont = ''

    for phrase in brace_split:
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                # entry finished: force a line break after its closing brace
                filecont = filecont + '\n'

        filecont = filecont + phrase

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather
    filecont = filecont2.splitlines()
    i = 0
    j = 0   # count the number of blank lines
    for line in filecont:
        # ignore blank lines
        if line == '' or line == ' ':
            j = j + 1
            continue
        filecont[i] = line + '\n'
        i = i + 1

    # get rid of the extra stuff at the end of the array
    # (The extra stuff are duplicates that are in the array because
    # blank lines were removed.)
    length = len(filecont)
    filecont[length-j:length] = []

    return filecont
---|
755 | |
---|
756 | |
---|
def filehandler(filepath):
    """Read *filepath*, wash and decode it, and print the doxygen page.

    Output goes to stdout as a doxygen comment containing an HTML table.
    """
    try:
        fd = open(filepath, 'r')
        filecont_source = fd.readlines()
        fd.close()
    except IOError:
        # bug fix: the original used a bare except that printed a message
        # and then fell through, crashing below on the undefined
        # filecont_source.  Report the problem and stop instead.
        print('Could not open file: ' + filepath)
        return
    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)
    # print() with a single argument behaves identically on Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    print('<table border="0" cellspacing="5px" width="100%">')
    print('')
    for line in outdata:
        print(line)
    print('</table>')
    print('')
    print('*/')
---|
776 | |
---|
777 | |
---|
778 | # main program |
---|
779 | |
---|
def main():
    """Command-line entry point: expects the .bib file path as argv[1]."""
    import sys
    if sys.argv[1:]:
        filepath = sys.argv[1]
    else:
        # print() with a single argument works on both Python 2 and 3
        print("No input file")
        sys.exit()
    filehandler(filepath)

if __name__ == "__main__": main()
---|
790 | |
---|
791 | |
---|
792 | # end python script |
---|