Context Navigation

Back to Ticket #184

Ticket #184: bibtex2dox.py

File bibtex2dox.py, 24.9 KB (added by Peter Kovacs, 17 years ago)

Line
1	#!/usr/bin/env /usr/local/Python/bin/python2.1
2	"""
3	Decoder for bibliographic data, BibTeX
4	Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
5
6	v.8
7	(c)2002-06-23 Vidar Bronken Gundersen
8	http://bibtexml.sf.net/
9	Reuse approved as long as this notification is kept.
10	Licence: GPL.
11
12	Contributions/thanks to:
13	Egon Willighagen, http://sf.net/projects/jreferences/
14	Richard Mahoney (for providing a test case)
15
16	Edited by Sara Sprenkle to be more robust and handle more bibtex features. (c) 2003-01-15
17	1. Changed bibtex: tags to bibxml: tags.
18	2. Use xmlns:bibxml="http://bibtexml.sf.net/"
19	3. Allow spaces between @type and first {
20	4. "author" fields with multiple authors split by " and "
21	are put in separate xml "bibxml:author" tags.
22	5. Option for Titles: words are capitalized
23	only if first letter in title or capitalized inside braces
24	6. Removes braces from within field values
25	7. Ignores comments in bibtex file (including @comment{ or % )
26	8. Replaces some special latex tags, e.g., replaces ~ with ' '
27	9. Handles bibtex @string abbreviations
28	--> includes bibtex's default abbreviations for months
29	--> does concatenation of abbr # " more " and " more " # abbr
30	10. Handles @type( ... ) or @type{ ... }
31	11. The keywords field is split on , or ; and put into separate xml
32	"bibxml:keywords" tags
33	12. Ignores @preamble
34
35	Known Limitations
36	1. Does not transform Latex encoding like math mode and special latex symbols.
37	2. Does not parse author fields into first and last names.
38	E.g., It does not do anything special to an author whose name is in the form LAST_NAME, FIRST_NAME
39	In "author" tag, will show up as <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
40	3. Does not handle "crossref" fields other than to print <bibxml:crossref>...</bibxml:crossref>
41	4. Does not inform user of the input's format errors. You just won't be able to
42	transform the file later with XSL
43
44	You will have to manually edit the XML output if you need to handle
45	these (and unknown) limitations.
46
47	"""
48
49	import string, re
50
# Regex character class of the characters allowed in a BibTeX @string
# abbreviation name (used when building the @string definition pattern).
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that regex escapes such as \s and \w reach
# the re module unchanged.
# NOTE(review): the '*' quantifiers in concatsplit_rex, field_rex and data_rex
# were missing in the scraped copy of this file and have been reconstructed
# from the accompanying comments and usage -- confirm against the original
# attachment.
#

# separator between the names of an author/editor list
author_rex = re.compile(r'\s+and\s+')
# any single brace character
rembraces_rex = re.compile(r'[{}]')
# one brace-protected word such as {LaTeX}; the group keeps it in split() output
capitalize_rex = re.compile(r'({\w*})')

# splits a keyword list on ',' or ';'
# (originally commented as "used by bibtexkeywords(data)")
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line): the BibTeX '#' concatenation operator
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value": group 1 is the field name, group 2 the rest of the line
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = value,": group 2 is the undelimited value up to the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# LaTeX \url{target} command; group 1 is the target
url_rex = re.compile(r'\\url\{([^}]*)\}')
74
75
def transformurls(str):
    r"""Return *str* with every LaTeX ``\url{target}`` command turned into
    the HTML link ``<a href="target">target</a>``.

    Text that contains no ``\url{...}`` command is returned unchanged.
    """
    return url_rex.sub(r'<a href="\1">\1</a>', str)
81
def removebraces(str):
    """Return *str* with every '{' and '}' character removed."""
    return rembraces_rex.sub('', str)
87
# (LaTeX sequence, replacement) pairs applied in order by latexreplacements().
# NOTE(review): the HTML entity names were reconstructed; the scraped copy of
# this file showed the already-rendered characters instead.  Confirm against
# the original attachment.
_LATEX_REPLACEMENTS = (
    ('~', ' '),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),    # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),    # &Utilde; does not exist
)


def latexreplacements(line):
    r"""Replace a fixed set of LaTeX sequences in *line* and return the result.

    '~' becomes a space and the accent commands \'x, \"x and \H x (for the
    vowels listed in _LATEX_REPLACEMENTS) become HTML character entities.
    Call this after the braces were removed, because a braced form such as
    {\'a} is not recognised.
    """
    for latex, html in _LATEX_REPLACEMENTS:
        line = line.replace(latex, html)
    return line
120
def copychars(str, ifrom, count):
    """Copy up to *count* letters from *str*, decoding HTML entities.

    Scanning starts at index *ifrom*.  Only ASCII letters are copied; every
    other character is skipped without being counted.  An HTML entity
    (&xyz;) counts as one character and contributes the single character
    that directly follows the '&' (e.g. 'a' for '&aacute;').

    Returns the collected characters as a string (possibly empty).
    """
    result = ''
    length = len(str)
    i = ifrom
    copied = 0
    in_entity = False           # True between an entity's name and its ';'
    while i < length and copied < count:
        ch = str[i]
        if ch == '&':
            in_entity = True
            if i + 1 < length:
                result += str[i + 1]
            copied += 1
            i += 2              # skip '&' and the character just copied
            continue
        if in_entity:
            # the remainder of the entity name is ignored
            if ch == ';':
                in_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            result += ch
            copied += 1
        i += 1

    return result
147
148
def bibtexauthor(data):
    """Parse a BibTeX author/editor field (names separated by 'and').

    Every name is stripped, has its braces removed and its LaTeX sequences
    replaced; a name of the form "Xyz, A. B." is rewritten to "A. B. Xyz".

    Returns a dictionary with these keys:
      - 'num':    the number of names,
      - 'list':   the list of the transformed names,
      - 'text':   the names joined for output ("A", "A and B" or
                  "A, B, and C"),
      - 'abbrev': abbreviation usable as a bibliography key: the first three
                  letters of the last word of the name if there is a single
                  name, otherwise the first letter of the last word of each
                  name.
    """
    names = []
    for raw in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    num = len(names)
    # Join the names directly rather than through a placeholder character,
    # which would be corrupted by a name that contains the placeholder.
    if num <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    if num == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''
    for name in names:
        # start copying at the last word of the name
        abbrev += copychars(name, name.rfind(' ') + 1, letters)

    return {'list': names, 'num': num, 'text': text, 'abbrev': abbrev}
191
192
def capitalizetitle(data):
    """Normalise the capitalisation of the title string *data*.

    The title is split at brace-protected words ({Word}).  Protected words
    keep their capitalisation and lose their braces.  The first unprotected
    piece has its leading whitespace removed and is capitalised (first
    letter upper case, rest lower case); all later unprotected pieces are
    lower-cased.

    Returns the resulting title string.
    """
    title = ''
    for count, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()

        if check.find('{') == 0:
            # keep phrase's capitalization the same
            title += removebraces(phrase)
        elif count == 0:
            # first piece --> capitalize first letter (after spaces)
            title += check.capitalize()
        else:
            title += phrase.lower()

    return title
217
218
def bibtextitle(data, entrytype):
    """Return the output text for the title string *data*.

    For the entry types 'book' and 'inbook' the stripped title keeps its
    capitalisation; for every other *entrytype* it is passed through
    capitalizetitle() first.  Braces are removed in both cases.
    """
    title = data.strip()
    if entrytype not in ('book', 'inbook'):
        title = capitalizetitle(title)
    return removebraces(title)
231
232
def entry_cmp(x, y):
    """Compare two entry lists by their first element (the sort key).

    Returns -1, 0 or 1 when x[0] is less than, equal to or greater than
    y[0].  Written without the cmp() builtin so that it also works on
    Python 3.
    """
    return (x[0] > y[0]) - (x[0] < y[0])
238
239
def _field(entrycont, name):
    """Return the value stored for field *name*, or '' if it is absent."""
    return entrycont.get(name, '')


def _emphasize(entrycont, name):
    """Wrap field *name* in <em>...</em> if the entry has that field."""
    if name in entrycont:
        entrycont[name] = '<em>' + entrycont[name] + '</em>'


def _build_entry(entrytype, entrycont):
    """Format the collected fields of one BibTeX entry.

    Returns the list [sortkey, bibkey, fragment, fragment, ...] where the
    fragments are the pieces of the reference text in output order.
    """
    # entry type related formattings
    if entrytype in ('book', 'inbook'):
        _emphasize(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        _emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    number = _field(entrycont, 'number')
    pages = _field(entrycont, 'pages').replace('--', '-')
    year = _field(entrycont, 'year')

    entry = []
    if 'author' in entrycont:
        entry.append(entrycont['author']['text'] + '.')
    if _field(entrycont, 'title') != '':
        entry.append(entrycont['title'] + '.')
    if _field(entrycont, 'journal') != '':
        entry.append(entrycont['journal'] + ',')
    if _field(entrycont, 'booktitle') != '':
        entry.append('In ' + entrycont['booktitle'] + ',')
    if _field(entrycont, 'type') != '':
        eline = entrycont['type']
        if number != '':
            eline += ' ' + number
        entry.append(eline + ',')
    for name in ('institution', 'publisher', 'school', 'address'):
        if _field(entrycont, name) != '':
            entry.append(entrycont[name] + ',')
    if _field(entrycont, 'edition') != '':
        entry.append(entrycont['edition'] + ' edition,')
    if _field(entrycont, 'howpublished') != '':
        entry.append(entrycont['howpublished'] + ',')
    if _field(entrycont, 'volume') != '':
        # volume(number):pages
        eline = entrycont['volume']
        if number != '':
            eline += '(' + number + ')'
        if pages != '':
            eline += ':' + pages
        entry.append(eline + ',')
    elif pages != '':
        entry.append('pages ' + pages + ',')
    if year != '':
        if _field(entrycont, 'month') != '':
            entry.append(entrycont['month'] + ' ' + year + '.')
        else:
            entry.append(year + '.')
    if _field(entrycont, 'note') != '':
        entry.append(entrycont['note'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            # letters of the last word of each name
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated key
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey] + entry


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a Doxygen reference list.

    filecont_source is an iterable of lines holding one "@type{key," line,
    one "field = value," line per field and a closing "}" line per entry
    (the form returned by bibtexwasher()).

    Returns a list of strings.  For every entry, ordered by sort key, it
    contains an "\\anchor KEY\\n<b>[KEY]</b>" string, the fragments of the
    reference text and an empty string.  Entries whose generated keys
    collide get the suffixes 'a', 'b', ... appended to their keys.

    NOTE(review): the '*' quantifiers of the patterns below and the
    &amp;/&lt;/&gt; entities were missing in the scraped copy of this file
    and have been reconstructed -- confirm against the original attachment.
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')
    # field = {data} and field = "data"; the undelimited form uses data_rex
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    filecont = []
    entrytype = ''
    entrycont = None            # fields of the entry being read, if any

    for line in filecont_source:
        line = line.rstrip('\n')

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        match = pubtype_rex.match(line)
        if match:
            entrycont = {}
            entrytype = match.group(1).lower()
            continue

        if entrycont is None:
            # not inside an entry: nothing to attach this line to
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            filecont.append(_build_entry(entrytype, entrycont))
            entrycont = None
            continue

        # field, publication info
        match = (bracedata_rex.match(line) or quotedata_rex.match(line)
                 or data_rex.match(line))
        if not match:
            continue
        field = match.group(1).lower()
        data = match.group(2)
        if field == '':
            continue

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue
        if field == 'title':
            value = bibtextitle(data, entrytype)
        else:
            value = removebraces(transformurls(data.strip()))
        if value != '':
            entrycont[field] = latexreplacements(value)

    # sort entries by sort key (stable, so ties keep their input order)
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1

    # generate output
    counttable = {}
    output = []
    for entry in filecont:
        # generate output key from the bibtex key
        bibkey = entry[1]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            index = counttable.get(bibkey, 0)
            outkey = bibkey + chr(97 + index)    # 97 == ord('a')
            counttable[bibkey] = index + 1

        # append the entry code to the output
        output.append('\\anchor ' + outkey + '\n' +
                      '<b>[' + outkey + ']</b>')
        output.extend(entry[2:])
        output.append('')

    return output
442
443
def verify_out_of_braces(line, abbr):
    """Tell whether *abbr* occurs in *line* outside braces and quotes.

    *line* is split at every '{', '}' and '"'.  *abbr* is searched for as a
    whole word, ignoring case, in the pieces in between.

    Returns 1 if a piece containing *abbr* is found while no brace and no
    quote is open, otherwise 0.
    """
    # escape the name so that it is always matched literally
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    open_brace = 0
    open_quote = 0

    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            open_brace = open_brace + 1
        elif phrase == '}':
            open_brace = open_brace - 1
        elif phrase == '"':
            # a quote toggles between opened and closed
            open_quote = 1 - open_quote
        elif abbr_rex.search(phrase):
            if open_brace == 0 and open_quote == 0:
                return 1

    return 0
472
473
def concat_line(line):
    r"""Resolve BibTeX '#' concatenations in a "field = value" line.

    A line of the form  field = phrase1 # phrase2 # ... # phrasen  is
    returned as  field = phrase1 phrase2 ... phrasen  with the inner
    delimiters removed and the outer ones turned into braces.

    Bug: Doesn't always work with multiple abbreviations plugged in.
    """
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(rest)
    last = len(pieces) - 1
    joined = field + ' ='

    for index, phrase in enumerate(pieces):
        phrase = phrase.strip()

        if index != 0:
            # inner piece: drop its opening delimiter
            if phrase.startswith('"') or phrase.startswith('{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            # first piece: an opening quote becomes an opening brace
            phrase = phrase.replace('"', '{', 1)

        if index != last:
            # inner piece: drop its closing delimiter
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            # last piece: a closing quote becomes a closing brace
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'
        joined = joined + ' ' + phrase

    return joined
519
520
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations into washed BibTeX text.

    filecont_source is the file content as one string whose lines are
    separated by newlines.  The month abbreviations predefined by BibTeX
    ('jan' ... 'dec') are always known; further abbreviations are collected
    from the @string definitions in the text.  @string, @comment and
    @preamble blocks are removed from the result, and '#' concatenations on
    lines that had an abbreviation substituted are resolved with
    concat_line().

    Returns the processed text as one string.

    NOTE(review): the '*' quantifiers of the patterns below were missing in
    the scraped copy of this file and have been reconstructed -- confirm
    against the original attachment.
    """
    def compile_abbr(name):
        # whole-word match of the abbreviation; group 1 is an optional comma
        return re.compile(r'\b' + re.escape(name) + r'(,?)\b', re.I)

    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']
    abbr_rex = [compile_abbr(name) for name in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    skip_depth = 0      # brace depth of the block being skipped (0: none)
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        if skip_depth > 0:
            # Inside a @string/@comment/@preamble block: drop every line
            # until the braces opened by the block are closed again.
            skip_depth += line.count('{') - line.count('}')
            continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            if abbr != '' and abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(compile_abbr(abbr))

        if definition or comment_rex.search(line) or preamble_rex.search(line):
            skip_depth = max(line.count('{') - line.count('}'), 0)
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function replacement inserts the value literally, so
                    # backslashes in LaTeX values are not taken for regex
                    # template escapes.
                    line = rex.sub(
                        lambda m, value=value: value + m.group(1), line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
620
def no_outer_parens(filecont):
    """Convert entries written as @type( ... ) to the form @type{ ... }.

    filecont is the file content as one string.  A '(' is converted when it
    directly follows "@type" (optionally separated by whitespace); the ')'
    that brings the count of open parentheses back to zero is converted
    along with it.  All other parentheses and all braces are left untouched.

    Returns the converted string.
    """
    # An entry type counts only if it ends the text in front of the
    # delimiter, so an '@' elsewhere in the text (e.g. in an e-mail address)
    # does not cause a following parenthesis to be converted.
    at_rex = re.compile(r'@\w*\s*$')

    open_paren_count = 0
    open_type = 0       # 1 while inside an entry opened with '('
    look_next = 0       # 1 if the previous piece ended with @type

    rebuilt = []

    for phrase in re.split(r'([(){}])', filecont):
        if look_next == 1:
            if phrase == '(':
                phrase = '{'
                open_paren_count = open_paren_count + 1
            else:
                # the entry is opened with a brace (or not at all)
                open_type = 0
            look_next = 0

        if phrase == '(':
            open_paren_count = open_paren_count + 1
        elif phrase == ')':
            open_paren_count = open_paren_count - 1
            if open_type == 1 and open_paren_count == 0:
                phrase = '}'
                open_type = 0
        elif at_rex.search(phrase):
            open_type = 1
            look_next = 1

        rebuilt.append(phrase)

    return ''.join(rebuilt)
664
665
def bibtexwasher(filecont_source):
    """Bring the lines of a BibTeX file into the form bibtexdecoder() reads.

    filecont_source is an iterable of the raw lines of the file.  Whitespace
    is collapsed, lines starting with '%' are dropped, @type( ... ) entries
    are converted to @type{ ... }, the text is re-split so that every entry
    header, field and closing brace is on a line of its own, and @string
    abbreviations are substituted.

    Returns the list of the resulting non-blank lines, each terminated by a
    newline.

    NOTE(review): the '*' quantifiers of the patterns below were missing in
    the scraped copy of this file and have been reconstructed -- confirm
    against the original attachment.
    """
    space_rex = re.compile(r'\s+')

    # remove trailing and excessive whitespace, ignore comments
    pieces = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not line.startswith('%'):
            pieces.append(' ' + line)

    # the file is in one long string
    filecont = no_outer_parens(''.join(pieces))

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', r'\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', r'= \g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      r'\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', r'\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = filecont.replace('{\\&}', '&')
    filecont = filecont.replace('\\&', '&')

    # do checking for open braces to get format correct:
    # the brace that closes an entry is put on a new line
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)

    filecont2 = bibtex_replace_abbreviations(''.join(rebuilt))

    # gather the non-blank lines
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
752
753
def filehandler(filepath):
    """Convert the BibTeX file *filepath* and print the result.

    The output is a Doxygen comment block defining the page "references".
    If the file cannot be opened or read, a message is printed and nothing
    else is done.
    """
    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            fd.close()
    except (IOError, OSError):
        print('Could not open file: %s' % filepath)
        # without the file content there is nothing to convert
        return
    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
769
770
# main program

def main():
    """Convert the BibTeX file named by the first command line argument.

    Without an argument a message is printed and the program exits with
    status 1.
    """
    import sys
    if sys.argv[1:]:
        filepath = sys.argv[1]
    else:
        print('No input file')
        sys.exit(1)
    filehandler(filepath)

if __name__ == "__main__": main()
783
784
785	# end python script

Download in other formats:

Original Format