COIN-OR::LEMON - Graph Library

Ticket #184: bibtex2dox-table.py

File bibtex2dox-table.py, 25.1 KB (added by Peter Kovacs, 15 years ago)
Line 
1#!/usr/bin/env /usr/local/Python/bin/python2.1
2"""
3  Decoder for bibliographic data, BibTeX
4  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
5
6  v.8
7  (c)2002-06-23 Vidar Bronken Gundersen
8  http://bibtexml.sf.net/
9  Reuse approved as long as this notification is kept.
10  Licence: GPL.
11
12  Contributions/thanks to:
13  Egon Willighagen, http://sf.net/projects/jreferences/
14  Richard Mahoney (for providing a test case)
15
16  Edited by Sara Sprenkle to be more robust and handle more bibtex features.  (c) 2003-01-15
17  1.  Changed bibtex: tags to bibxml: tags.
18  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
19  3.  Allow spaces between @type and first {
20  4.  "author" fields with multiple authors split by " and "
21      are put in separate xml "bibxml:author" tags.
22  5.  Option for Titles: words are capitalized
23      only if first letter in title or capitalized inside braces
24  6.  Removes braces from within field values
25  7.  Ignores comments in bibtex file (including @comment{ or % )
26  8.  Replaces some special latex tags, e.g., replaces ~ with ' '
27  9.  Handles bibtex @string abbreviations
28        --> includes bibtex's default abbreviations for months
29        --> does concatenation of abbr # " more " and " more " # abbr
30  10. Handles @type( ... ) or @type{ ... }
31  11. The keywords field is split on , or ; and put into separate xml
32      "bibxml:keywords" tags
33  12. Ignores @preamble
34
35  Known Limitations
36  1.  Does not transform Latex encoding like math mode and special latex symbols.
37  2.  Does not parse author fields into first and last names.
38      E.g., It does not do anything special to an author whose name is in the form LAST_NAME, FIRST_NAME
39      In "author" tag, will show up as <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
40  3.  Does not handle "crossref" fields other than to print <bibxml:crossref>...</bibxml:crossref>
41  4.  Does not inform user of the input's format errors.  You just won't be able to
42      transform the file later with XSL
43
44  You will have to manually edit the XML output if you need to handle
45  these (and unknown) limitations.
46
47"""
48
49import string, re
50
# character class for the characters allowed in an @string abbreviation name
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that regex escapes such as \s and \w are
# not (mis)interpreted as string escapes by the Python parser.
#

# separator between the individual names of an author/editor field
author_rex = re.compile(r'\s+and\s+')
# any single brace character
rembraces_rex = re.compile('[{}]')
# a brace-protected word, e.g. {LEMON}; captured so re.split() keeps it
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile('[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile('([{}"])')

# "name = rest of line" (group 1: field name, group 2: everything after '=')
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = bare value," (group 1: field name, group 2: value up to a comma)
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# LaTeX \url{...} command (group 1: the URL)
url_rex = re.compile(r'\\url\{([^}]*)\}')
74
75
#
# return the string parameter with every LaTeX \url{...} command
# turned into an HTML link whose target and text are both the URL
#
def transformurls(str):
    anchor_template = r'<a href="\1">\1</a>'
    linked = url_rex.sub(anchor_template, str)
    return linked
81
#
# return the string parameter with every '{' and '}' character dropped
#
def removebraces(str):
    # splitting on the brace characters and gluing the pieces back
    # together deletes exactly the braces
    pieces = rembraces_rex.split(str)
    return ''.join(pieces)
87
#
# latex-specific replacements
# (do this after braces were removed)
#
# Each pair is (LaTeX sequence, HTML entity).  The pairs are applied in
# the listed order by latexreplacements().
#
_LATEX_REPLACEMENTS = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


#
# return the line with '~' and the accented-vowel LaTeX sequences listed
# in _LATEX_REPLACEMENTS replaced by HTML entities
# @param line --> string to transform
#
def latexreplacements(line):
    # str.replace is used instead of the string-module function so the
    # code also runs where string.replace no longer exists (Python 3)
    for latex, entity in _LATEX_REPLACEMENTS:
        line = line.replace(latex, entity)
    return line
120
#
# copy characters from a string, decoding html expressions (&xyz;)
#
# Starting at index ifrom, at most 'count' characters are collected:
#  - an ASCII letter outside an html expression is copied as it is,
#  - an html expression contributes only the character that directly
#    follows its '&' (e.g. the 'a' of &aacute;); the remaining characters
#    up to and including the ';' are skipped,
#  - every other character is skipped without being counted.
#
def copychars(str, ifrom, count):
    collected = []
    size = len(str)
    pos = ifrom
    taken = 0
    in_entity = False
    while pos < size and taken < count:
        ch = str[pos]
        pos += 1
        if ch == '&':
            # start of an html expression: keep the character after '&'
            in_entity = True
            if pos < size:
                collected.append(str[pos])
            taken += 1
            pos += 1
        elif in_entity:
            # inside the expression: only watch for its terminator
            if ch == ';':
                in_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            collected.append(ch)
            taken += 1

    return ''.join(collected)
147
148
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary with the following values:
#  - num: the number of authors,
#  - list: the list of the author names (braces removed, latex
#    replacements applied, "Xyz, A. B." turned into "A. B. Xyz"),
#  - text: the names joined for display: "A", "A and B" or
#    "A, B, and C",
#  - abbrev: abbreviation that can be used to label the bibliography
#    entry: the first three letters of the last word of the name for a
#    single author, otherwise the first letter of the last word of each
#    name
#
def bibtexauthor(data):
    authors = []
    for author in author_rex.split(data):
        # general transformations
        author = latexreplacements(removebraces(author.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = author.find(',')
        if pos != -1:
            author = author[pos+1:].strip() + ' ' + author[:pos].strip()
        authors.append(author)

    num = len(authors)

    # Join the names directly instead of going through a '#' placeholder,
    # which mangled any name that itself contained a '#'.
    if num <= 2:
        text = ' and '.join(authors)
    else:
        text = ', '.join(authors[:-1]) + ', and ' + authors[-1]

    # letters taken from each name for the abbreviation
    if num == 1:
        count = 3
    else:
        count = 1
    abbrev = ''
    for author in authors:
        # start copying at the last space-separated word of the name
        abbrev += copychars(author, author.rfind(' ') + 1, count)

    result = {}
    result['list'] = authors
    result['num'] = num
    result['text'] = text
    result['abbrev'] = abbrev
    return result
191
192
#
# data = title string
# @return the title in "sentence case":
#  - a word wrapped in braces, e.g. {LEMON}, keeps its capitalization
#    (the braces themselves are removed),
#  - the first piece of the title loses its leading whitespace, gets its
#    first character upper-cased and the rest lower-cased,
#  - every other piece is lower-cased.
#
def capitalizetitle(data):
    pieces = []
    # capitalize_rex has a capturing group, so the brace-protected words
    # are kept in the split result
    for index, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()

        if check.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first piece --> capitalize first letter (after spaces)
            pieces.append(check.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
217
218
#
# @return the display text for a title
# @param data --> title string
# @param entrytype --> lower-case bibtex entry type
# Titles of 'book' and 'inbook' entries keep their capitalization; every
# other title goes through capitalizetitle().  Braces are removed in
# both cases.
#
def bibtextitle(data, entrytype):
    text = data.strip()
    if entrytype not in ('book', 'inbook'):
        text = capitalizetitle(text)
    return removebraces(text)
231
232
#
# function to compare entry lists by their first element (the sort key)
# @return -1, 0 or 1 when x[0] is smaller than, equal to or greater
#         than y[0]
#
def entry_cmp(x, y):
    # spelled out because the cmp() builtin is not available in Python 3
    return (x[0] > y[0]) - (x[0] < y[0])
238
239
#
# regular expressions used by bibtexdecoder (compiled once)
#
# want @<alphanumeric chars><spaces>{<spaces><any chars>,
_pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
# a line that starts with } and has only whitespace after it
_endtype_rex = re.compile(r'}\s*$')
# field = {data} and field = "data" lines
_bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
_quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')


#
# split a "field = value" line
# @return (lower-case field name, raw value); ('', '') when the line is
#         not a field assignment
#
def _split_field(line):
    # tried in this order: field = {data}, field = "data", field = data
    for rex in (_bracedata_rex, _quotedata_rex, data_rex):
        found = rex.match(line)
        if found:
            return found.group(1).lower(), rex.sub('\\g<2>', line)
    return '', ''


#
# build the output list of one bibliography entry
# @param entrytype --> lower-case bibtex entry type
# @param entrycont --> dictionary of the decoded fields (modified in
#                      place by the entry type related formatting)
# @return [sort key, bibtex key, text fragment, text fragment, ...]
#
def _format_entry(entrytype, entrycont):
    def has(key):
        return entrycont.get(key, '') != ''

    # entry type related formatting; fields the entry does not have are
    # left alone instead of raising KeyError
    if entrytype in ('book', 'inbook'):
        if 'title' in entrycont:
            entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        if 'journal' in entrycont:
            entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in entrycont:
            entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if has('pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    # text fragments, in output order
    entry = []
    if has('author'):
        entry.append(entrycont['author']['text'] + '.')
    if has('title'):
        entry.append(entrycont['title'] + '.')
    if has('journal'):
        entry.append(entrycont['journal'] + ',')
    if has('booktitle'):
        entry.append('In ' + entrycont['booktitle'] + ',')
    if has('type'):
        eline = entrycont['type']
        if has('number'):
            eline += ' ' + entrycont['number']
        entry.append(eline + ',')
    for key in ('institution', 'publisher', 'school', 'address'):
        if has(key):
            entry.append(entrycont[key] + ',')
    if has('edition'):
        entry.append(entrycont['edition'] + ' edition,')
    if has('howpublished'):
        entry.append(entrycont['howpublished'] + ',')
    if has('volume'):
        eline = entrycont['volume']
        if has('number'):
            eline += '(' + entrycont['number'] + ')'
        if has('pages'):
            eline += ':' + entrycont['pages']
        entry.append(eline + ',')
    elif has('pages'):
        entry.append('pages ' + entrycont['pages'] + ',')
    if has('year'):
        if has('month'):
            entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            entry.append(entrycont['year'] + '.')
    if has('note'):
        entry.append(entrycont['note'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated bibtex key
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey] + entry


#
# decode the washed bibtex lines "filecont_source"
# @param filecont_source --> sequence of lines: an "@type{id," line
#        opens an entry, each "field = value" line adds a field to it
#        and a line starting with "}" closes it
# @return list of output lines: one HTML table row per entry, sorted by
#         the generated sort key; each row holds the "[key]" label and a
#         doxygen \anchor followed by the text of the entry
#
def bibtexdecoder(filecont_source):
    filecont = []
    entrytype = None
    entrycont = None      # fields of the entry being read, None outside

    for line in filecont_source:
        # drop the line terminator (and nothing else)
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        pubtype = _pubtype_rex.match(line)
        if pubtype:
            entrycont = {}
            entrytype = pubtype.group(1).lower()
            continue

        # nothing to add to or to close outside of an entry
        if entrycont is None:
            continue

        # end entry if just a }
        if _endtype_rex.match(line):
            filecont.append(_format_entry(entrytype, entrycont))
            # a stray } must not emit the same entry a second time
            entrycont = None
            continue

        # field, publication info
        field, data = _split_field(line)
        if field == '':
            continue
        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue
        if field == 'title':
            value = bibtextitle(data, entrytype)
        else:
            value = removebraces(transformurls(data.strip()))
        # empty values are not stored
        if value != '':
            entrycont[field] = latexreplacements(value)

    # sort entries by their sort key (list.sort is stable, so entries
    # with equal keys keep their input order)
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1

    # generate output
    counttable = {}
    output = []
    for entry in filecont:
        # generate output key from the bibtex key: keys used by several
        # entries get a letter suffix (a, b, c, ...) in sorted order
        bibkey = entry[1]
        seen = counttable.get(bibkey, 0)
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + seen)
        counttable[bibkey] = seen + 1

        # append the entry code to the output
        output.append('<tr valign="top">\n' +
                      '<td>[' + outkey + ']</td>')
        output.append('<td>')
        output.append('\\anchor ' + outkey)
        output.extend(entry[2:])
        output.append('</td>\n</tr>')
        output.append('')

    return output
445
446
#
# return 1 iff abbr occurs in line as a whole word (case-insensitive)
# while no brace and no double quote is open, otherwise return 0
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    abbr_rex = re.compile('\\b' + abbr + '\\b', re.I)

    brace_depth = 0
    in_quotes = False

    # delimiter_rex keeps the delimiters, so the tokens alternate between
    # plain text and a single {, } or "
    for token in delimiter_rex.split(line):
        if token == "{":
            brace_depth += 1
        elif token == "}":
            brace_depth -= 1
        elif token == '"':
            in_quotes = not in_quotes
        elif brace_depth == 0 and not in_quotes and abbr_rex.search(token):
            return 1

    return 0
475
476
#
# a line in the form  field = phrase1 # phrase2 # ... # phrasen
# is returned as      field = phrase1 phrase2 ... phrasen
# The quotes/braces between the phrases are dropped and a quoted value
# ends up delimited by braces: { ... }
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    # only look at part after equals
    name = field_rex.sub(r'\g<1>', line)
    value = field_rex.sub(r'\g<2>', line)

    phrases = concatsplit_rex.split(value)
    last = len(phrases) - 1

    joined = [name + ' =']
    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # opening delimiter: dropped on every phrase but the first,
        # where an opening quote becomes an opening brace
        if index != 0:
            if phrase[:1] in ('"', '{'):
                phrase = phrase[1:]
        elif phrase[:1] == '"':
            phrase = '{' + phrase[1:]

        # closing delimiter: dropped on every phrase but the last,
        # where a closing quote becomes a closing brace
        if index != last:
            if phrase[-1:] in ('"', '}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + "}"
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + "},"

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase += "#"

        joined.append(phrase)

    return ' '.join(joined)
522
523
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
# @return the data as one string, without blank lines and without the
#         @string, @comment and @preamble blocks, with the month
#         abbreviations and the @string abbreviations replaced by their
#         values wherever they occur outside of braces and quotes
#
def bibtex_replace_abbreviations(filecont_source):
    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan','feb','mar','apr','may','jun',
                 'jul','aug','sep','oct','nov','dec']
    value_list = ['January','February','March','April',
                  'May','June','July','August','September',
                  'October','November','December']

    # an abbreviation is a whole word, optionally followed by a comma
    # (group 1), which is kept when the abbreviation is replaced
    front = '\\b'
    back = '(,?)\\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)',
                             re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    # true while inside an @string/@comment/@preamble block
    waiting_for_end_string = False
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string:
            # Drop everything up to and including the first line that
            # holds a closing brace.  (Lines without one used to fall
            # through and leak the content of the block into the output.)
            if '}' in line:
                waiting_for_end_string = False
            continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            # the first definition of an abbreviation wins
            if abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function is used as replacement so that
                    # backslashes in the value (LaTeX commands) are not
                    # read as escapes of a replacement template.
                    line = rex.sub(
                        lambda found, value=value: value + found.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"','{{')
    filecont2 = filecont2.replace('"}','}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
623
#
# convert @type( ... ) to @type{ ... }
#
# Only the parenthesis that directly follows a piece of text containing
# '@' and the parenthesis that brings the nesting back to zero are
# converted; parentheses inside the entry are left alone.
#
def no_outer_parens(filecont):
    at_rex = re.compile(r'@\w*')

    depth = 0               # currently open parentheses
    in_paren_entry = False  # an '@' was seen and its ')' is still missing
    expect_opener = False   # the previous piece of text contained an '@'

    pieces = []

    # the delimiters are captured, so the tokens alternate between plain
    # text and a single (, ), { or }
    for token in re.split('([(){}])', filecont):
        if expect_opener:
            expect_opener = False
            if token == '(':
                # opening parenthesis of the entry
                token = '{'
                depth += 1
            else:
                in_paren_entry = False

        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if in_paren_entry and depth == 0:
                # closing parenthesis of the entry
                token = '}'
                in_paren_entry = False
        elif at_rex.search(token):
            in_paren_entry = True
            expect_opener = True

        pieces.append(token)

    return ''.join(pieces)
667
668
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
# @param filecont_source --> the lines of the bibtex file
# @return list of non-blank lines, each ending in a newline: the entry
#         header, every field and the closing brace of an entry are on
#         separate lines, comment lines are dropped, @type( ... ) is
#         written as @type{ ... } and abbreviations are substituted
#
def bibtexwasher(filecont_source):

    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if not comment_rex.match(line) and line != '':
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    # every @ starts a new line
    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters: {\&} and \& become &
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # a closing brace that brings the nesting back to zero starts a line
    open_brace_count = 0
    pieces = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the lines, ignoring blank ones
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
755
756
#
# read the bibtex file "filepath" and print the doxygen page that holds
# the table of references
# If the file cannot be read, a message is printed and nothing else
# is done.
#
def filehandler(filepath):
    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even when reading fails
            fd.close()
    except (IOError, OSError):
        print('Could not open file: %s' % filepath)
        # there is nothing to convert; going on would only fail on the
        # missing file content
        return
    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)
    print('/**')
    print('\\page references References')
    print('')
    print('<table border="0" cellspacing="5px" width="100%">')
    print('')
    for line in outdata:
        print(line)
    print('</table>')
    print('')
    print('*/')
776
777
# main program

#
# convert the bibtex file named by the first command line argument;
# without an argument print a message and exit
#
def main():
    import sys
    if not sys.argv[1:]:
        print("No input file")
        sys.exit()
    filehandler(sys.argv[1])

if __name__ == "__main__":
    main()
790
791
792# end python script