import re
import argparse
from collections import OrderedDict

# Command-line interface: both the input .bib path and the output path are
# mandatory, each available under a long and a short flag.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--file', '-f',
    help='Bib file',
    required=True,
    type=str,
)
parser.add_argument(
    '--out', '-o',
    help='Output file',
    required=True,
    type=str,
)

# Parsing happens at module load; the two paths are kept as module-level names.
args = parser.parse_args()
bibfile, outfile = args.file, args.out

class Entry:
  """One bibliography record: an entry type, a key, and its ordered fields."""

  def __init__(self):
    # Field name -> value text, kept in insertion order.
    self.fields = OrderedDict()
    # Identifier written right after the opening brace.
    self.key = None
    # Text written between '@' and the opening brace.
    self.type = None

  def write(self, f, remove_keys, blind_keys):
    """
    Serialize this entry to ``f`` as ``@type{key, ... }``.

    :param f: object with a ``write(str)`` method; it receives the whole entry in one call
    :param remove_keys: field names that are left out of the output entirely
    :param blind_keys: field names that are still written, but prefixed with ``%%``
    :return: None
    """
    # Gather every line first so the tail of the text can be trimmed before writing.
    pieces = ['@%s{%s,\n' % (self.type, self.key)]
    for name, value in self.fields.items():
      if name in remove_keys:
        continue
      template = '  %%%%%s = {%s},\n' if name in blind_keys else '  %s = {%s},\n'
      pieces.append(template % (name, value))

    text = ''.join(pieces)
    # Every piece ends with ',\n'; dropping the last two characters removes the
    # trailing comma so the entry closes as '}' + newline + '}' instead of '},' + '}'.
    f.write(text[:-2] + '\n}\n')


# Module-level registry of parsed entries, keyed by entry key; clean() fills it.
entries = {}


# Patterns are compiled once. Raw strings keep the regex escapes (\{, \}, \s)
# literal instead of relying on Python passing unknown string escapes through.
_ENTRY_START_RE = re.compile(r'@(.*)\{(.*),.*')
_ENTRY_END_RE = re.compile(r'^\}$')
# A field line is "<name> = {<value>}" with optional indentation. The name may
# not contain whitespace or '=', so indentation never leaks into the key and a
# ' = {' inside the value cannot be mistaken for the separator.
_FIELD_RE = re.compile(r'^\s*([^\s=]+)\s*=\s*\{(.*)\}')


def clean(infile, outfile):
  """
  Clean a bib file exported by Mendeley by dropping or commenting out fields
  that tend to cause trouble downstream.

  Parsing is line based: an entry starts on a line matching ``@type{key,`` and
  ends on a line consisting of a single ``}``. Inside an entry only lines of the
  form ``name = {value}`` are kept; any other line is skipped. If the file ends
  before the closing ``}`` line, the entry is kept with the fields read so far.

  Parsed entries are stored in the module-level ``entries`` dict; when a key
  occurs more than once the first occurrence wins. Every entry in ``entries``
  is then written to ``outfile``.

  :param infile: raw mendeley export
  :param outfile: cleaned bib file
  :return: None
  """

  # Fields to be removed
  remove_keys = ['annote', 'annotate', 'abstract', 'mendeley-groups', 'keywords', 'file']

  # Fields to be commented out - so that bibtex will ignore them
  blind_keys = ['issn', 'isbn', 'doi']

  # Read the whole input before touching the output, so the output file is not
  # truncated while the input is still being read (e.g. when both paths match).
  with open(infile, 'r') as bibf:
    lines = iter(bibf)
    for line in lines:
      header = _ENTRY_START_RE.search(line)
      if not header:
        continue

      entry = Entry()
      entry.type = header.group(1)
      entry.key = header.group(2)

      # Consume the entry body from the same iterator. The loop also stops at
      # end of file, so an unterminated entry cannot spin forever.
      for body_line in lines:
        if _ENTRY_END_RE.search(body_line):
          break
        field = _FIELD_RE.match(body_line)
        if field:
          entry.fields[field.group(1)] = field.group(2)

      # Keep only the first entry seen for a given key.
      if entry.key not in entries:
        entries[entry.key] = entry

  with open(outfile, 'w') as cleanf:
    for entry in entries.values():
      entry.write(cleanf, remove_keys, blind_keys)

# Script entry: clean the file named by --file and write the result to --out.
clean(infile=bibfile, outfile=outfile)