# Keith Briggs 2017-07-23 html output; nodate
# coding=utf8
# Keith Briggs 2017-07-17 en dash
from sys import stderr,path,exit
import re
from regnal_year_03 import parse_regnal_year_spec,get_regnalyear,int_to_roman
# Maps a compact marker found in a date descriptor to the English phrase that
# is spliced into the verbose ("wordy") rendering.  Every phrase carries its
# own trailing space so the pieces can simply be concatenated.
wordy_expander = {
    # no marker present -> contributes nothing
    '':     '',
    # before / after
    '<':    'before ',
    '>':    'after ',
    # approximate dates
    'circa': 'circa ',
    'c':    'circa ',
    'C':    'circa ',
    # undated documents
    'nd':   'no date, ',
    'n.d.': 'no date, ',
    # early / middle / late (prefix markers are looked up upper-cased)
    'E':    'early ',
    'M':    'middle of the ',
    'L':    'late ',
    'EM':   'early to middle ',
    'ML':   'middle to late ',
    # halves
    '1H':   'first half of the ',
    '2H':   'second half of the ',
    # thirds
    '1T':   'first third of the ',
    '2T':   'middle third of the ',
    '3T':   'last third of the ',
    # quarters
    '1Q':   'first quarter of the ',
    '2Q':   'second quarter of the ',
    '3Q':   'third quarter of the ',
    '4Q':   'fourth quarter of the ',
}
# Sort-key offset for each period prefix (keys are the upper-cased prefix).
# The offset is added to the base sort key of the date it qualifies; for the
# early/middle/late family applied to a decade it is first rescaled by the
# parser (divided by 10, then reduced by 10).
eml_shift = {
    # early / middle / late
    'E': 33.0, 'M': 66.0, 'L': 100.0,
    'EM': 75.0, 'ML': 100.0,
    # halves
    '1H': 50.0, '2H': 100.0,
    # thirds
    '1T': 33.0, '2T': 66.0, '3T': 100.0,
    # quarters
    '1Q': 25.0, '2Q': 50.0, '3Q': 75.0, '4Q': 100.0,
}
# ---------------------------------------------------------------------------
# Regular-expression building blocks for a date descriptor.
#
# Every fragment whose group name ends in %d is instantiated twice: with 0 for
# the first date and with 1 for the optional second date of a range.  The
# resulting group names (prenote, postnote, rangesep, simplerange and
# uncertainN, baN, circaN, prefixN, yearN, centuryN, decadeN, oldstyleN) are
# exactly the keys DDD.__init__ reads from match.groupdict(), so they must be
# present: a bare "(?P(" is not valid regex syntax.
# Raw strings are used so that "\d", "\." etc. reach the re module unchanged
# without relying on Python passing unknown string escapes through.
# ---------------------------------------------------------------------------
nodate = r'(nd)|(n\.d\.)|(no date)'
prenote = r'(?P<prenote>(%s)|(\[(.*?)\]))?' % nodate   # "no date" marker or [free text]
postnote = r'(\[(?P<postnote>.*?)\])?'                 # trailing [free text]
circa = r'(?P<circa%d>(c\.?)|(circa))'
uncertain = r'(?P<uncertain%d>\?)'
ba = r'(?P<ba%d>[<>])'  # before or after
half = '[12]h'
third = '[123]t'
quarter = '[1234]q'
eml = '(em)|(ml)|[eml]'
# The leading '%d' argument keeps a %d placeholder in the group name so that
# prefix can be instantiated with 0 / 1 like the other fragments.
prefix = '(?P<prefix%s>(%s)|(%s)|(%s)|(%s))' % ('%d', half, third, quarter, eml)
simplerange = r'(?P<simplerange>(1\d\d\d[-]\d)|(1\d\d\d[-]\d\d)$)'  # post-1000 only
century = r'(?P<century%d>\d\d?)[Cc]'
decade = r'(?P<decade%d>\d{2,3}0)s'
year = r'(?P<year%d>\d{1,4})'
oldstyle = r'(?P<oldstyle%d>(\d{3}[012345678]/\d)|(\d{3}9/\d{2}))'

# Fragments for the first date (group names end in 0).
first_dict = {
    'uncertain': uncertain % 0,
    'ba': ba % 0,
    'circa': circa % 0,
    'prefix': prefix % 0,
    'year': year % 0,
    'simplerange': simplerange,
    'century': century % 0,
    'decade': decade % 0,
    'oldstyle': oldstyle % 0,
}
# Fragments for the second date of a range (group names end in 1).
second_dict = {
    'uncertain': uncertain % 1,
    'ba': ba % 1,
    'circa': circa % 1,
    'prefix': prefix % 1,
    'year': year % 1,
    'century': century % 1,
    'decade': decade % 1,
    'oldstyle': oldstyle % 1,
}
first = '{uncertain}?{ba}?{circa}?{prefix}?({simplerange}|{oldstyle}|{decade}|{year}|{century})'.format(**first_dict)
second = '{uncertain}?{ba}?{circa}?{prefix}?({oldstyle}|{decade}|{year}|{century})'.format(**second_dict)
second = '((?P<rangesep>([x-])|([-–]{2}))' + second + ')?'  # note en dash here
dd = prenote + first + second + postnote + '$'
# NOTE(review): debugging output emitted at import time; retained so the
# module's import-time behaviour is unchanged.
print(dd)
re_dd = re.compile(dd, flags=re.IGNORECASE)
re_spaces = re.compile(r'(\s{2,})')
# Captures what follows a literal "c." up to the end of the string or the next
# range separator.  The dot is escaped so only a real "c." is recognised, and
# the en dash is accepted as a separator because the normalized ("clean") form
# replaces every '-' with an en dash before this pattern is applied.
re_circa = re.compile(r'c\.(.*?)(?=$|-|–|x)')  # lookahead assertion
def simplerange_to_float(x):
    """Return the sort key for a compact range such as '1250-5' or '1250-55'.

    The text after the hyphen replaces the trailing digits of the start year:
    a one-character suffix is appended to the first three digits, anything
    else to the first two.  The result is returned as a float.
    """
    start, suffix = x.split('-')
    kept_digits = 3 if len(suffix) == 1 else 2
    return float(start[:kept_digits] + suffix)
class DDD:
    """Document date descriptor.

    Parses a compact date specification (a regnal year, or a year / decade /
    century / old-style year / simple range, optionally a range of two such
    dates, with optional circa, before/after, uncertainty and period markers
    and bracketed notes) and exposes:

      ok      -- True when the input was understood
      sortkey -- float usable for ordering; -1.0 when undefined
      clean   -- normalized compact form
      wordy   -- verbose English form
      latex   -- LaTeX form of the normalized text
    """

    def __init__(s, dd, verbose=False):
        """Parse *dd*; with *verbose* true, trace the parse on stdout.

        Empty input, input that does not match the grammar, and a regnal-year
        specification reported as 'error' all leave ok False.
        """
        s.verbose = verbose
        # All attributes are defined before any early return so that str(),
        # repr() and every getter are safe on a failed parse as well.
        s.dd = dd  # save exact input
        s.ok = False
        s.clean = ''
        s.wordy = ''
        s.latex = ''
        s.sortkey = -1.0  # indicates not yet defined
        s.regnalyear = None, None, None
        if not dd:  # empty input
            return
        dds = dd.strip()
        # special case: regnal year (these cannot be uncertain)...
        ry = parse_regnal_year_spec(dds)
        if ry:
            s._init_regnal(ry)
            return
        # general case...
        s._init_general(dds)

    def _init_regnal(s, ry):
        """Fill in the object from a parsed regnal-year specification *ry*."""
        if ry == 'error':
            s.wordy = s.clean = 'DDDFIXME(%s)' % s.dd
            return
        if s.verbose: print('ry="%s"' % (ry,))
        s.ok = True
        s.regnalyear = ry
        year, monarch = ry[0], ry[1] + ' ' + str(ry[2])
        actual_year = get_regnalyear(year, monarch)
        if s.verbose: print('year="%s", monarch="%s", actual_year="%s"' % (year, monarch, actual_year,))
        if s.verbose: print('int_to_roman(ry[2])="%s"' % (int_to_roman(ry[2])))
        monarch_name = (monarch.split())[0]
        s.clean = str(year) + ' ' + monarch_name + ' ' + int_to_roman(ry[2])
        if s.verbose: print('s.clean="%s"' % (s.clean,))
        s.wordy = s.clean + ' (%s)' % actual_year
        s.latex = s.clean + ' (%s)' % actual_year
        s.latex = s.latex.replace('<', '$<$').replace('>', '$>$')
        if '/' in actual_year:
            # old-style (split) year: sort under the later of the two years
            s.oldstyle = actual_year
            x, _ = actual_year.split('/')
            s.sortkey = float(x) + 1.0
        else:
            s.sortkey = float(actual_year)

    def _init_general(s, dds):
        """Fill in the object from a non-regnal descriptor *dds* (stripped input)."""
        m = re_dd.match(dds)
        if not m:
            return
        s.ok = True
        sortkey_offset0 = sortkey_offset1 = 0.0
        # Plain names hold the verbose wording, c_ names the normalized text;
        # suffix 0 is the first date, suffix 1 the second date of a range.
        nodate = prenote = postnote = c_rangesep = rangesep = ''
        ba0 = circa0 = prefix0 = year0 = decade0 = century0 = oldstyle0 = uncertain0 = ''
        ba1 = circa1 = prefix1 = year1 = decade1 = century1 = oldstyle1 = uncertain1 = ''
        c_ba0 = c_circa0 = c_prefix0 = c_year0 = c_decade0 = c_century0 = c_oldstyle0 = c_uncertain0 = ''
        c_ba1 = c_circa1 = c_prefix1 = c_year1 = c_decade1 = c_century1 = c_oldstyle1 = c_uncertain1 = ''
        eml0 = eml1 = False
        # The groups are visited in pattern order: a prefix is therefore seen
        # before the date it qualifies, and the last date group that matched
        # determines the base sort key.
        for key, value in m.groupdict().items():
            if value is None:
                continue
            if s.verbose: print('key="%s" value="%s"' % (key, value,))
            if key == 'simplerange':
                c_year0 = year0 = value
                s.sortkey = simplerange_to_float(value)
                break  # nothing after a simple range is processed
            elif key == 'prenote':
                prenote = value.strip('[]')
                if prenote == 'nd' or prenote == 'n.d.' or prenote == 'no date':
                    nodate = 'n.d.'
                    prenote = ''
            elif key == 'rangesep':
                c_rangesep = value
                rangesep = ' to '
            elif key == 'postnote':
                postnote = value.strip('[]')
            elif key == 'ba0':
                c_ba0 = value
                ba0 = wordy_expander[value]
            elif key == 'ba1':
                c_ba1 = value
                ba1 = wordy_expander[value]
            elif key == 'circa0':
                c_circa0 = 'c.'
                circa0 = 'circa '
            elif key == 'circa1':
                c_circa1 = 'c.'
                circa1 = 'circa '
            elif key == 'uncertain0':
                c_uncertain0 = '?'
                uncertain0 = 'perhaps '
            elif key == 'uncertain1':
                c_uncertain1 = '?'
                uncertain1 = 'perhaps '
            elif key == 'prefix0':
                c_prefix0 = value.lower()
                prefix0 = wordy_expander[value.upper()]
                sortkey_offset0 = eml_shift[value.upper()]
                if c_prefix0 in ('e', 'm', 'l', 'em', 'ml'): eml0 = True
            elif key == 'prefix1':
                c_prefix1 = value.lower()
                prefix1 = ' the ' + wordy_expander[value.upper()]
                sortkey_offset1 = eml_shift[value.upper()]
                if c_prefix1 in ('e', 'm', 'l', 'em', 'ml'): eml1 = True
            elif key == 'decade0':
                c_decade0 = decade0 = value + 's'
                s.sortkey = float(value) + 10.0
                if eml0:
                    # rescale the century-sized offset to a decade
                    sortkey_offset0 /= 10.0
                    sortkey_offset0 -= 10.0
            elif key == 'decade1':
                s.sortkey = float(value) + 10.0
                c_decade1 = decade1 = value + 's'
                if eml1:
                    sortkey_offset1 /= 10.0
                    sortkey_offset1 -= 10.0
            elif key == 'century0':
                s.sortkey = 100.0 * float(value) - 100.0
                c_century0 = '%sC' % value
                century0 = ' %sth century' % value
            elif key == 'century1':
                s.sortkey = 100.0 * float(value) - 100.0
                c_century1 = '%sC' % value
                century1 = ' %sth century' % value
            elif key == 'year0':
                c_year0 = year0 = value
                s.sortkey = float(year0)
            elif key == 'year1':
                c_year1 = year1 = value
                s.sortkey = float(year1)
            elif key == 'oldstyle0':
                c_oldstyle0 = oldstyle0 = value
                i = value.index('/')
                s.sortkey = float(value[:i]) + 1.0
            elif key == 'oldstyle1':
                c_oldstyle1 = oldstyle1 = value
                i = value.index('/')
                s.sortkey = float(value[:i]) + 1.0
        # a non-zero offset on the second date takes precedence over the first
        if sortkey_offset1:
            s.sortkey += sortkey_offset1
        else:
            s.sortkey += sortkey_offset0
        clean = ' '.join((nodate, c_uncertain0, c_ba0, c_circa0, c_prefix0, c_year0, c_century0, c_decade0, c_oldstyle0, c_rangesep, c_uncertain1, c_ba1, c_circa1, c_prefix1, c_year1, c_century1, c_decade1, c_oldstyle1,))
        wordy = ' '.join((wordy_expander[nodate], prenote, uncertain0, ba0, circa0, prefix0, year0, century0, decade0, oldstyle0, rangesep, uncertain1, ba1, circa1, prefix1, year1, century1, decade1, oldstyle1, postnote,))
        clean = clean.replace('-', u'–')  # en dash
        wordy = re_spaces.sub(' ', wordy)
        s.wordy = wordy.replace(' ,', ',').strip()
        if prenote: prenote = '[' + prenote + ']'
        if postnote: postnote = '[' + postnote + ']'
        s.clean = (prenote + clean.replace(' ', '') + postnote).strip().replace(' ,', ',')
        s.latex = s.clean
        if 'c.' in s.latex:  # \circa
            # The backslash is doubled because, in a replacement template,
            # a backslash followed by an ASCII letter that is not a known
            # escape ("\c") is rejected by re on Python 3.7 and later.
            s.latex, _ = re_circa.subn(r'\\circa{\1}', s.latex, count=2)
        if '--' not in s.latex: s.latex = s.latex.replace('-', '--')
        s.latex = s.latex.replace(u'–', '--')
        s.latex = s.latex.replace('<', '$<$').replace('>', '$>$').replace('x', r'$\times$')

    def get_ok(s):
        """Return True when the descriptor was parsed successfully."""
        return s.ok

    def get_sortkey(s):
        """Return the float sort key (-1.0 when undefined)."""
        return s.sortkey

    def get_clean(s):
        """Return the normalized compact form."""
        return s.clean

    def get_wordy(s):
        """Return the verbose English form."""
        return s.wordy

    def get_latex(s):
        """Return the LaTeX form ('' when parsing failed)."""
        return s.latex

    def __repr__(s):
        return "DDD('%s')" % (s.dd,)

    def __str__(s):
        return "DD('%s')" % (s.dd,)

    def __lt__(s, t):
        """Order descriptors by sort key."""
        return s.sortkey < t.sortkey

# NOTE(review): in the source as received, the line holding the return
# statement of __lt__ was fused with the opening of the routine that follows
# (its `def` line and the start of an html.write(...) call are missing).  The
# surviving tail of that statement is kept here verbatim for whoever restores
# the routine:
#   \n%-26s\t%4s\t%-22s\t%s\n'%('input','sort','normalized output','verbose output',))
print('-'*120)
f=open(fn,'r')
for line in f:
if '#'==line[0]:
html.write('\n%s\n'%line[:-1])
continue
dd=DDD(line.strip('\n'))
if not dd.get_ok():
print('"%s" failed'%dd)
html.write('"%s" failed\n'%dd)
else:
output_line='%-20s\t%4.0f\t%18s\t%-22s\t%s'%(line[:-1],dd.get_sortkey(),dd.get_clean(),dd.get_latex(),dd.get_wordy(),)
print(output_line)
html_line='%-26s\t%4.0f\t%-22s\t%s'%(line[:-1],dd.get_sortkey(),dd.get_clean(),dd.get_wordy(),)
html_line=html_line.replace('<','<').replace('>','>').replace(u'–','–')
html.write(html_line+'\n')
if dd.get_sortkey()<500: exit()
f.close()
html.write('')
html.close()
# Call test_01() only when this file is executed as a script, not on import.
if __name__=='__main__':
    test_01()