#!/usr/bin/env python
# -*- coding: cp1252 -*-
# Clean up the saved HTML pages in the current directory, build an index page for them, and move the results into a dated folder.
import os
import sys
import getopt
import time
import string
# Module-level state shared by the functions below.
# The former module-level ``global`` statements were no-ops and are dropped;
# a function that rebinds one of these names declares ``global`` itself.
g_title = ""  # title of the page most recently handled by process_file()
# Charset written into the generated <meta> tag.  "utf-8" and "cp1252" were
# assigned here before, but only this last value ever took effect.
g_encoding = "iso-8859-1"
g_index_only = False  # read by process_file()
g_date_folder = ""  # main() assigns a local of the same name, not this one
#SVT Nyheter
def go_past_ul(s):
    """Return the text that follows the first nested ``<ul>`` group in *s*.

    Every occurrence of ``ul>`` is examined: one preceded by ``/`` counts as
    a closing tag, any other as an opening tag.  Once the nesting depth has
    reached two and then dropped back to zero, everything up to and including
    that last ``ul>`` is discarded and the remainder is returned.  If that
    never happens, *s* is returned unchanged.

    The search begins at index 3, so an ``ul>`` that starts before index 3
    is not counted.
    """
    depth = 0
    seen_nested = False
    pos = 1
    while True:
        pos = s.find("ul>", pos + 2)
        if pos < 0:
            # Test "not found" before indexing: s[pos - 1] with pos == -1
            # reads s[-2], which raises IndexError on a one-character string.
            return s
        if s[pos - 1] == "/":
            depth -= 1
        else:
            depth += 1
        if depth >= 2:
            seen_nested = True
        if seen_nested and depth == 0:
            return s[pos + 3:]
def doc_title_name():
    """Return the index-page title for the current local date.

    The title is a fixed prefix followed by the zero-padded day of the
    month, the lower-cased abbreviation from month_name(), and the
    four-digit year, separated by single spaces.
    """
    # Read the clock once so day, month and year always belong to the same
    # instant, even if the call happens right at midnight.
    now = time.localtime()
    s_month = month_name(now.tm_mon).lower()
    return ("Nyheter / Actualités - " + time.strftime("%d", now) + " "
            + s_month + " " + time.strftime("%Y", now))
def date_folder_name():
    """Return the path of today's output folder below the current directory.

    The folder name is ``NY`` + two-digit day + the month_name()
    abbreviation + two-digit year.
    """
    # Read the clock once so all three date parts agree.
    now = time.localtime()
    s_folder = (time.strftime("NY%d", now) + month_name(now.tm_mon)
                + time.strftime("%y", now))
    return os.getcwd() + os.sep + s_folder
def month_name(n_month):
    """Return the three-letter upper-case abbreviation for month *n_month*.

    Months are numbered 1 to 12; any other value gives an empty string.
    """
    abbreviations = {
        1: "JAN", 2: "FEV", 3: "MAR", 4: "AVR",
        5: "MAI", 6: "JUN", 7: "JUL", 8: "AOU",
        9: "SEP", 10: "OKT", 11: "NOV", 12: "DEC",
    }
    return abbreviations.get(n_month, "")
# Tags collected while stripping; strip_tags() passes this list to
# add_tag_to_globals() and write_all_tags() dumps it to a file.
l_tags=[""]
# File name of the stylesheet that meta_encoding_string() links to and that
# main() writes.
k_style_sheet="_000_style.css"
def add_tag_to_globals(s_tag, l_tags):
    """Append *s_tag* to the list *l_tags* unless it is already in it."""
    if s_tag not in l_tags:
        l_tags.append(s_tag)
def write_all_tags():
    """Write every entry of the global ``l_tags`` list to ``all_tags.txt``.

    Each entry goes on its own line; an existing file is overwritten.
    """
    # ``with`` closes the file even when a write raises.
    with open("all_tags.txt", "w") as fw:
        for s_tag in l_tags:
            fw.write(s_tag + "\n")
def left_str(s,n):
    """Return the start of *s*, cut after *n* characters or before the first ``.``.

    Whichever limit is reached first applies; the ``.`` is not included.
    A non-positive *n* gives an empty string.
    """
    limit = n if n > 0 else 0
    head = s[:limit]
    return head.partition(".")[0]
# process_file(szFilename, iFileNum)
# Reads the file szFilename into one string (newlines become spaces), sets
# the global g_title from its <title>, runs a long series of string
# clean-ups on the text, deletes the input file, and writes the result to a
# new ".htm" file.  Returns the name of the file that now holds the page.
# iFileNum is not used anywhere in this function.
# NOTE(review): the indentation of this file has been lost, so the extent of
# each "while" / "if" body below cannot be read off the source.  The comments
# describe individual statements only; confirm the nesting against a good copy.
# NOTE(review): "<>" is Python 2 only syntax.
def process_file(szFilename,iFileNum):
global g_title
szOutfile="~out.htm"
f = open(szFilename, "r")
szLine=""
s_last_str=""
ls=""
s=""
b_read_file = 1
ft = 0;lt = 0
r=""
# Read line by line until f.tell() stops advancing, joining the lines with
# spaces in place of the newline characters.
while b_read_file == 1:
szLine=f.readline()
szLine=szLine.replace("\n"," ")
s=s+szLine
ft = f.tell()
if lt == ft:
b_read_file=0
lt = ft
f.close()
szOutfile=simplify_filename(szFilename)
g_title=get_html_title(s)
g_title=replace_accented_chars_with_codes(g_title)
# Fall back to the simplified file name when the title is 4 characters or less.
if len(g_title)<=4: g_title=szOutfile#"no_title"
# Pages carrying a lock marker are returned untouched.  meta_encoding_string()
# emits <html class="lock">, so output of an earlier run matches this test.
if s.find("<lock>")>=0 or s.find("<LOCK>")>=0 or s.find("<html class=\"lock\"")>=0:
return szFilename
# Index-only mode: rename the file to its simplified name (if that name is
# free) without rewriting its contents.
if g_index_only==True:
if os.path.exists(szOutfile)==False:
os.rename(szFilename,szOutfile)
return szOutfile
while s<>s_last_str:
s_last_str=s
s=remove_ctrl_chars(s)
# NOTE(review): this adds a space after every "<!--" each time it runs; if it
# sits inside the loop above, the loop only ends once strip_comments() has
# removed every comment - confirm.
s=s.replace("<!--","<!-- ")
s=strip_comments(s)
s=strip_copyrights(s)
ls=""
while s<>ls:
ls=s
s=s.replace(" "," ")
ls=""
while s<>ls:
ls=s
s=s.replace("<3","♥")
s=s.replace("♥","♥")
s=s.replace("♥","") # get rid of hearts for now.
s=strip_tags(s)
s=get_rid_of_useless_tags(s)
ls=""
while s<>ls:
ls=s
s=s.replace("</h1><br>","</h1>")
s=s.replace("<br><ul>","<ul>")
ls=""
while s<>ls:
ls=s
s=s.replace("<br>©","©")
# NOTE(review): as displayed, this puts another space in front of every "©"
# each time it runs - confirm the literals and the loop it belongs to.
s=s.replace("©"," ©")
ls=""
while s<>ls:
ls=s
if s.endswith("<br>"):
# NOTE(review): rstrip("<br>") strips any trailing run of the characters
# "<", "b", "r", ">" - not just the literal "<br>" suffix - so trailing
# letters "b" or "r" of the text are removed as well.
s=s.rstrip("<br>")
# remove unnecessary "br" tags before and after header tags.
ls=""
while s<>ls:
ls=s
for i in range(1,7):
s_header_tag="h"+str(i)+">"
s=s.replace("<br><"+s_header_tag,"<"+s_header_tag)
s=s.replace("<"+s_header_tag+"<br>","<"+s_header_tag)
s=s.replace("</"+s_header_tag+"<br>","</"+s_header_tag)
# Map h2..h6 (opening and closing) onto h2.
ls=""
while s<>ls:
ls=s
for i in range(2,7):
s_header_tag="h"+str(i)+">"
s=s.replace("<"+s_header_tag,"<h2>")
s=s.replace("</"+s_header_tag,"</h2>")
ls=""
while s<>ls:
ls=s
s=s.replace("<center><br>","<br><center>")
s=s.replace("<center></center>","")
s=s.replace("<table><br>","")
s=s.replace("</td><br>","")
s=s.replace("</tr><br>","")
s=s.replace("Annons:<br>","")
ls=""
while s<>ls:
ls=s
s=s.replace("<br> ","<br>")
s=s.replace(" <br>","<br>")
s=s.replace("<br><br><br>","<br><br>")
# make it start the file at the first major heading tag.
hp=s.find("<h1>")
if hp>0:
# NOTE(review): s[:hp-1] leaves out the character at index hp-1.
s=s[hp:]+s[:hp-1] # split the file, putting the content right after the tag first.
# for small mobile devices
# Disabled by the constant "if False".
if False:
ls=""
while s<>ls:
ls=s
s=s.replace("<h2>","<b>")
s=s.replace("</h2>","</b><br>")
s=s.replace("<h1>","<b>")
s=s.replace("</h1>","</b><br>")
# get_rid_of_useless_tags() turned <script>/<style> into <textarea>;
# strip_script() removes those <textarea> spans.
s=strip_script(s)
s=replace_accented_chars_with_codes(s)
# post processing changes
ls=""
while s<>ls:
ls=s
s=s.replace(" <tr>","<tr>")
s=s.replace(" <td>","<td>")
if s.find(" SVT ")>0:
s=go_past_ul(s)
# workaround for a SPECIFIC problem.
s=s.replace("</iframe>","<!-- ")
s=s.replace("\"javascript:void();\">", " -->")
s=remove_acirc(s)
# Re-insert line breaks after rows, <br> tags and definition pairs.
s=s.replace("</tr>","</tr>\n")
s=s.replace("<br>","<br>\n")
s=s.replace("</dd><dt>", "</dd><dt>\n")
# this puts both header lines for canadian articles at the top.
htp=0
stp=""
if s.endswith("</h2>"):
htp=s.rfind("<h2>")
if htp>=0:
stp=s[htp:]
s=s.replace(stp,"")
s=stp+s
s=s.replace("</h2><h1>","<br>")
# Every remaining h1 (opening and closing) becomes h2.
s=s.replace("h1>","h2>")
s=meta_encoding_string()+s+"\n</body>\n</html>"
# The input file is deleted before the output file is written.
os.unlink(szFilename)
szOutfile=simplify_filename(szFilename)
if len(szOutfile)<=8: szOutfile="no_"+szOutfile
szOutfile=next_avail_file(szOutfile) # stops it from overwriting files in case they have the same name after simplifying with chars.
fw=open(szOutfile, "w")
fw.write(s)
fw.close()
return szOutfile
def remove_ctrl_chars(s):
    """Return *s* with every character whose code is below 32 turned into a space."""
    return "".join(" " if ord(ch) < 32 else ch for ch in s)
def remove_ext(s):
    """Return *s* without its last ``.`` and everything after it.

    A string that contains no ``.`` (including the empty string) is returned
    unchanged.
    """
    # rfind() replaces the hand-written backwards scan, which indexed s[-1]
    # on an empty string, and the slice replaces str.__getslice__, which
    # does not exist in Python 3.
    dot = s.rfind(".")
    if dot < 0:
        return s
    return s[:dot]
def remove_end_nums(s):
    """Return *s* cut off right after its last lower-case ASCII letter.

    Everything that follows that letter (digits, underscores, any other
    characters) is dropped.  A string without a lower-case letter, including
    the empty string, is returned unchanged.
    """
    # Walk backwards over real indexes only; the previous loop also probed
    # s[-1] once more at the end and raised IndexError on an empty string.
    # Slicing replaces str.__getslice__, which Python 3 removed.
    for i in range(len(s) - 1, -1, -1):
        if "a" <= s[i] <= "z":
            return s[:i + 1]
    return s
def get_ext(s):
    """Return the part of *s* after its last ``.``.

    Gives an empty string when *s* has no ``.`` (including when *s* is
    empty) or when the ``.`` is the last character.
    """
    # rfind() replaces the backwards scan that raised IndexError on "" and
    # the slice replaces str.__getslice__, which Python 3 removed.
    dot = s.rfind(".")
    if dot < 0:
        return ""
    return s[dot + 1:]
def next_avail_file(s):
    """Return a file name derived from *s* that does not exist yet.

    If *s* is free it is returned as is.  Otherwise the extension is dropped
    with remove_ext(), the stem is trimmed with remove_end_nums(), and
    ``_0.htm``, ``_1.htm``, ... are tried in turn until a free name is found.
    """
    candidate = s
    counter = 0
    while os.path.exists(candidate):
        stem = remove_end_nums(remove_ext(candidate))
        candidate = stem + "_" + str(counter) + "." + "htm"
        counter += 1
    return candidate
def simplify_filename(s):
    """Return a simplified ``.htm`` file name derived from *s*.

    After deaccent_string(), every character is lower-cased; anything other
    than ``a``-``z``, ``0``-``9`` and ``.`` becomes ``_`` and runs of ``_``
    collapse to one.  The text is passed through
    replace_accented_chars_with_codes(), the part from the last ``.`` on is
    dropped, the stem is cut to at most 40 characters, and ``.htm`` is
    appended.
    """
    k_maxlen = 40
    s = deaccent_string(s)
    chars = []
    for c in s:
        c = c.lower()
        if ("a" <= c <= "z") or ("0" <= c <= "9") or c == ".":
            chars.append(c)
        else:
            chars.append("_")
    r = "".join(chars)
    while "__" in r:
        r = r.replace("__", "_")
    r = replace_accented_chars_with_codes(r)
    # A plain slice replaces str.__getslice__, which Python 3 removed; it
    # also clamps to the string length on its own.
    s_file = remove_ext(r)[:k_maxlen]
    return s_file + "." + "htm"
def deaccent_string(s):
    """Return *s* with the two-character sequences listed below replaced by plain letters.

    The pairs are applied in the order given.
    """
    # NOTE(review): the keys look like UTF-8 encoded accented letters viewed
    # as cp1252 text - confirm the real encoding of this file before editing
    # any of them.
    replacements = (
        ("Ã¤", "a"),
        ("Ã¥", "a"),
        ("Ã ", "a"),
        ("Ã¶", "o"),
        ("Ã©", "e"),
        ("Ãª", "e"),
        ("Ã¨", "e"),
        ("Ã«", "e"),
        ("Ã®", "i"),
        ("Ã¹", "u"),
        ("Ã»", "u"),
        ("Ã„", "a"),
        ("Ã…", "a"),
        ("Ã–", "o"),
    )
    for old, new in replacements:
        s = s.replace(old, new)
    return s
def get_rid_of_useless_tags(s):
    """Return *s* after one pass of fixed tag substitutions.

    Each (old, new) pair below is applied once, in order, with str.replace.
    The pairs drop many attribute-less tags, turn ``<div>``/``<p>`` into
    ``<br>`` sequences, map ``<strong>``/``<em>`` to ``<b>``, turn
    ``<script>``/``<style>`` into ``<textarea>``, and tidy ``<br>`` tags
    next to other tags.
    """
    substitutions = (
        ("<?xml>", ""),
        ("<td><br>", "<td>"),
        ("<br></td>", "</td>"),
        ("<a<", "<"),
        ("<img<", "<"),
        ("</h2> </td> </tr> </table><br>", "</h2></td></tr></table>"),
        ("<small><br>", "<br><small>"),
        ("<br><blockquote>", "<blockquote>"),
        ("<blockquote><br>", "<blockquote>"),
        ("<br></blockquote>", "</blockquote>"),
        ("</blockquote><br>", "</blockquote>"),
        # NOTE(review): both strings display as a single blank here; the key
        # is presumably a non-breaking space or an entity - confirm.
        (" ", " "),
        ("</title><br>", "</title>"),
        ("<o>", ""),
        ("</o>", ""),
        ("<br> ", "<br>"),
        (" <br>", "<br>"),
        ("<strong>", "<b>"),
        ("</strong>", "</b>"),
        ("<em>", "<b>"),
        ("</em>", "</b>"),
        ("<b><br></b>", "<br>"),
        ("<u><br></u>", "<br>"),
        ("<i><br></i>", "<br>"),
        ("<b><br>", "<br><b>"),
        ("<br></b>", "</b><br>"),
        ("</table><br><br>", "</table><br>"),
        ("<br><br><table>", "<br><table>"),
        ("<!doctype>", ""),
        ("</ul><br>", "</ul>"),
        ("</ol><br>", "</ol>"),
        ("<br></li>", "</li>"),
        ("<li><br>", "<li>"),
        ("<!--trackbacks>", ""),
        ("<var>", ""),
        ("<pe>", ""),
        ("<span>", ""),
        ("</span>", ""),
        ("<font>", ""),
        ("</font>", ""),
        ("<div>", "<br>"),
        ("</div>", "<br>"),
        ("<p>", "<br><br>"),
        ("</p><br>", "<br>"),
        ("</p>", "<br><br>"),
        ("<hr> ", "<hr>"),
        ("<hr><hr>", "<hr>"),
        ("<hr><br>", "<hr>"),
        ("<hgroup>", ""),
        ("</hgroup>", ""),
        ("<meta>", ""),
        ("<area>", ""),
        ("<map>", ""),
        ("</map>", ""),
        ("<img>", ""),
        ("<a>", ""),
        ("</a>", ""),
        ("<!-->", ""),
        ("<link>", ""),
        ("<aside>", ""),
        ("</aside>", ""),
        ("<form>", ""),
        ("</form>", ""),
        ("<input>", ""),
        ("<![endif]>", ""),
        # Script and style bodies are re-tagged so strip_script() can find them.
        ("<script>", "<textarea>"),
        ("</script>", "</textarea>"),
        ("<style>", "<textarea>"),
        ("</style>", "</textarea>"),
        ("<iframe>", ""),
        ("</iframe>", ""),
        ("<param>", ""),
        ("</param>", ""),
        ("<embed>", ""),
        ("</embed>", ""),
        ("<object>", ""),
        ("</object>", ""),
        ("<noscript>", ""),
        ("</noscript>", ""),
        ("<html>", ""),
        ("</html>", ""),
        ("<body>", ""),
        ("</body>", ""),
        ("<head>", ""),
        ("</head>", ""),
        ("<nav>", ""),
        ("</nav>", ""),
        ("<header>", ""),
        ("</header>", ""),
        ("<article>", ""),
        ("</article>", ""),
        ("<section>", ""),
        ("</section>", ""),
        ("<figure>", ""),
        ("</figure>", ""),
        ("<figcaption>", ""),
        ("</figcaption>", ""),
        ("<footer>", ""),
        ("</footer>", ""),
        ("<panel>", ""),
        ("</panel>", ""),
        ("<center>", ""),
        ("</center>", ""),
        ("&quot;", "\""),
        # gets rid of weird symbols at the beginning of files.
        ("\xEF\xBB\xBF", ""),
    )
    for old, new in substitutions:
        s = s.replace(old, new)
    return s
# replace_accented_chars_with_codes(s)
# Applies a fixed, ordered series of str.replace() calls to s and returns the
# result.  Longer byte sequences are handled before their shorter prefixes
# (for example the three-byte "\xE2\x80\x.." keys come before "\xE2\x80" and
# before the single byte "\xE2"), so the order of the statements matters.
# Newline, carriage return and tab are turned into spaces along the way.
# NOTE(review): the function name suggests the replacements were character
# codes; as displayed here many of them are literal characters, and some keys
# look identical to their replacement.  Confirm the real encoding of this
# source file before changing any literal.
def replace_accented_chars_with_codes(s):
# russian
s=s.replace("\xD0\x27","B")# D0 =-D
s=s.replace("\xD0\xB5","e")#
s=s.replace("\xD1\x80","p")# D1 =N~
s=s.replace("\xD0\xBD","H")
# normal
s=s.replace("\xE2\x89\xA5","≥")
s=s.replace("\xE2\x89\xA4","≤")
s=s.replace("\xC5\x91","ő")
s=s.replace("\xC5\xB1","ű")
s=s.replace("\xE2\xAC\xA8","—")
s=s.replace("\xEF\xBF\xBD"," ")
s=s.replace("\xE2\x99\xA5","♥")
s=s.replace("\xE2\x82\xAC","€")
s=s.replace("\xE2\x80\x89"," ")
s=s.replace("\xE2\x80\xBA",">")
s=s.replace("\xE2\x80\xA6","...")
s=s.replace("\xE2\x80\xA2","•")
s=s.replace("\xE2\x80\x93","–")
s=s.replace("\xE2\x80\x94","—")
s=s.replace("\xE2\x80\x98","'")
s=s.replace("\xE2\x80\x99","'")
s=s.replace("\xE2\x80\x9C","\"")
s=s.replace("\xE2\x80\x9D","\"")
# Catch-all for any "\xE2\x80" pair not matched by the three-byte keys above.
s=s.replace("\xE2\x80","—") # need Z with caron
s=s.replace("\xE2\x98","☼")
s=s.replace("\xC2\xB0","°")
s=s.replace("\xC2\xA0"," ")
s=s.replace("\xC2\xAD","­")
s=s.replace("\xC2\xA4","•")
s=s.replace("\xE2\x96\xA0","•")
s=s.replace("\xC2\xAE","®")
s=s.replace("\xC2\xBB","»")
s=s.replace("\xC2\xAB","«")
s=s.replace("\xC2\xB2","²")
s=s.replace("\xC2\xB3","³")
s=s.replace("\xC2\xB9","¹")
s=s.replace("\xC2\xB4","'")
s=s.replace("\xC2\xBD","½")
# Two-byte sequences starting with "\xC3": lower-case accented letters.
s=s.replace("\xC3\xBC","ü")
s=s.replace("\xC3\xBA","ú")
s=s.replace("\xC3\xA0","à")
s=s.replace("\xC3\xA1","á")
s=s.replace("\xC3\xA2","â")
s=s.replace("\xC3\xA3","ã")
s=s.replace("\xC3\xA4","ä")
s=s.replace("\xC3\xA5","å")
s=s.replace("\xC3\xA6","æ")
s=s.replace("\xC3\xA7","ç")
s=s.replace("\xC3\xAC","ì")
s=s.replace("\xC3\xAD","í")
s=s.replace("\xC3\xAE","î")
s=s.replace("\xC3\xAF","ï")
s=s.replace("\xC3\xB0","ð")
s=s.replace("\xC3\xB1","ñ")
s=s.replace("\xC3\xB2","ò")
s=s.replace("\xC3\xB3","ó")
s=s.replace("\xC3\xB5","õ")
s=s.replace("\xC3\xB4","ô")
s=s.replace("\xC3\xB8","ø")
s=s.replace("\xC3\xB6","ö")
s=s.replace("\xC3\xB9","ù")
s=s.replace("\xC3\xBE","þ")
s=s.replace("\xC3\xA9","é")
s=s.replace("\xC3\xA8","è")
s=s.replace("\xC3\xAA","ê")
s=s.replace("\xC3\xAB","ë")
s=s.replace("\xC3\xBB","û")
# Same key as the earlier "\xC3\xB1" line; by this point it finds nothing new.
s=s.replace("\xC3\xB1","ñ")
s=s.replace("\x00\x9C","œ")
s=s.replace("\xC2\x9C","œ")
s=s.replace("\xC5\x22","œ")
s=s.replace("\xC5\x27","Œ")
s=s.replace("\xC5\x93","œ")
s=s.replace("\xC2\xA9","©")
s=s.replace("\xC2\xA3","£")
# Two-byte sequences starting with "\xC3": upper-case accented letters.
s=s.replace("\xC3\x87","Ç")
s=s.replace("\xC3\x84","Ä")
s=s.replace("\xC3\x85","Å")
s=s.replace("\xC3\x82","Â")
s=s.replace("\xC3\x80","À")
s=s.replace("\xC3\x81","Á")
s=s.replace("\xC3\x88","È")
s=s.replace("\xC3\x89","É")
s=s.replace("\xC3\x8A","Ê")
s=s.replace("\xC3\x8B","Ë")
s=s.replace("\xC3\x8C","Ì")
s=s.replace("\xC3\x8D","Í")
s=s.replace("\xC3\x8E","Î")
s=s.replace("\xC3\x8F","Ï")
s=s.replace("\xC3\x94","Ô")
s=s.replace("\xC3\x96","Ö")
s=s.replace("\xC3\x99","Ù") #?
s=s.replace("\xC3\x9C","Ü") #?
#s=s.replace("\xBF","")
# Single-byte keys; these run after all multi-byte keys that contain them.
s=s.replace("\xC0","À")
s=s.replace("\xC7","Ç")
s=s.replace("\xC8","È")
s=s.replace("\xC9","É")
s=s.replace("\xD4","Ô")
s=s.replace("\xD9","Ù")
s=s.replace("\xDB","Û")
s=s.replace("\xE0","à")
s=s.replace("\xE2","â")
s=s.replace("\xE7","ç")
s=s.replace("\xE8","è")
s=s.replace("\xE9","é")
s=s.replace("\xEA","ê")
s=s.replace("\xEB","ë")
s=s.replace("\xEE","î")
s=s.replace("\xEF","ï")
s=s.replace("\xF4","ô")
s=s.replace("\xF9","ù")
s=s.replace("\xFB","û")
# Whitespace control characters become plain spaces.
s=s.replace("\n"," ")
s=s.replace("\r"," ")
s=s.replace("\t"," ")
s=s.replace(" Â "," ")
s=s.replace("\x80","€")
s=s.replace("\x85","...")
s=s.replace("\x2D","-")
# NOTE(review): in a Python 2 byte-string literal "\u002D" is the six
# characters backslash-u-0-0-2-D; in Python 3 it is "-".
s=s.replace("\u002D","-")
s=s.replace("\x92","'")
s=s.replace("\x93","\"")
s=s.replace("\x94","\"")
s=s.replace("\xBB","»")
s=s.replace("\xAB","«")
s=s.replace("\xA0"," ")
s=s.replace("’","\'")
s=s.replace("‘","\'")
s=s.replace("”","\"")
s=s.replace("“","\"")
# A lone "\xC3" followed by a space, after a space or an apostrophe.
s=s.replace(" \xC3 "," à ")
s=s.replace("'\xC3 ","'à ")
return s
def strip_script(s):
    """Return *s* with every ``<textarea ...>...</textarea>`` span removed.

    When an opening ``<textarea`` has no ``</textarea>`` after it, only the
    ten characters starting at that opening are skipped and scanning goes on.
    """
    pieces = []
    pos = 0
    while True:
        start = s.find("<textarea", pos)
        if start < 0:
            break
        # Keep the text that precedes the opening tag.
        pieces.append(s[pos:start])
        end = s.find("</textarea>", start + 10)
        if end > start:
            pos = end + 11  # continue right after "</textarea>"
        else:
            pos = start + 10  # unterminated: skip ten characters
    pieces.append(s[pos:])
    return "".join(pieces)
def meta_encoding_string():
    """Return the HTML prologue used for every generated page.

    It runs from the doctype through the opening ``<body>`` tag, declares
    the charset held in ``g_encoding`` and links the stylesheet named by
    ``k_style_sheet``.
    """
    pieces = [
        "<!DOCTYPE html>\n<html class=\"lock\">\n",
        "<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=",
        g_encoding,
        "\" />",
        "\n",
        "<link rel=\"stylesheet\" type=\"text/css\" href=\"",
        k_style_sheet,
        "\">\n",
        "</head>\n",
        "<body>\n",
    ]
    return "".join(pieces)
def char_code_table():
    """Write ``charmap.htm``, an HTML table of the character codes 32 to 255.

    Each row shows the upper-cased hex() of the code next to the numeric
    character reference for it.  An existing file is overwritten.
    """
    rows = []
    for i in range(32, 256):
        rows.append("<tr><td>" + hex(i).upper() + "</td><td>" + "&#" + str(i) + ";</td></tr>")
    # The file used to be left open; ``with`` closes it on every path so the
    # buffered rows are always flushed to disk.
    with open("charmap.htm", "w") as f:
        f.write(meta_encoding_string())
        f.write("<table border=1 cellspacing=0 cellpadding='4'>")
        f.write("".join(rows))
        f.write("</table></body></html>")
    return
def get_html_title(s):
    """Return the text between the first ``<title>`` and the next ``</title>``.

    Returns an empty string when either tag is missing or when the closing
    tag lies 200 or more characters after the start of the opening tag.
    The match is case-sensitive and needs an attribute-less ``<title>``.
    """
    tp = s.find("<title>")
    # ">= 0" rather than "> 0": a title at the very start of the text is
    # valid and used to be rejected.
    if tp < 0:
        return ""
    te = s.find("</title>", tp)
    if te < 0 or te - tp >= 200:
        return ""
    return s[tp + 7:te]
# strip_tags(s)
# Walks s one character at a time and returns a copy in which tags have been
# simplified.  Inside a tag the characters are lower-cased; everything after
# the first space or colon up to the closing ">" is dropped; a "/" that is
# not among the first two characters of the tag is dropped.  When a finished
# tag equals inverse_tag() of the tag seen just before it, the output is cut
# back to its last "<"; otherwise the tag is recorded with
# add_tag_to_globals() and appended.  Text outside tags is copied through.
# NOTE(review): the indentation of this file has been lost; which statements
# belong to which "if" below cannot be read off the source - confirm against
# a good copy before changing anything.
def strip_tags(s):
r=""
c=""
s_tag=""
s_last_tag=""
tag_char_count=0
cp=0
b_intag=False
b_afterspace=False
while cp<len(s):
c=s[cp];
# "<" starts a new tag and resets the per-tag state.
if c=="<":
tag_char_count=0
b_intag=True
b_afterspace=False
if b_intag==True:
c=c.lower()
tag_char_count=tag_char_count+1
if (c==" " or c==":") and b_afterspace==False:b_afterspace=True
if tag_char_count>2 and c=="/":c=""
if b_afterspace==True and c!=">":c=""
s_tag=s_tag+c
# ">" ends the tag collected in s_tag.
if c==">" and b_intag==True:
b_intag=False
if s_tag==inverse_tag(s_last_tag):
gt_index=r.rfind("<")
if gt_index>=0:
r=r[0:gt_index]
else:
add_tag_to_globals(s_tag, l_tags)
r=r+s_tag
s_last_tag=s_tag
s_tag=""
c=""
if b_intag==True and b_afterspace==True:c=""
if b_intag==False:
# Any character other than a space forgets the previously seen tag.
if c!=" " and c!="":s_last_tag=""
r=r+c
cp=cp+1
return r
def inverse_tag(s):
    """Return the closing tag for the opening tag *s*.

    ``"<b>"`` gives ``"</b>"``.  A closing tag (second character ``/``) and
    any string shorter than two characters give an empty string.
    """
    # NOTE(review): the closing-tag case has always produced "" because it
    # was computed from an empty string rather than from s.  That result is
    # kept on purpose: strip_tags() compares each finished tag with
    # inverse_tag(previous tag), so returning anything else here would change
    # which tag pairs it removes.
    if len(s) < 2 or s[1] == "/":
        return ""
    return "</" + s[1:]
def strip_comments(s):
    """Return *s* with every ``<!-- ... -->`` comment removed.

    Scanning stops at the first ``<!--`` that has no ``-->`` after it; that
    opening and everything following it are kept unchanged.
    """
    pieces = []
    pos = 0
    while True:
        start = s.find("<!--", pos)
        # A comment at index 0 is a valid match; the old "ap>0" test skipped
        # it and then gave up on every later comment as well.
        if start < 0:
            break
        end = s.find("-->", start + 3)
        if end < 0:
            break  # unterminated comment: leave the rest alone
        pieces.append(s[pos:start])
        pos = end + 3
    # One forward pass with a final join instead of re-scanning and
    # re-slicing the whole string after each removal.
    pieces.append(s[pos:])
    return "".join(pieces)
def strip_copyrights(s):
    """Return *s* with each copyright notice removed.

    A notice runs from a ``©`` up to, but not including, the next ``<``.
    Scanning stops at the first ``©`` that has no ``<`` after it; that sign
    and everything following it are kept unchanged.
    """
    pieces = []
    pos = 0
    while True:
        start = s.find("©", pos)
        # A sign at index 0 is a valid match; the old "ap>0" test skipped it
        # and then gave up on every later notice as well.
        if start < 0:
            break
        end = s.find("<", start)
        if end < 0:
            break  # no tag follows: leave the rest alone
        pieces.append(s[pos:start])
        pos = end
    pieces.append(s[pos:])
    return "".join(pieces)
# gets rid of annoying "Â"s.
def remove_acirc(s):
    """Return *s* without the ``\\xC2`` characters that precede a non-letter.

    A ``\\xC2`` is kept when the character after it is an ASCII letter, and
    the last character of *s* is always kept.
    """
    if not s:
        return ""
    kept = []
    # Look at each character together with the one that follows it.
    for current, following in zip(s, s[1:]):
        followed_by_letter = ("a" <= following <= "z") or ("A" <= following <= "Z")
        if current == "\xC2" and not followed_by_letter:
            continue
        kept.append(current)
    kept.append(s[-1])
    return "".join(kept)
def is_html_file(s):
    """Return True when the file name *s* ends with one of the handled page extensions."""
    page_suffixes = (
        ".htm", ".html", ".dhtml", ".dht",
        ".ab",
        ".shtml",
        ".aspx",
    )
    return s.endswith(page_suffixes)
def is_letter(s):
    """Return True when *s* compares within ``a``-``z`` or ``A``-``Z``."""
    return ("a" <= s <= "z") or ("A" <= s <= "Z")
# loop through all of the files in the directory
# main()
# Lists the current directory (sorted), counts the HTML files other than the
# index file, then runs process_file() on each of them while collecting one
# "<a href=...>" line per result.  Afterwards it creates the dated folder,
# writes the index file and the stylesheet, and moves HTML files into the
# dated folder.
# NOTE(review): the indentation of this file has been lost; the extent of
# each "while" / "if" / "else" body below cannot be read off the source -
# confirm against a good copy before changing anything.
# NOTE(review): "<>" and the "print" statements are Python 2 only syntax.
def main():
# No "global" declaration here, so this binds a local g_date_folder; the
# module-level variable of the same name is not changed.
g_date_folder=date_folder_name()
num_html_files=0
k_index_file="_000_index.htm"
ls=os.listdir(".")
ls.sort()
r=""
s_new_filename=""
s_link_name=""
# First pass: count the files to process, for the progress percentage.
i=0
while i<len(ls):
szFilename=ls[i]
uFilename=str(szFilename)
uFilename=uFilename.lower()
if is_html_file(uFilename) and uFilename<>k_index_file:
num_html_files=num_html_files+1
i=i+1
s_progress="0"
# Second pass: process each file and build the index links in r.
i=0
nth_html_file=0
while i<len(ls):
szFilename=ls[i]
uFilename=str(szFilename)
uFilename=uFilename.lower()
if is_html_file(uFilename) and uFilename<>k_index_file:
nth_html_file=nth_html_file+1
print s_progress + " " + szFilename[:30]
s_new_filename=""
if os.path.exists(szFilename)==True:
s_new_filename=process_file(szFilename,i)
if s_new_filename!="":
s_link_name=s_new_filename
# g_title is the module-level value set by process_file().
if g_title!="":s_link_name=g_title
s_link="<a href=\"" + s_new_filename + "\">" + s_link_name + "</a>"
# Close an <i> or <b> that appears in the link text.
if s_link.find("<i>")>=0:s_link=s_link+"</i>"
if s_link.find("<b>")>=0:s_link=s_link+"</b>"
s_link=s_link+"<br>\n"
r=r+s_link
#show the progress indicator.
if num_html_files>0:
s_progress="%.2f" % (float(nth_html_file)/float(num_html_files)*float(100))
else:
s_progress="0"
# Busy-wait until the first field of os.times() changes.
curr_time=os.times()[0]
last_time=os.times()[0]
while curr_time==last_time:
last_time=os.times()[0]
# progress was here.
i=i+1
if nth_html_file>0:
if not os.path.exists(g_date_folder):
os.mkdir(g_date_folder)
r=meta_encoding_string()+"<style>a{text-decoration:none}</style>"+"<title>"+doc_title_name()+"</title>"+"<center><table><tr><td>"+r
# The index is written into the current directory under k_index_file.
fw=open(k_index_file, "w")
fw.write(r)
fw.write("</td></tr></table>")
fw.close()
# The stylesheet is written directly into the dated folder.
fw=open(g_date_folder + os.sep + k_style_sheet, "w")
#fw.write(".specialbar{background-color:maroon;color:white;font-weight:bold;text-align:center;}\n")
fw.write("html{font-size:14pt;width:75%;margin:auto;padding:6pt;border:solid black 1px;font-family:Sans}\n")
fw.write("td{vertical-align:top;}\n")
fw.write("h1{color:navy;font-size:18pt;}\n")
fw.write("h2{color:navy;font-size:18pt;}\n")
fw.write("a{color:navy;font-size:14pt;}\n")
fw.close()
else:
print "Nothing to do"
# Move every HTML file of the current directory into the dated folder unless
# a file of that name is already there.
# NOTE(review): if this part also runs when nothing was processed, the dated
# folder may not exist at this point - confirm the intended nesting.
if True:
i=0
ls=os.listdir(".")
ls.sort()
while i<len(ls):
szFilename=ls[i]
if is_html_file(szFilename):
s_moved_filename = g_date_folder + os.sep + szFilename
if os.path.exists(s_moved_filename)==False:
os.rename(szFilename, s_moved_filename)
i=i+1
#write_all_tags()
print "done"
return
#print "Warning : back up all files before proceeding"
# Run only when executed as a script, so that importing this module (for
# example to reuse or test a helper) does not rewrite, delete and move the
# files of the current directory.
if __name__ == "__main__":
    main()