#!/bin/sh

# Convert ANSI (terminal) colours and attributes to HTML

# Licence: LGPLv2
# Author:
#    http://www.pixelbeat.org/docs/terminal_colours/
# Examples:
#    ls -l --color=always | ansi2html.sh > ls.html
#    git show --color | ansi2html.sh > last_change.html
#    Generally one can use the `script` util to capture full terminal output.
# Changes:
#    V0.1, 24 Apr 2008, Initial release
#    V0.2, 01 Jan 2009, Phil Harnish
#                         Support `git diff --color` output by
#                         matching ANSI codes that specify only
#                         bold or background colour.
#                       P@draigBrady.com
#                         Support `ls --color` output by stripping
#                         redundant leading 0s from ANSI codes.
#                         Support `grep --color=always` by stripping
#                         unhandled ANSI codes (specifically ^[[K).
#    V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
#                         Remove cat -v usage which mangled non ascii input.
#                         Cleanup regular expressions used.
#                         Support other attributes like reverse, ...
#                       P@draigBrady.com
#                         Correctly nest tags (even across lines).
#                         Add a command line option to use a dark background.
#                         Strip more terminal control codes.
#    V0.4, 17 Sep 2009, P@draigBrady.com
#                         Handle codes with combined attributes and color.
#                         Handle isolated attributes with css.
#                         Strip more terminal control codes.
#    V0.23, 28 Feb 2016
#      http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh

# gawk runs the last stage of the conversion pipeline; give up early without it.
gawk --version >/dev/null || exit 1

if test $1 = "--version" {
  printf '0.23\n' && exit
}

# Print the help text on stderr and terminate the script.
proc usage {
  printf '%s\n' \
'This utility converts ANSI codes in data passed to stdin
It has 4 optional parameters:
--bg=dark --palette=linux|solarized|tango|xterm --css-only|--body-only
E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html' >&2
  exit
}

if test $1 = "--help" {
  usage
}

# Apply a single command line option by setting the matching globals.
#   $1 - the option to apply (may be absent).
# Sets dark_bg, css_only or body_only to 'yes' for the three flag options,
# otherwise sets the 16 palette entries P0..P15.  Any argument that is not
# recognised, including a missing one, selects the linux console palette.
proc processArg {
  test $1 = "--bg=dark" && do { setglobal dark_bg = 'yes'; return; }
  test $1 = "--css-only" && do { setglobal css_only = 'yes'; return; }
  test $1 = "--body-only" && do { setglobal body_only = 'yes'; return; }
  if test $1 = "--palette=solarized" {
    # See http://ethanschoonover.com/solarized
    setglobal P0 = '073642'; setglobal P1 = 'D30102'; setglobal P2 = '859900'; setglobal P3 = 'B58900';
    setglobal P4 = '268BD2'; setglobal P5 = 'D33682'; setglobal P6 = '2AA198'; setglobal P7 = 'EEE8D5';
    setglobal P8 = '002B36'; setglobal P9 = 'CB4B16'; setglobal P10 = '586E75'; setglobal P11 = '657B83';
    setglobal P12 = '839496'; setglobal P13 = '6C71C4'; setglobal P14 = '93A1A1'; setglobal P15 = 'FDF6E3';
    return;
  } elif test $1 = "--palette=solarized-xterm" {
    # Above mapped onto the xterm 256 color palette
    setglobal P0 = '262626'; setglobal P1 = 'AF0000'; setglobal P2 = '5F8700'; setglobal P3 = 'AF8700';
    setglobal P4 = '0087FF'; setglobal P5 = 'AF005F'; setglobal P6 = '00AFAF'; setglobal P7 = 'E4E4E4';
    setglobal P8 = '1C1C1C'; setglobal P9 = 'D75F00'; setglobal P10 = '585858'; setglobal P11 = '626262';
    setglobal P12 = '808080'; setglobal P13 = '5F5FAF'; setglobal P14 = '8A8A8A'; setglobal P15 = 'FFFFD7';
    return;
  } elif test $1 = "--palette=tango" {
    # Gnome default
    setglobal P0 = '000000'; setglobal P1 = 'CC0000'; setglobal P2 = '4E9A06'; setglobal P3 = 'C4A000';
    setglobal P4 = '3465A4'; setglobal P5 = '75507B'; setglobal P6 = '06989A'; setglobal P7 = 'D3D7CF';
    setglobal P8 = '555753'; setglobal P9 = 'EF2929'; setglobal P10 = '8AE234'; setglobal P11 = 'FCE94F';
    setglobal P12 = '729FCF'; setglobal P13 = 'AD7FA8'; setglobal P14 = '34E2E2'; setglobal P15 = 'EEEEEC';
    return;
  } elif test $1 = "--palette=xterm" {
    setglobal P0 = '000000'; setglobal P1 = 'CD0000'; setglobal P2 = '00CD00'; setglobal P3 = 'CDCD00';
    setglobal P4 = '0000EE'; setglobal P5 = 'CD00CD'; setglobal P6 = '00CDCD'; setglobal P7 = 'E5E5E5';
    setglobal P8 = '7F7F7F'; setglobal P9 = 'FF0000'; setglobal P10 = '00FF00'; setglobal P11 = 'FFFF00';
    setglobal P12 = '5C5CFF'; setglobal P13 = 'FF00FF'; setglobal P14 = '00FFFF'; setglobal P15 = 'FFFFFF';
    return;
  } else {
    # linux console
    setglobal P0 = '000000'; setglobal P1 = 'AA0000'; setglobal P2 = '00AA00'; setglobal P3 = 'AA5500';
    setglobal P4 = '0000AA'; setglobal P5 = 'AA00AA'; setglobal P6 = '00AAAA'; setglobal P7 = 'AAAAAA';
    setglobal P8 = '555555'; setglobal P9 = 'FF5555'; setglobal P10 = '55FF55'; setglobal P11 = 'FFFF55';
    setglobal P12 = '5555FF'; setglobal P13 = 'FF55FF'; setglobal P14 = '55FFFF'; setglobal P15 = 'FFFFFF';
    test $1 = "--palette=linux" && return;
  }
}

processArg #defaults
for var in [@ARGV] { processArg $var; }
# --css-only and --body-only are mutually exclusive.
test $css_only && test $body_only && usage

# Mac OSX's GNU sed is installed as gsed
# use e.g. homebrew 'gnu-sed' to get it
if ! sed --version >/dev/null 2>&1 {
  if gsed --version >/dev/null 2>&1 {
    alias sed=gsed
  } else {
    echo "Error, can't find an acceptable GNU sed." >&2
    exit 1
  }
}

# NOTE(review): the two printf statements below are kept exactly as found but
# look truncated: the markup they should print is missing, the double quote
# opened by the first one is never closed by its own statement, and nothing
# visible in this file reads dark_bg or P0..P15 after they are set.
# Presumably the document prologue and CSS generation were lost from this
# copy - restore them from the upstream script before use.
test $css_only || test $body_only || printf '%s' "
'
test $body_only && printf '%s\n' 'Be sure to use  and 
' >&2

# Regex fragment for ESC followed by a literal "[", interpolated as $(p)
# at the start of the escape-sequence patterns in the sed scripts below.
# NOTE(review): the doubled single quotes around the value look unusual -
# confirm the quoting yields the intended backslash sequences.
setglobal p = ''\x1b\[''        #shortcut to match escape codes

# Handle various xterm control sequences.
# See /usr/share/doc/xterm-*/ctlseqs.txt
# Literal ampersands and double quotes are turned into HTML entities first,
# so that any double quote still present afterwards is one of the markers
# (J, C, M, X) this stage writes for the later stages to interpret.
sed "
# escape ampersand and quote
s#&#\&amp;#g; s#\"#\&quot;#g;
s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)

s#\r\$## # strip trailing \r

# strip other non SGR escape sequences
s#[\x07]##g
s#\x1b[]>=\][0-9;]*##g
s#\x1bP+.\{5\}##g
# Mark cursor positioning codes \"Jr;c;
s#$(p)\([0-9]\{1,2\}\)G#\"J;\1;#g
s#$(p)\([0-9]\{1,2\}\);\([0-9]\{1,2\}\)H#\"J\1;\2;#g

# Mark clear as \"Cn where n=1 is screen and n=0 is to end-of-line
s#$(p)H#\"C1;#g
s#$(p)K#\"C0;#g
# Mark Cursor move columns as \"Mn where n is +ve for right, -ve for left
s#$(p)C#\"M1;#g
s#$(p)\([0-9]\{1,\}\)C#\"M\1;#g
s#$(p)\([0-9]\{1,\}\)D#\"M-\1;#g
s#$(p)\([0-9]\{1,\}\)P#\"X\1;#g

s#$(p)[0-9;?]*[^0-9;?m]##g

" |

# Normalize the input before transformation
# Angle brackets in the data are turned into entities here so that the only
# raw "<" seen by the later stages is one that starts generated markup.
sed "
# escape HTML (ampersand and quote done above)
s#>#\&gt;#g; s#<#\&lt;#g;

# normalize SGR codes a little

# split 256 colors out and mark so that they're not
# recognised by the following 'split combined' line
:e
s#$(p)\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#$(p)\1m$(p)¬\2m#g; t e
s#$(p)\([34]8;5;[0-9]\{1,3\}\)m#$(p)¬\1m#g;

:c
s#$(p)\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#$(p)\1m$(p)\2m#g; t c   # split combined
s#$(p)0\([0-7]\)#$(p)\1#g                                 #strip leading 0
s#$(p)1m\(\($(p)[4579]m\)*\)#\1$(p)1m#g                   #bold last (with clr)
s#$(p)m#$(p)0m#g                                          #add leading 0 to norm

# undo any 256 color marking
s#$(p)¬\([34]8;5;[0-9]\{1,3\}\)m#$(p)\1m#g;

# map 16 color codes to color + bold
s#$(p)9\([0-7]\)m#$(p)3\1m$(p)1m#g;
s#$(p)10\([0-7]\)m#$(p)4\1m$(p)1m#g;

# change 'reset' code to \"R
s#$(p)0m#\"R;#g
" |

# Convert SGR sequences to HTML
# The :f and :b loops first collapse runs of consecutive foreground (3x) or
# background (4x) colour codes down to the last one of each run; the
# remaining substitutions then handle each kind of SGR code in turn.
# NOTE(review): every substitution after the two loops has an empty
# replacement even though the patterns capture the colour digits, so as
# written this stage deletes the codes instead of emitting any markup.  The
# replacement text appears to be missing from this copy - compare with the
# upstream script before relying on this stage.
sed "
# common combinations to minimise html (optional)
:f
s#$(p)3[0-7]m$(p)3\([0-7]\)m#$(p)3\1m#g; t f
:b
s#$(p)4[0-7]m$(p)4\([0-7]\)m#$(p)4\1m#g; t b
s#$(p)3\([0-7]\)m$(p)4\([0-7]\)m##g
s#$(p)4\([0-7]\)m$(p)3\([0-7]\)m##g

s#$(p)1m##g
s#$(p)4m##g
s#$(p)5m##g
s#$(p)7m##g
s#$(p)9m##g
s#$(p)3\([0-9]\)m##g
s#$(p)4\([0-9]\)m##g

s#$(p)38;5;\([0-9]\{1,3\}\)m##g
s#$(p)48;5;\([0-9]\{1,3\}\)m##g

s#$(p)[0-9;]*m##g # strip unhandled codes
" |

# Alternative character set conversion and cursor movement handling.
# The conversion is deliberately left until this point: done at the start it
# would have to avoid touching SGR codes etc., whereas here the only text
# that must be left alone is whatever sits between &...; or <...>
#
# sed alone could transliterate, along the lines of:
#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
# but that gets very awkward because only some of the input may be
# converted.  The awk script that follows therefore works like this:
#  1. transliteration is switched on when "T1; is seen
#  2. and switched off when "T0; is seen (possibly on a different line)
#  3. text between &; or <> chars is never transliterated
#  4. x,y movements and the active display mode are tracked per position
#  5. the line/screen is buffered and dumped when required
#
# This sed call only rewrites 'smacs' and 'rmacs' (in both of their forms)
# to "T1; and "T0; respectively, to simplify matching in the awk script.
sed \
  -e "s#\x1b(0#\"T1;#g;" \
  -e "s#\x0E#\"T1;#g;" \
  -e "s#\x1b(B#\"T0;#g" \
  -e "s#\x0F#\"T0;#g" |
# Last pipeline stage: a gawk program whose encode() function walks every
# input line one character at a time, keeps characters in dump[x,y] and the
# active tags in attr[x,y], and acts on the double-quote markers (T, C, M, X)
# written by the sed stages above.
# NOTE(review): several lines of the awk program are visibly truncated in this
# copy (marked below); text between a "<" and a later ">" seems to have been
# dropped.  The program cannot parse as it stands - restore from upstream.
shell {
gawk '
# NOTE(review): dump_line is truncated here.  Its first loop header breaks off
# after "c", and the statements that follow index a[] and attr[] with a single
# subscript, so they appear to belong to another helper whose header is
# missing (encode calls atos(span); encode and END call dump_screen()).
function dump_line(l,del,c,blanks,ret) {
  for(c=1;c")
  for(i=1;i<=spc;i++) {
    rm=rm?rm:(a[i]!=attr[i]">")
    if(rm) {
      ret=ret ""
      delete a[i];
    }
  }
  for(i=1;i"
    if(a[i]!=attr[i]) {
      a[i]=attr[i]
      ret = ret attr[i]
    }
  }
  return ret
}

# encode(string, ...): process one input line.
# The main rule passes only string; start and end then default to the whole
# line, and i, ret, pos, sc, buf act as locals.
# Returns ret, followed by dump_line(y,1) while dumpStatus is 0 (dsOff).
# Reads and updates the globals x, y, maxX, maxY, state, last_mode, span,
# spc, dump, attr and dumpStatus.
function encode(string,start,end,i,ret,pos,sc,buf) {
   if(!end) end=length(string);
   if(!start) start=1;
   # state 3 is the starting mode for each line, state 4 is entered by the
   # "T1; marker and left by "T0;, state 2 means an &...; entity is being read
   state=3
   for(i=1;i<=length(string);i++) {
     c=substr(string,i,1)
     if(state==2) {
       # Accumulate the entity; once its ";" arrives treat the whole entity
       # as one character and switch to last_mode (which is only assigned
       # by the T marker handling below)
       sc=sc c
       if(c==";") {
          c=sc
          state=last_mode
       } else continue
     } else {
       # Carriage return moves the insertion point back to the first column
       if(c=="\r") { x=1; continue }
       if(c=="<") {
         # Change attributes - store current active
         # attributes in span array
         # (i is advanced to the closing ">" so the tag itself is skipped)
         split(substr(string,i),cord,">");
         i+=length(cord[1])
         span[++spc]=cord[1] ">"
         continue
       }
       else if(c=="&") {
         # All goes to single position till we see a semicolon
         sc=c
         state=2
         continue
       }
       else if(c=="\b") {
          # backspace move insertion point back 1
          if(spc) attr[x,y]=atos(span)
          x=x>1?x-1:1
          continue
       }
       else if(c=="\"") {
          # A marker: cc is the letter after the quote and cord[1] is the
          # text between that letter and the next ";"
          split(substr(string,i+2),cord,";")
          cc=substr(string,i+1,1);
          if(cc=="T") {
              # Transliterate on/off
              if(cord[1]==1&&state==3) last_mode=state=4
              if(cord[1]==0&&state==4) last_mode=state=3
          }
          else if(cc=="C") {
              # Clear
              if(cord[1]+0) {
                # Screen - if Recording dump screen
                if(dumpStatus==dsActive) ret=ret dump_screen()
                dumpStatus=dsActive
                delete dump
                delete attr
                x=y=1
              } else {
                # To end of line
                # NOTE(review): the next line is truncated in this copy
                for(pos=x;posmaxY) maxY=y
                # Change y - start recording
                dumpStatus=dumpStatus?dumpStatus:dsReset
              }
          }
          else if(cc=="M") {
              # Move left/right on current line
              x+=cord[1]
          }
          else if(cc=="X") {
              # delete on right
              for(pos=x;pos<=maxX;pos++) {
                nx=pos+cord[1]
                # NOTE(review): the next line is truncated in this copy; what
                # remains of it ends by replacing c with Trans[c]
                if(nx=start&&i<=end&&c in Trans) c=Trans[c]
     }
     if(dumpStatus==dsReset) {
       delete dump
       delete attr
       ret=ret"\n"
       dumpStatus=dsActive
     }
     if(dumpStatus==dsNew) {
       # After moving/clearing we are now ready to write
       # something to the screen so start recording now
       ret=ret"\n"
       dumpStatus=dsActive
     }
     if(dumpStatus==dsActive||dumpStatus==dsOff) {
       # Store the character at the insertion point together with the
       # currently open tags (if any), then advance one column
       dump[x,y] = c
       if(!spc) delete attr[x,y]
       else attr[x,y] = atos(span)
       if(++x>maxX) maxX=x;
     }
    }
    # End of line if dumping increment y and set x back to first col
    x=1
    if(!dumpStatus) return ret dump_line(y,1);
    else if(++y>maxY) maxY=y;
    return ret
}
BEGIN{
  OFS=FS
  # dump screen status
  dsOff=0    # Not dumping screen contents just write output direct
  dsNew=1    # Just after move/clear waiting for activity to start recording
  dsReset=2  # Screen cleared build new empty buffer and record
  dsActive=3 # Currently recording
  # F lists the source characters and T their replacements; the loop below
  # builds Trans so that the i-th character of F maps to the i-th of T
  F="abcdefghijklmnopqrstuvwxyz{}`~"
  T="▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·"
  # Starting value for the widest column seen; encode raises it as needed
  maxX=80
  delete cur;
  x=y=1
  for(i=1;i<=length(F);i++)Trans[substr(F,i,1)]=substr(T,i,1);
}

# Replace each input line by its encoded form; the bare 1 then prints it
{ $0=encode($0) }
1
END {
  # Flush whatever is still buffered when a screen dump is in progress
  if(dumpStatus) {
    print dump_screen();
  }
}'
}

# Print the document trailer unless body_only is set (--body-only).
# NOTE(review): the format string now holds only a line break followed by \n;
# the closing markup it presumably printed seems to be missing from this copy.
test $body_only || printf '
\n'