Monday, June 8, 2009

Writing in Devanagari using Perl

Hello everyone! I have written a Perl script that displays Devanagari characters. That might sound pretty vague, so let me explain in detail.
Devanagari (देवनागरी) is an abugida used to write Hindi and Marathi, languages spoken in India. Suppose a web designer wants to build a web site that displays text in Hindi or Marathi: how would he do it? He could use the following Perl script, which I have written. The script produces Devanagari text using the standard Unicode characters; each Devanagari character has its own unique Unicode value.
A standard keyboard layout has only the English alphabet, so the script maps certain combinations of English letters to Devanagari characters and displays the result.

Vowels

Consonants

Other Characters




#!/usr/bin/perl
# Typing In Unicode
# By Prabhat Godse
#
# Lookup tables that map ASCII key sequences to Devanagari code points.
# Table names encode the kind of token and the key length:
#   %c / %c_d                  consonants          (1 / 2 character keys)
#   %i_v / %i_v_d / %i_v_t     independent vowels  (1 / 2 / 3 character keys)
#   %d_v / %d_v_d / %d_v_t     dependent signs     (1 / 2 / 3 character keys)
#   %s, %s_s, %s_d, %s_t_s,
#   %s_t, %s_q                 special characters and ready-made conjuncts
#   %num                       digits
# The tables are deliberately package globals: the translation logic further
# down in this script refers to them by exactly these names.

# Emit the result as UTF-8 so the wide characters below print correctly.
binmode(STDOUT, ":utf8");

# Unicode single consonants and their HEX value
%c = (
    'k' => "\x{0915}",    # KA
    'g' => "\x{0917}",    # GA
    'C' => "\x{091B}",    # CHA
    'j' => "\x{091C}",    # JA
    'T' => "\x{091F}",    # TTA
    'D' => "\x{0921}",    # DDA
    'N' => "\x{0923}",    # NNA
    't' => "\x{0924}",    # TA
    'd' => "\x{0926}",    # DA
    'n' => "\x{0928}",    # NA
    'p' => "\x{092A}",    # PA
    'b' => "\x{092C}",    # BA
    'm' => "\x{092E}",    # MA
    'y' => "\x{092F}",    # YA
    'r' => "\x{0930}",    # RA
    'l' => "\x{0932}",    # LA
    'L' => "\x{0933}",    # LLA
    'v' => "\x{0935}",    # VA
    's' => "\x{0938}",    # SA
    'h' => "\x{0939}",    # HA
);

# Unicode double character consonants
%c_d = (
    'kh' => "\x{0916}",                    # KHA
    'jh' => "\x{091D}",                    # JHA
    'bh' => "\x{092D}",                    # BHA
    'sh' => "\x{0936}",                    # SHA
    'Sh' => "\x{0937}",                    # SSA
    'ph' => "\x{092B}",                    # PHA
    'gh' => "\x{0918}",                    # GHA
    'ch' => "\x{091A}",                    # CA
    'dh' => "\x{0927}",                    # DHA
    'Nj' => "\x{091E}",                    # NYA
    'Th' => "\x{0920}",                    # TTHA
    'th' => "\x{0925}",                    # THA
    'Dh' => "\x{0922}",                    # DDHA
    'tr' => "\x{0924}\x{094D}\x{0930}",    # TA + VIRAMA + RA
);

# Unicode Independent Vowel (single character keys)
%i_v = (
    'a' => "\x{0905}",    # A
    'A' => "\x{0906}",    # AA
    'i' => "\x{0907}",    # I
    'I' => "\x{0908}",    # II
    'u' => "\x{0909}",    # U
    'U' => "\x{090A}",    # UU
    'e' => "\x{090F}",    # E
    'o' => "\x{0913}",    # O
);

# Unicode Independent Vowel (double character keys)
%i_v_d = (
    'AY' => "\x{0910}",    # AI
    'AW' => "\x{0914}",    # AU
    'OE' => "\x{0911}",    # CANDRA O
    'AE' => "\x{090D}",    # CANDRA E
);

# Unicode Independent Vowel (triple character keys)
%i_v_t = (
    'HRI' => "\x{090B}",    # VOCALIC R
);

# Unicode Dependent Vowel and other signs attached to a consonant
# (single character keys)
%d_v = (
    'A' => "\x{093E}",            # sign AA
    'i' => "\x{093F}",            # sign I
    'I' => "\x{0940}",            # sign II
    'u' => "\x{0941}",            # sign U
    'U' => "\x{0942}",            # sign UU
    'e' => "\x{0947}",            # sign E
    ':' => "\x{0903}",            # VISARGA
    'o' => "\x{094B}",            # sign O
    '`' => "\x{094D}\x{0930}",    # VIRAMA + RA
    ',' => "\x{093C}",            # NUKTA
    '^' => "\x{0901}",            # CANDRABINDU
);

# Unicode Dependent Vowel (double character keys)
%d_v_d = (
    'AY' => "\x{0948}",    # sign AI
    # Fixed: this used to be U+0911, the independent letter CANDRA O, which
    # cannot attach to a consonant. U+0949 is the matching dependent sign.
    'OE' => "\x{0949}",    # sign CANDRA O
    'AW' => "\x{094C}",    # sign AU
    'AE' => "\x{0945}",    # sign CANDRA E
);

# Unicode Dependent Vowel (triple character keys)
%d_v_t = (
    'HRI' => "\x{0943}",    # sign VOCALIC R
);

# Special Characters

# Characters copied to the output unchanged.
%s = (
    '.' => ".",
);

# Single character specials
%s_s = (
    '|'  => "\x{093D}",    # AVAGRAHA
    '\\' => "\x{094d}",    # VIRAMA (key is one backslash)
    'M'  => "\x{0902}",    # ANUSVARA
);

# Double character specials: ready-made conjuncts and OM
%s_d = (
    'hy' => "\x{0939}\x{094D}\x{092F}",    # HA + VIRAMA + YA
    'dy' => "\x{0926}\x{094D}\x{092F}",    # DA + VIRAMA + YA
    'kt' => "\x{0915}\x{094D}\x{0924}",    # KA + VIRAMA + TA
    'tr' => "\x{0924}\x{094D}\x{0930}",    # TA + VIRAMA + RA
    # NOTE(review): 'TT' yields TTA + VIRAMA + TTHA, not a doubled TTA
    # (U+091F U+094D U+091F) -- confirm this is intended.
    'TT' => "\x{091F}\x{094D}\x{0920}",
    'OM' => "\x{0950}",                    # OM
);

# Double character special that starts with a '.'
%s_t_s = (
    '.r' => "\x{0930}\x{094D}\x{200D}",    # RA + VIRAMA + ZERO WIDTH JOINER
);

# Triple character specials
%s_t = (
    'kSh' => "\x{0915}\x{094D}\x{0937}",    # KA + VIRAMA + SSA
    'Shr' => "\x{0936}\x{094D}\x{0930}",    # SHA + VIRAMA + RA
    'dny' => "\x{091C}\x{094D}\x{091E}",    # JA + VIRAMA + NYA
    'chh' => "\x{091B}",                    # CHA (same letter as 'C')
    'ddh' => "\x{0926}\x{094D}\x{0927}",    # DA + VIRAMA + DHA
);

# Quadruple character specials
%s_q = (
    'ThTh' => "\x{0920}\x{094D}\x{0920}",    # TTHA + VIRAMA + TTHA
);

# numbers
%num = (
    '0' => "\x{0966}",
    '1' => "\x{0967}",
    '2' => "\x{0968}",
    '3' => "\x{0969}",
    '4' => "\x{096A}",
    '5' => "\x{096B}",
    '6' => "\x{096C}",
    '7' => "\x{096D}",
    '8' => "\x{096E}",
    '9' => "\x{096F}",
);



#End of Map Start of Logic
#
# Reads ONE line of ASCII text from standard input (or from the files named
# on the command line) and prints its Devanagari transliteration.
#
# The input is scanned left to right. Each pass of the main loop tries the
# token kinds in a fixed order (digits, specials, consonants, vowels) and may
# consume several tokens. $prev_char remembers whether the last token was a
# "consonant" or a "vowel"; that decides whether a following vowel key is
# written as a dependent sign or as an independent letter.
#
# Changes from the previous revision of this section:
#   * the statements are back on separate lines (the whole loop had ended up
#     behind the '#' of the "ENTER YOUR INPUT HERE" comment);
#   * barewords (a, vowel, consonant, kSh, Shr, R) are quoted strings;
#   * single characters are read as scalars, not one-element array slices;
#   * the result is written with print, not printf, so a '%' in the input is
#     no longer interpreted as a format directive.
use strict;
use warnings;

# The lookup tables are package globals filled in by the map section of this
# script; declare them here so they can be used under "use strict".
our (%c, %c_d, %i_v, %i_v_d, %i_v_t, %d_v, %d_v_d, %d_v_t,
     %s, %s_s, %s_d, %s_t, %s_t_s, %s_q, %num);

# Sign that suppresses a consonant's inherent vowel; written between the
# members of a conjunct.
my $VIRAMA = "\x{094D}";

my $prev_char = 'vowel';    # kind of the last token: 'vowel' or 'consonant'
my $location  = 0;          # index of the next unread input character
my $hex       = '';         # output accumulated so far
my $weird     = 0;          # true once the current pass has emitted a token

my $input = <>;             # ENTER YOUR INPUT HERE
$input = '' unless defined $input;    # no input at all: print nothing
my $size = length $input;

# Return up to $len input characters starting $offset characters past the
# cursor. Yields '' (never undef) when that start lies beyond the input.
sub peek {
    my ($offset, $len) = @_;
    my $start = $location + $offset;
    return '' if $start >= $size;
    return substr $input, $start, $len;
}

# Try to read the next $len characters as a key of the hash %$table.
# On success: append the mapped text to the output, advance the cursor,
# record $class as the kind of the last token and return true.
# On failure: change nothing and return false.
sub take {
    my ($table, $len, $class) = @_;
    my $key = peek(0, $len);
    return 0 unless exists $table->{$key};
    $hex .= $table->{$key};
    $location += $len;
    $prev_char = $class;
    $weird     = 1;
    return 1;
}

# check if character is forwarded by character
# Return true when the text at the cursor starts with a consonant or with one
# of the consonant-class specials, i.e. when the consonant just emitted has
# to be joined to what follows.
sub char {
    for my $probe ([\%c_d, 2], [\%c, 1], [\%s_q, 4],
                   [\%s_t, 3], [\%s_d, 2], [\%s_s, 1]) {
        my ($table, $len) = @{$probe};
        return 1 if exists $table->{ peek(0, $len) };
    }
    return 0;
}

# Like take() for consonant tables, but additionally appends a VIRAMA when
# the consonant is directly followed by another consonant-class token, so
# the two render as a conjunct. Returns true when a consonant was consumed.
sub take_consonant {
    my ($table, $len) = @_;
    my $key = peek(0, $len);
    return 0 unless exists $table->{$key};
    # Advance first: char() must look at what comes AFTER this consonant.
    $location += $len;
    $prev_char = 'consonant';
    $weird     = 1;
    $hex .= $table->{$key};
    $hex .= $VIRAMA if char();
    return 1;
}

# main loop
# '<' rather than '!=' so the loop also ends if the cursor ever overshoots.
while ($location < $size) {
    $weird = 0;

    # Check For Numbers
    take(\%num, 1, 'vowel');

    # check for special characters
    take(\%s_t_s, 2, 'vowel');
    take(\%s,     1, 'vowel');

    # kSh and Shr behave like consonants: they take a VIRAMA when another
    # consonant follows. The other triple specials below do not.
    my $triple = peek(0, 3);
    if ($triple eq 'kSh' || $triple eq 'Shr') {
        take_consonant(\%s_t, 3);
    }

    # Remaining specials, longest key first; at most one of them matches.
    take(\%s_q, 4, 'consonant')
        or take(\%s_t, 3, 'consonant')
        or take(\%s_d, 2, 'consonant')
        or take(\%s_s, 1, 'consonant');

    # check the consonant
    # A single-letter consonant directly followed by 'R' is left alone; it
    # then falls through to the pass-through branch at the end of the loop.
    # NOTE(review): purpose of the 'R' exception is not evident -- confirm.
    if (!take_consonant(\%c_d, 2) && peek(1, 1) ne 'R') {
        take_consonant(\%c, 1);
    }

    # consonant followed by an independent vowel
    # An 'a' standing in front of a vowel key is only a separator: skip it
    # and switch to vowel mode so the vowel is written as a full letter.
    if ($location > 0) {
        if (peek(0, 1) eq 'a' && exists $i_v_d{ peek(1, 2) }) {
            $location++;
            $prev_char = 'vowel';
        }

        if (peek(0, 1) eq 'a') {
            if (exists $i_v{ peek(1, 1) }) {
                $location++;
                $prev_char = 'vowel';
            }

            # Evaluated at the cursor as it stands now, even if the check
            # just above has already moved it.
            if (exists $i_v_t{ peek(1, 3) }) {
                $location++;
                $prev_char = 'vowel';
            }
        }
    }

    # check for vowel
    if ($prev_char eq 'vowel') {
        take(\%i_v_d, 2, 'vowel')
            or take(\%i_v, 1, 'vowel')
            or take(\%i_v_t, 3, 'vowel');
    }

    # Consonant followed By Dependent Vowel
    if ($prev_char eq 'consonant') {
        # A three-letter sign does not exclude a further sign right after it
        # (for example a visarga), so the next check runs either way.
        take(\%d_v_t, 3, 'vowel');
        take(\%d_v_d, 2, 'vowel')
            or take(\%d_v, 1, 'vowel');
    }

    # check if no input is entered
    # Nothing matched in this pass: a lone 'a' is the inherent vowel and
    # produces no output; any other character is copied through unchanged.
    if (!$weird) {
        my $current = peek(0, 1);
        $hex .= $current unless $current eq 'a';
        $location++;
        $prev_char = 'vowel';
    }
}

# print, not printf: the result is data, not a format string.
print $hex;


1 comments:

chorny said...

Add "use strict;use warnings;use diagnostics;" at start of your code, after #!. It will help to write better Perl.
Later you can drop last one.

if(@array[$location] eq a)
should be
if( $array[$location] eq 'a')

Post a Comment