#!/usr/bin/perl -w
use strict;
=SUMMARY
This code filters special characters out of a string. NOTE that in this
code, characters considered to be "normal" are those of the universal
ISO-8859-1 Latin character set. This is particularly useful in CGI scripts
that work with Unicode or for those looking for a safety net when running
CGI scripts from non-Latin-based locales.
The code could be further optimized without too much effort, but it's good
enough for most applications as is and I'm leaving it alone to avoid further
obfuscation.
=cut
# This next line just lets me move a big example text block to the end of the
# script instead of stuffing it in right here.
use vars qw( $ex );
# The code which filters unwanted characters out of a string.
#
# The escaped example text in $ex is decoded, split into single characters and
# partitioned: @asc collects the characters that are kept and @esc collects the
# characters that are filtered out, each in their original order. A character
# is filtered out when its code point is above 255 or is listed in
# %unwanted_ord.
my (@asc, @esc);

# Code points in the range 0-255 that are rejected. Line feed (10) and
# carriage return (13) are not in the list, so line breaks in the text are kept.
my %unwanted_ord = map { $_ => 1 }
    (0..9, 11, 12, 14..31, 127, 129, 139..145, 157, 158, 173);

# _decode_octal_escapes($text)
#   Returns a copy of $text in which every backslash followed by one to three
#   octal digits is replaced by the character with that code point. All other
#   text is returned unchanged. Returns '' when $text is undefined.
#   The decoding is done with a substitution instead of eval'ing the text as a
#   double-quoted string, so quotes, '$' and '@' in the text stay plain data
#   and can never be interpolated or run as code.
sub _decode_octal_escapes {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
    return $text;
}

# _is_unwanted_char($char)
#   Returns 1 when the first character of $char has a code point above 255 or
#   one listed in %unwanted_ord; returns 0 otherwise, including for an
#   undefined or empty argument.
sub _is_unwanted_char {
    my ($char) = @_;
    return 0 unless defined $char && length $char;
    my $ord = ord $char;
    return ($ord > 255 || $unwanted_ord{$ord}) ? 1 : 0;
}

# $IN->($number, @list): 1 if $number is numerically equal to any element of
# @list, otherwise 0. The filter itself now uses the hash lookup above; this
# helper is kept so that any code still calling it continues to work.
my ($IN) = sub {
    my ($needle, @haystack) = @_;
    foreach my $candidate (@haystack) {
        return 1 if $needle == $candidate;
    }
    return 0;
};

# $nOK->($char): 1 if $char must be filtered out, otherwise 0.
my ($nOK) = \&_is_unwanted_char;

my (@chars) = split(//, _decode_octal_escapes($ex));
foreach my $char (@chars) {
    if (_is_unwanted_char($char)) {
        push @esc, $char;
    }
    else {
        push @asc, $char;
    }
}
=AND NOW FOR AN EXAMPLE
The rest of this code is a quick demo of the filtering code. The example
filters out all the special characters to display the text of the first
chapter in the book of Genesis as it appears in the King James version of
the Old Testament, which is found in the Bible. In order to show that the
code really got rid of any junk characters, all of the special characters
that were filtered out will then be dumped in non-escaped form.
=cut
# Demo output: the kept characters first, then a heading, then the characters
# that were filtered out (passed through Text::Wrap with an 80-column limit),
# and finally two line feeds.
use Text::Wrap;
$Text::Wrap::columns = 80;
print @asc,
      "\012\012FILTERED OUT:\012\012",
      wrap('', '', @esc),
      "\012\012";
=NOTE
Special characters in the big chunk of text down there (along with spaces,
line feeds and punctuation) are all escaped to octal format so I don't have
to put a bunch of unrecognizable characters into this file and confuse/crash
your system terminal or browser (or whatever else you are using to look at this).
=cut
# Load the example text into the package variable $ex. This is done in an INIT
# block so that $ex is already populated when the main program code earlier in
# the file starts running. The here-document is single-quoted, so the
# backslash-octal sequences in it are stored literally (they are decoded by the
# filtering code, not here). The substitution then removes every CR and LF,
# which also rejoins escape sequences that are split across two lines of the
# here-document.
# NOTE(review): verses 10, 20 and 30 appear as "1", "2" and "3" in the data
# below -- the literal "0" digits seem to be missing; confirm against the
# intended text.
INIT { $ex = <<'__CHR__'; $ex =~ s/\r|\n//g; }
\0121\016\040\040In\040th\010e\040beg\034inn\023ing\021\040Go\034d\040c\022rea\
021ted\034\040th\021e\040\025hea\035ven\023\040and\040th\034e\040e\023art\020h\
056\012\034\0122\040\022\040An\027d\040t\034he\040\026ear\022th\040was\010\040w
ithou\010t\040form\054\040\034and\022\040vo\026id\073\034\040an\022d\040d\027ar
\034kne\021ss\040\022was\034\040up\020on\040\022the\034\040fa\023ce\040\020of\0
12\034\040\040\040the\040\027dee\034p\056\027\040An\020d\040t\034he\040\025Spi\
026rit\034\040of\021\040Go\025d\040m\034ove\021d\040u\023pon\034\040th\023e\025
\040fa\034ce\040\020of\040\025the\034\040wa\022ter\020s\056\012\034\0123\040\02
1\040An\022d\040G\035od\040said\023\054\040\034Let\023\040th\023ere\035\040be\0
27\040li\021ght\035\072\040a\020nd\040\021the\034re\040\025was\020\040li\035g\0
20ht\056\027\012\0124\034\040\040A\021nd\040\022God\034\040sa\027w\040t\023he\0
40\034lig\025ht\054\024\040th\035at\040\023it\023\040wa\034s\040g\023ood\020\07
2\040a\034nd\040\026God\022\040di\035vid\025ed\040\022the\034\040li\027ght\024\
040\035fro\025m\040t\022he\012\034\040\040\040\021dar\022kne\034ss\056\020\012\
0125\022\040\040A\034nd\040\021God\024\040ca\035ll\024ed\040\027the\034\040li\0
21ght\026\040Da\034y\054\040\022and\020\040th\034e\040d\025ark\025nes\034s\040h
\027e\040\021cal\034led\022\040Ni\021ght\034\056\040A\023nd\040\020the\034\012\
040\040\022\040ev\024eni\034ng\040\024and\025\040t\035he\040\020mor\025nin\035g
\040w\022ere\020\040th\035e\040f\027irs\021t\040d\034ay\056\021\012\0126\020\04
0\040A\034n\027d\040G\024od\040said\054\040Let\040there\040be\040a\016\040firma
ment\040\016in\040the\040mid\016st\040of\040the\040waters\054\040and\040let\012
\040\040\040it\040di\016vide\040th\016e\040waters\040from\040the\040wate\016rs\
056\012\0127\040\016\040An\016d\040God\040m\016ade\040the\040firmam\010ent\054\
040and\040di\016vided\040the\040w\016aters\040w\016hich\040were\040under\040the
\016\012\040\040\040fir\016mament\040\016fro\016m\040the\040waters\040which\040
\016were\040ab\016ove\040the\040firmament\072\040and\040i\010t\040was\040so\056
\012\0128\040\040And\040God\040c\016all\016ed\040\016the\016\040firmament\040\0
16Hea\016ven\056\040And\040the\040e\016vening\040and\040\016the\040morning\040w
ere\012\010\040\040\040th\010e\040second\040day\056\012\0129\040\040And\040\016
God\016\040said\054\040Le\016t\040t\016he\040waters\040\016under\040the\040heav
en\040be\016\040ga\016thered\040togethe\016r\040u\016nto\040o\021ne\012\023\040
\040\040\034pla\027ce\054\023\040an\034d\040l\023et\040\025the\034\040dr\021y\0
40l\025and\035\040ap\025pe\027ar\072\034\040an\022d\040i\026t\040w\034as\040\02
3so\056\020\012\0121\035\040A\027nd\040\021God\034\040ca\027lle\024d\034\040th\
021e\040d\026ry\040\034lan\025d\040E\020art\034h\073\040\021and\025\040th\035e\
040g\020ath\025eri\034ng\022\040to\021get\034her\023\040of\023\040th\035e\040w\
023ate\021rs\012\034\040\040\040\020cal\020led\035\040he\020\040S\025eas\035\07
2\040a\022nd\040\020God\034\040sa\020w\040t\020hat\034\040it\021\040wa\022s\040
g\034ood\022\056\012\012\02611\034\040An\020d\040G\021od\040\034sai\027d\054\04
0\020Let\034\040th\024e\040e\025art\035h\040b\027rin\023g\040f\034o\025rth\024\
040gr\034ass\023\054\040t\021he\040\034her\021b\040y\023iel\034din\023g\040s\02
6eed\034\054\040a\023nd\025\012\040\040\034\040th\021e\040f\025rui\034t\040t\02
2ree\026\040yi\034eld\020ing\020\040fr\034uit\023\040af\020t\035er\040\027his\0
21\040ki\034nd\054\027\040wh\023ose\034\040se\026ed\040\026is\040\034in\040\023
its\025elf\035\054\040\025upo\025n\012\040\035\040\040t\027he\040\021ear\034th\
072\021\040an\022d\040i\034t\040w\020as\040\022so\056\034\012\0121\0232\020\040
An\034d\040t\023he\040\021ear\034th\040\021bro\023ugh\034t\040f\021ort\025h\040
g\034ras\026s\054\040\026an\027d\040h\021erb\034\040yi\022eld\026ing\034\040se\
022ed\040\021aft\034er\040\023his\021\040ki\035nd\054\023\012\040\023\040\040a\
034nd\040\021the\022\040tr\035ee\040\021yie\020ldi\034ng\040\023fru\025it\054\0
34\040wh\023ose\023\040s\034eed\021\040wa\022s\040i\034n\040i\023tse\020lf\054\
035\040af\022ter\023\040hi\035s\040k\021ind\026\072\040a\035n\024d\012\040\020\
040\040G\034od\040\026saw\040tha\035t\040i\027t\040w\023as\040\034goo\027d\056\
012\020\01213\034\040An\023d\040\025the\035\040ev\027eni\021ng\040\034and\026\0
40th\021e\040m\035orn\021ing\022\040we\034re\040\025the\023\040\034thi\020rd\04
0\025day\034\056\012\012\02214\040\020And\034\040Go\025d\040s\024aid\034\054\04
0L\021et\040\026the\034re\023\040be\020\040li\034ght\020s\040i\025n\040t\034he\
040\021fir\027mam\035ent\020\040of\021\040th\035e\040h\025e\020ave\035n\040t\02
7o\040d\021ivi\034de\012\022\040\040\040\020the\034\040da\024y\040f\025rom\034\
040th\025e\040n\022ig\034ht\073\022\040an\024d\040l\035et\040\021the\020m\040b\
034e\040f\025or\040\021sig\034ns\054\027\040an\021d\040f\034or\027\040se\023aso
\034ns\054\027\040an\020d\040f\034or\012\025\040\040\040\024day\035s\054\040\02
0and\021\040ye\035ars\024\072\012\020\01215\034\040An\020d\040l\020et\040\034th
e\021m\040b\022e\040f\034or\040\020lig\022hts\034\040in\023\040th\020e\034\040f
i\026rma\021men\034t\040o\024f\040t\025he\040\034hea\023ven\021\040to\034\040gi
\021ve\040\023lig\035ht\020\040up\027on\012\035the\020\040ea\025rth\034\072\040
a\021nd\040\020it\040\034was\023\040so\026\056\012\012\03516\040\027A\021nd\040
\034God\026\040ma\020de\040\034two\023\040gr\021eat\034\040li\022ght\021s\073\0
40\035the\020\040gr\026ea\034ter\020\040li\024ght\034\040to\021\040ru\027le\040
\034the\025\040da\023y\054\040\034and\023\040th\025e\012\040\034\040\025\040le\
023sse\034r\040l\027igh\020t\040t\034o\040rule\040\026the\034\040ni\023ght\020\
072\040h\034e\040m\023ad\027e\040t\035he\040\022sta\023rs\040\035als\020o\056\0
12\021\01217\035\040An\027d\040G\023od\040\034set\025\040th\022em\020\040in\020
\040th\035e\040f\021irm\020ame\034nt\040\022of\040\020the\034\040he\021ave\022n
\040t\034o\040g\025iv\021e\040l\034igh\020t\040u\025pon\035\040th\020e\012\040\
027\040\040e\034art\026h\054\012\021\01218\034\040An\020d\040t\024o\035\040ru\0
20le\040\020ove\035r\040t\020he\040\021day\040and\016\040over\040the\040night\0
54\016\040an\016d\040to\040di\016vide\040the\040li\016ght\016\040from\012\040\0
40\040the\040dark\010ness\072\040and\040God\040s\016aw\040\016that\040it\016\04
0wa\016s\040good\056\012\01219\040And\040the\040evening\040and\040th\016e\040mo
rni\016ng\040were\017\040th\016e\040fou\016rth\040day\056\012\0122\040A\016nd\0
40\016God\040said\054\040Let\040the\040waters\040br\016ing\016\040forth\040abu\
016ndantly\040the\040moving\040crea\016ture\040that\012\040\040\040ha\016th\040
life\054\040and\040fowl\040that\040may\040f\016ly\040above\040the\040earth\040\
016in\040\016the\040op\016en\040firmament\040of\016\012\040\040\017\040heav\016
en\056\012\01221\016\040An\016d\040God\040created\040g\016reat\040whales\054\04
0and\040every\040\016living\040creatur\016e\040that\040moveth\054\040which\012\
040\040\040the\040w\016aters\040b\016rought\040for\016th\040abun\016dantly\054\
040after\040the\016ir\040kind\054\040an\017d\040every\040wi\016nged\040fo\016wl
\012\040\040\040after\040his\040kind\072\040a\016nd\040\016God\040saw\040that\0
40i\016t\040was\040g\016ood\016\056\012\01222\040And\040G\016od\040blessed\040\
017the\016m\054\040sayi\016ng\054\040Be\016\040fruitf\016ul\054\040and\016\040m
u\016ltipl\016y\054\040and\040fill\040the\012\040\040\040wa\016ter\016s\040in\0
40the\040se\017as\054\040and\016\040le\016t\040fowl\040\016multiply\040i\016n\0
40t\016he\040earth\056\012\01223\040And\040t\016he\040even\010ing\040a\034nd\04
0\021th\025e\040m\035orn\020ing\025\040we\035re\040\025the\020\040fi\034fth\020
\040da\020y\056\012\034\01224\021\040An\025d\040\035God\025\040sa\027id\054\034
\040Le\022t\040t\026he\040\034ear\020th\040\020bri\034ng\040\023for\026th\040\0
34th\021e\040l\025ivi\034ng\040\022cre\026atu\034re\040\020aft\020er\040his\040
\010kind\054\012\040\010\040\040cattl\034e\054\040\025and\021\040cr\034eep\020i
ng\021\040th\034ing\026\054\040a\020nd\040\034bea\027st\022\040of\034\040th\025
e\040e\023art\034h\040a\024fte\021r\040h\035is\040\020kin\027d\072\040\034and\0
23\040it\025\012\040\034\040\040w\021as\040\025so\056\034\012\0122\0275\040A\02
0nd\040\034God\020\040ma\024de\040\034the\027\040be\021ast\034\040o\023f\040t\0
20he\040\034ear\020th\040\020aft\034er\040his\040\027kin\034d\054\040\022and\02
3\040ca\034ttl\022e\040\024aft\034er\040\024the\025ir\012\035\040\040\040\020ki
n\025d\054\040\035and\022\040ev\020ery\034\040th\020ing\020\040t\034hat\020\040
cr\025eep\034eth\021\040up\027on\040\034the\040ear\027th\040\034aft\027er\040\0
20his\034\040k\025ind\026\072\040a\034nd\040\021God\025\012\040\040\034\040sa\0
21w\040t\023hat\040i\034t\040w\027as\040\021goo\035d\025\056\012\012\02226\040\
034And\022\040Go\026d\040s\035aid\021\054\040L\021et\040\034us\040\027mak\020e\
040m\034an\040\024in\025\040ou\035r\040i\020mag\020e\054\040\034aft\023er\040\0
25our\034\040li\021ken\022ess\034\072\040a\020nd\040\023l\034et\040\024the\024m
\012\040\034\040\040h\027ave\040domin\034ion\020\040ov\020er\040\034the\027\040
fi\024sh\034\040of\021\040th\021e\040s\034ea\054\020\040an\025d\040o\034ver\022
\040th\020e\040f\034owl\021\040of\022\040th\035e\040\021air\023\054\040a\034nd\
012\021\040\040\040\026ove\034r\040t\027he\040\021cat\034tle\027\054\040a\021nd
\040\034ove\022r\040\021all\035\040th\027e\040e\021art\034h\054\040\025and\021\
040ov\034er\040\025eve\026ry\040\034cre\020epi\025ng\034\040th\021ing\027\040th
\034at\012\023\040\040\040\020cre\035epe\023th\040\020upo\034n\040t\026he\040\0
20ear\035th\020\056\012\012\02727\040\034So\040\023God\020\040cr\034eat\027ed\0
40\021man\034\040in\027\040hi\020s\040o\034wn\040\025im\024age\035\054\040i\027
n\040t\023he\040\034ima\027ge\040\023of\040\034God\023\040create\034d\040h\025e
\040h\024im\034\073\012\040\021\040\040m\022ale\034\040an\020d\040f\022ema\034l
e\040\021cre\020ate\034d\040h\021e\040t\022hem\034\056\012\027\01228\023\040An\
034d\040G\023od\040\020ble\034sse\023d\040t\021hem\034\054\040a\021nd\040\023Go
d\035\040sa\027id\021\040un\034to\040\021the\025m\054\040\034Be\040\025fru\021i
tf\034ul\054\020\040an\025d\040m\034ultiply\027\054\012\034\040\040\040\022and\
023\040re\034ple\025nis\021h\040t\034he\040\027ear\021th\054\034\040an\025d\040
s\024ubd\034ue\021\040it\026\072\040a\034nd\040\023hav\020e\040d\035omi\023nio\
020n\040o\034ver\026\040th\020e\040f\035ish\020\040o\021f\012\040\035\040\040t\
027he\040\025sea\034\054\040a\021nd\040\022ove\035r\040t\020he\040fowl\034\040o
f\020\040th\020e\040\034air\021\054\040a\022nd\040\034ove\020r\040e\022ver\034y
\040l\023ivi\020ng\040\034thi\023ng\040\021tha\034t\012\035\040\040\040\022mov\
021eth\034\040up\023on\040\027the\034\040ea\022rth\026\056\012\012\03429\040\02
7And\024\040Go\034d\040\026sai\023d\054\040\034Beh\023old\020\054\040I\034\040h
a\022ve\040\021giv\035en\040\024you\021\040ev\034ery\020\040h\020erb\034\040be\
027ari\021ng\040\034see\024d\054\040\020whi\035ch\040\027is\012\021\040\040\040
\034upo\020n\040t\025he\034\040fa\022ce\040\020of\040\034all\020\040th\024e\040
e\034art\027h\054\040\020and\034\040every\040\026tre\034e\054\021\040in\020\040
th\034e\040w\023hic\022h\040i\034s\040t\023he\040\021fru\034it\040\021of\012\02
3\040\040\040\035a\040t\027re\021e\040y\035iel\020din\027g\040s\034eed\024\073\
040t\027o\040y\034ou\040\027it\040\023sha\034ll\040\020be\040\024fo\034r\040m\0
25eat\027\056\012\012\0343\040\021And\025\040to\034\040ev\023ery\025\040be\034a
st\023\040of\023\040th\034e\040\021ear\022th\054\034\040an\027d\040t\024o\040e\
035ver\020y\040f\025owl\034\040of\022\040th\021e\040a\034ir\054\021\040a\027nd\
040\034to\040\025eve\023ry\012\034\040\040\040\023thi\020ng\040\034tha\023t\040
c\025ree\034pet\025h\040u\023po\034n\040t\027he\040\020ear\034th\054\040whe\026
rei\035n\040t\027her\023e\040i\035s\040l\020ife\021\054\040I\034\040h\024ave\02
3\040gi\035ven\025\012\040\040\023\040ev\034ery\022\040gr\020een\034\040he\020r
b\040\024for\034\040me\025at\027\072\040a\034nd\040\025it\040\024was\034\040so\
022\056\012\012\02031\040\034And\023\040Go\021d\040s\034aw\040\021eve\023ry\035
\040th\027ing\021\040th\034at\040\021he\040\020had\034\040ma\021de\054\025\040a
n\034d\054\040\027beh\027old\035\054\040\026it\040\026was\034\040ve\023ry\040\0
27goo\034d\056\040\025And\023\012\040\040\035\040th\021e\040e\020ven\034ing\021
\040a\020nd\040\034the\027\040mo\021rni\034ng\040\023wer\025e\040t\034he\040\02
3six\023th\040\034day\021\056\012
__CHR__