こちらに触発されて。
以下のperl scriptに前処理をさせました。
#!/usr/local/bin/perl
# Print an HTML table listing every symbol / punctuation character in the
# scanned code point range that the 'shiftjis' encoding can represent,
# together with its Shift_JIS and EUC-JP byte sequences and Unicode name.
use strict;
use warnings;
use Encode;
use Unicode::UCD;

my $sjis  = find_encoding('shiftjis');
my $eucjp = find_encoding('eucjp');

# printf template for one row: character reference, code point,
# Shift_JIS bytes, EUC-JP bytes, Unicode character name.
my $row_format = <<'EOT';
<tr><th>&#x%02X;</th><td><tt>U+%04X</tt></td>
<td><tt>%s</tt></td><td><tt>%s</tt></td><td>%s</td></tr>
EOT

binmode STDOUT, ':utf8';
print "<table>\n";

# Scan U+0000..U+FFFE, leaving out the surrogate range U+D800..U+DFFF.
CODEPOINT:
for my $cp (0 .. 0xD7FF, 0xE000 .. 0xFFFE) {

    # Keep only characters that generic Shift_JIS can encode.  CHECK = 1
    # (Encode::FB_CROAK) makes encode() die on an unmappable character;
    # a throw-away copy is passed because encode() with CHECK set may
    # modify its argument.
    my $sjis_bytes = eval {
        my $char = chr $cp;
        $sjis->encode($char, 1);
    };
    next CODEPOINT if $@;

    # Keep only characters whose general category starts with P
    # (punctuation) or S (symbol).
    my $info = Unicode::UCD::charinfo($cp);
    next CODEPOINT unless $info;
    next CODEPOINT unless $info->{category} =~ /^[PS]/;

    my $sjis_seq  = hex_escape($sjis_bytes);
    my $eucjp_seq = hex_escape($eucjp->encode(chr $cp));

    printf $row_format, $cp, $cp, $sjis_seq, $eucjp_seq, $info->{name};
}
print "</table>";

# Render a byte string as a run of \xHH escapes, e.g. "\x81\x91".
sub hex_escape {
    my ($bytes) = @_;
    return join "", map { sprintf "\\x%02X", $_ } unpack("C*", $bytes);
}
考えかたとしては、すっぴんのShift_JIS (ASCII + JIS X 0201 + JIS X 0208)に含まれる文字のうち、UnicodeでPunctuationないしSymbolとして分類されているものを抜き出した、ということです。
よって、以下の表にある約物は、日本語を扱える環境であればほぼ確実に表示できるということになります。
亀甲括弧がtortoise shell bracketなんて、もろ直訳で面白いですね。
Dan the Man with Too Many Symbols to Juggle
Char | Unicode | Shift_JIS | EUC_JP | Unicode Name |
! | U+0021 | \x21 | \x21 | EXCLAMATION MARK |
" | U+0022 | \x22 | \x22 | QUOTATION MARK |
# | U+0023 | \x23 | \x23 | NUMBER SIGN |
$ | U+0024 | \x24 | \x24 | DOLLAR SIGN |
% | U+0025 | \x25 | \x25 | PERCENT SIGN |
& | U+0026 | \x26 | \x26 | AMPERSAND |
' | U+0027 | \x27 | \x27 | APOSTROPHE |
( | U+0028 | \x28 | \x28 | LEFT PARENTHESIS |
) | U+0029 | \x29 | \x29 | RIGHT PARENTHESIS |
* | U+002A | \x2A | \x2A | ASTERISK |
+ | U+002B | \x2B | \x2B | PLUS SIGN |
, | U+002C | \x2C | \x2C | COMMA |
- | U+002D | \x2D | \x2D | HYPHEN-MINUS |
. | U+002E | \x2E | \x2E | FULL STOP |
/ | U+002F | \x2F | \x2F | SOLIDUS |
: | U+003A | \x3A | \x3A | COLON |
; | U+003B | \x3B | \x3B | SEMICOLON |
< | U+003C | \x3C | \x3C | LESS-THAN SIGN |
= | U+003D | \x3D | \x3D | EQUALS SIGN |
> | U+003E | \x3E | \x3E | GREATER-THAN SIGN |
? | U+003F | \x3F | \x3F | QUESTION MARK |
@ | U+0040 | \x40 | \x40 | COMMERCIAL AT |
[ | U+005B | \x5B | \x5B | LEFT SQUARE BRACKET |
\ | U+005C | \x5C | \x5C | REVERSE SOLIDUS |
] | U+005D | \x5D | \x5D | RIGHT SQUARE BRACKET |
^ | U+005E | \x5E | \x5E | CIRCUMFLEX ACCENT |
_ | U+005F | \x5F | \x5F | LOW LINE |
` | U+0060 | \x60 | \x60 | GRAVE ACCENT |
{ | U+007B | \x7B | \x7B | LEFT CURLY BRACKET |
| | U+007C | \x7C | \x7C | VERTICAL LINE |
} | U+007D | \x7D | \x7D | RIGHT CURLY BRACKET |
~ | U+007E | \x7E | \x7E | TILDE |
¢ | U+00A2 | \x81\x91 | \xA1\xF1 | CENT SIGN |
£ | U+00A3 | \x81\x92 | \xA1\xF2 | POUND SIGN |
§ | U+00A7 | \x81\x98 | \xA1\xF8 | SECTION SIGN |
¨ | U+00A8 | \x81\x4E | \xA1\xAF | DIAERESIS |
¬ | U+00AC | \x81\xCA | \xA2\xCC | NOT SIGN |
° | U+00B0 | \x81\x8B | \xA1\xEB | DEGREE SIGN |
± | U+00B1 | \x81\x7D | \xA1\xDE | PLUS-MINUS SIGN |
´ | U+00B4 | \x81\x4C | \xA1\xAD | ACUTE ACCENT |
¶ | U+00B6 | \x81\xF7 | \xA2\xF9 | PILCROW SIGN |
× | U+00D7 | \x81\x7E | \xA1\xDF | MULTIPLICATION SIGN |
÷ | U+00F7 | \x81\x80 | \xA1\xE0 | DIVISION SIGN |
‐ | U+2010 | \x81\x5D | \xA1\xBE | HYPHEN |
― | U+2015 | \x81\x5C | \xA1\xBD | HORIZONTAL BAR |
‖ | U+2016 | \x81\x61 | \xA1\xC2 | DOUBLE VERTICAL LINE |
‘ | U+2018 | \x81\x65 | \xA1\xC6 | LEFT SINGLE QUOTATION MARK |
’ | U+2019 | \x81\x66 | \xA1\xC7 | RIGHT SINGLE QUOTATION MARK |
“ | U+201C | \x81\x67 | \xA1\xC8 | LEFT DOUBLE QUOTATION MARK |
” | U+201D | \x81\x68 | \xA1\xC9 | RIGHT DOUBLE QUOTATION MARK |
† | U+2020 | \x81\xF5 | \xA2\xF7 | DAGGER |
‡ | U+2021 | \x81\xF6 | \xA2\xF8 | DOUBLE DAGGER |
‥ | U+2025 | \x81\x64 | \xA1\xC5 | TWO DOT LEADER |
… | U+2026 | \x81\x63 | \xA1\xC4 | HORIZONTAL ELLIPSIS |
‰ | U+2030 | \x81\xF1 | \xA2\xF3 | PER MILLE SIGN |
′ | U+2032 | \x81\x8C | \xA1\xEC | PRIME |
″ | U+2033 | \x81\x8D | \xA1\xED | DOUBLE PRIME |
※ | U+203B | \x81\xA6 | \xA2\xA8 | REFERENCE MARK |
℃ | U+2103 | \x81\x8E | \xA1\xEE | DEGREE CELSIUS |
← | U+2190 | \x81\xA9 | \xA2\xAB | LEFTWARDS ARROW |
↑ | U+2191 | \x81\xAA | \xA2\xAC | UPWARDS ARROW |
→ | U+2192 | \x81\xA8 | \xA2\xAA | RIGHTWARDS ARROW |
↓ | U+2193 | \x81\xAB | \xA2\xAD | DOWNWARDS ARROW |
⇒ | U+21D2 | \x81\xCB | \xA2\xCD | RIGHTWARDS DOUBLE ARROW |
⇔ | U+21D4 | \x81\xCC | \xA2\xCE | LEFT RIGHT DOUBLE ARROW |
∀ | U+2200 | \x81\xCD | \xA2\xCF | FOR ALL |
∂ | U+2202 | \x81\xDD | \xA2\xDF | PARTIAL DIFFERENTIAL |
∃ | U+2203 | \x81\xCE | \xA2\xD0 | THERE EXISTS |
∇ | U+2207 | \x81\xDE | \xA2\xE0 | NABLA |
∈ | U+2208 | \x81\xB8 | \xA2\xBA | ELEMENT OF |
∋ | U+220B | \x81\xB9 | \xA2\xBB | CONTAINS AS MEMBER |
− | U+2212 | \x81\x7C | \xA1\xDD | MINUS SIGN |
√ | U+221A | \x81\xE3 | \xA2\xE5 | SQUARE ROOT |
∝ | U+221D | \x81\xE5 | \xA2\xE7 | PROPORTIONAL TO |
∞ | U+221E | \x81\x87 | \xA1\xE7 | INFINITY |
∠ | U+2220 | \x81\xDA | \xA2\xDC | ANGLE |
∧ | U+2227 | \x81\xC8 | \xA2\xCA | LOGICAL AND |
∨ | U+2228 | \x81\xC9 | \xA2\xCB | LOGICAL OR |
∩ | U+2229 | \x81\xBF | \xA2\xC1 | INTERSECTION |
∪ | U+222A | \x81\xBE | \xA2\xC0 | UNION |
∫ | U+222B | \x81\xE7 | \xA2\xE9 | INTEGRAL |
∬ | U+222C | \x81\xE8 | \xA2\xEA | DOUBLE INTEGRAL |
∴ | U+2234 | \x81\x88 | \xA1\xE8 | THEREFORE |
∵ | U+2235 | \x81\xE6 | \xA2\xE8 | BECAUSE |
∽ | U+223D | \x81\xE4 | \xA2\xE6 | REVERSED TILDE |
≒ | U+2252 | \x81\xE0 | \xA2\xE2 | APPROXIMATELY EQUAL TO OR THE IMAGE OF |
≠ | U+2260 | \x81\x82 | \xA1\xE2 | NOT EQUAL TO |
≡ | U+2261 | \x81\xDF | \xA2\xE1 | IDENTICAL TO |
≦ | U+2266 | \x81\x85 | \xA1\xE5 | LESS-THAN OVER EQUAL TO |
≧ | U+2267 | \x81\x86 | \xA1\xE6 | GREATER-THAN OVER EQUAL TO |
≪ | U+226A | \x81\xE1 | \xA2\xE3 | MUCH LESS-THAN |
≫ | U+226B | \x81\xE2 | \xA2\xE4 | MUCH GREATER-THAN |
⊂ | U+2282 | \x81\xBC | \xA2\xBE | SUBSET OF |
⊃ | U+2283 | \x81\xBD | \xA2\xBF | SUPERSET OF |
⊆ | U+2286 | \x81\xBA | \xA2\xBC | SUBSET OF OR EQUAL TO |
⊇ | U+2287 | \x81\xBB | \xA2\xBD | SUPERSET OF OR EQUAL TO |
⊥ | U+22A5 | \x81\xDB | \xA2\xDD | UP TACK |
⌒ | U+2312 | \x81\xDC | \xA2\xDE | ARC |
─ | U+2500 | \x84\x9F | \xA8\xA1 | BOX DRAWINGS LIGHT HORIZONTAL |
━ | U+2501 | \x84\xAA | \xA8\xAC | BOX DRAWINGS HEAVY HORIZONTAL |
│ | U+2502 | \x84\xA0 | \xA8\xA2 | BOX DRAWINGS LIGHT VERTICAL |
┃ | U+2503 | \x84\xAB | \xA8\xAD | BOX DRAWINGS HEAVY VERTICAL |
┌ | U+250C | \x84\xA1 | \xA8\xA3 | BOX DRAWINGS LIGHT DOWN AND RIGHT |
┏ | U+250F | \x84\xAC | \xA8\xAE | BOX DRAWINGS HEAVY DOWN AND RIGHT |
┐ | U+2510 | \x84\xA2 | \xA8\xA4 | BOX DRAWINGS LIGHT DOWN AND LEFT |
┓ | U+2513 | \x84\xAD | \xA8\xAF | BOX DRAWINGS HEAVY DOWN AND LEFT |
└ | U+2514 | \x84\xA4 | \xA8\xA6 | BOX DRAWINGS LIGHT UP AND RIGHT |
┗ | U+2517 | \x84\xAF | \xA8\xB1 | BOX DRAWINGS HEAVY UP AND RIGHT |
┘ | U+2518 | \x84\xA3 | \xA8\xA5 | BOX DRAWINGS LIGHT UP AND LEFT |
┛ | U+251B | \x84\xAE | \xA8\xB0 | BOX DRAWINGS HEAVY UP AND LEFT |
├ | U+251C | \x84\xA5 | \xA8\xA7 | BOX DRAWINGS LIGHT VERTICAL AND RIGHT |
┝ | U+251D | \x84\xBA | \xA8\xBC | BOX DRAWINGS VERTICAL LIGHT AND RIGHT HEAVY |
┠ | U+2520 | \x84\xB5 | \xA8\xB7 | BOX DRAWINGS VERTICAL HEAVY AND RIGHT LIGHT |
┣ | U+2523 | \x84\xB0 | \xA8\xB2 | BOX DRAWINGS HEAVY VERTICAL AND RIGHT |
┤ | U+2524 | \x84\xA7 | \xA8\xA9 | BOX DRAWINGS LIGHT VERTICAL AND LEFT |
┥ | U+2525 | \x84\xBC | \xA8\xBE | BOX DRAWINGS VERTICAL LIGHT AND LEFT HEAVY |
┨ | U+2528 | \x84\xB7 | \xA8\xB9 | BOX DRAWINGS VERTICAL HEAVY AND LEFT LIGHT |
┫ | U+252B | \x84\xB2 | \xA8\xB4 | BOX DRAWINGS HEAVY VERTICAL AND LEFT |
┬ | U+252C | \x84\xA6 | \xA8\xA8 | BOX DRAWINGS LIGHT DOWN AND HORIZONTAL |
┯ | U+252F | \x84\xB6 | \xA8\xB8 | BOX DRAWINGS DOWN LIGHT AND HORIZONTAL HEAVY |
┰ | U+2530 | \x84\xBB | \xA8\xBD | BOX DRAWINGS DOWN HEAVY AND HORIZONTAL LIGHT |
┳ | U+2533 | \x84\xB1 | \xA8\xB3 | BOX DRAWINGS HEAVY DOWN AND HORIZONTAL |
┴ | U+2534 | \x84\xA8 | \xA8\xAA | BOX DRAWINGS LIGHT UP AND HORIZONTAL |
┷ | U+2537 | \x84\xB8 | \xA8\xBA | BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY |
┸ | U+2538 | \x84\xBD | \xA8\xBF | BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT |
┻ | U+253B | \x84\xB3 | \xA8\xB5 | BOX DRAWINGS HEAVY UP AND HORIZONTAL |
┼ | U+253C | \x84\xA9 | \xA8\xAB | BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL |
┿ | U+253F | \x84\xB9 | \xA8\xBB | BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY |
╂ | U+2542 | \x84\xBE | \xA8\xC0 | BOX DRAWINGS VERTICAL HEAVY AND HORIZONTAL LIGHT |
╋ | U+254B | \x84\xB4 | \xA8\xB6 | BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL |
■ | U+25A0 | \x81\xA1 | \xA2\xA3 | BLACK SQUARE |
□ | U+25A1 | \x81\xA0 | \xA2\xA2 | WHITE SQUARE |
▲ | U+25B2 | \x81\xA3 | \xA2\xA5 | BLACK UP-POINTING TRIANGLE |
△ | U+25B3 | \x81\xA2 | \xA2\xA4 | WHITE UP-POINTING TRIANGLE |
▼ | U+25BC | \x81\xA5 | \xA2\xA7 | BLACK DOWN-POINTING TRIANGLE |
▽ | U+25BD | \x81\xA4 | \xA2\xA6 | WHITE DOWN-POINTING TRIANGLE |
◆ | U+25C6 | \x81\x9F | \xA2\xA1 | BLACK DIAMOND |
◇ | U+25C7 | \x81\x9E | \xA1\xFE | WHITE DIAMOND |
○ | U+25CB | \x81\x9B | \xA1\xFB | WHITE CIRCLE |
◎ | U+25CE | \x81\x9D | \xA1\xFD | BULLSEYE |
● | U+25CF | \x81\x9C | \xA1\xFC | BLACK CIRCLE |
◯ | U+25EF | \x81\xFC | \xA2\xFE | LARGE CIRCLE |
★ | U+2605 | \x81\x9A | \xA1\xFA | BLACK STAR |
☆ | U+2606 | \x81\x99 | \xA1\xF9 | WHITE STAR |
♀ | U+2640 | \x81\x8A | \xA1\xEA | FEMALE SIGN |
♂ | U+2642 | \x81\x89 | \xA1\xE9 | MALE SIGN |
♪ | U+266A | \x81\xF4 | \xA2\xF6 | EIGHTH NOTE |
♭ | U+266D | \x81\xF3 | \xA2\xF5 | MUSIC FLAT SIGN |
♯ | U+266F | \x81\xF2 | \xA2\xF4 | MUSIC SHARP SIGN |
、 | U+3001 | \x81\x41 | \xA1\xA2 | IDEOGRAPHIC COMMA |
。 | U+3002 | \x81\x42 | \xA1\xA3 | IDEOGRAPHIC FULL STOP |
〃 | U+3003 | \x81\x56 | \xA1\xB7 | DITTO MARK |
〈 | U+3008 | \x81\x71 | \xA1\xD2 | LEFT ANGLE BRACKET |
〉 | U+3009 | \x81\x72 | \xA1\xD3 | RIGHT ANGLE BRACKET |
《 | U+300A | \x81\x73 | \xA1\xD4 | LEFT DOUBLE ANGLE BRACKET |
》 | U+300B | \x81\x74 | \xA1\xD5 | RIGHT DOUBLE ANGLE BRACKET |
「 | U+300C | \x81\x75 | \xA1\xD6 | LEFT CORNER BRACKET |
」 | U+300D | \x81\x76 | \xA1\xD7 | RIGHT CORNER BRACKET |
『 | U+300E | \x81\x77 | \xA1\xD8 | LEFT WHITE CORNER BRACKET |
』 | U+300F | \x81\x78 | \xA1\xD9 | RIGHT WHITE CORNER BRACKET |
【 | U+3010 | \x81\x79 | \xA1\xDA | LEFT BLACK LENTICULAR BRACKET |
】 | U+3011 | \x81\x7A | \xA1\xDB | RIGHT BLACK LENTICULAR BRACKET |
〒 | U+3012 | \x81\xA7 | \xA2\xA9 | POSTAL MARK |
〓 | U+3013 | \x81\xAC | \xA2\xAE | GETA MARK |
〔 | U+3014 | \x81\x6B | \xA1\xCC | LEFT TORTOISE SHELL BRACKET |
〕 | U+3015 | \x81\x6C | \xA1\xCD | RIGHT TORTOISE SHELL BRACKET |
〜 | U+301C | \x81\x60 | \xA1\xC1 | WAVE DASH |
゛ | U+309B | \x81\x4A | \xA1\xAB | KATAKANA-HIRAGANA VOICED SOUND MARK |
゜ | U+309C | \x81\x4B | \xA1\xAC | KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK |
・ | U+30FB | \x81\x45 | \xA1\xA6 | KATAKANA MIDDLE DOT |
！ | U+FF01 | \x81\x49 | \xA1\xAA | FULLWIDTH EXCLAMATION MARK |
＃ | U+FF03 | \x81\x94 | \xA1\xF4 | FULLWIDTH NUMBER SIGN |
＄ | U+FF04 | \x81\x90 | \xA1\xF0 | FULLWIDTH DOLLAR SIGN |
％ | U+FF05 | \x81\x93 | \xA1\xF3 | FULLWIDTH PERCENT SIGN |
＆ | U+FF06 | \x81\x95 | \xA1\xF5 | FULLWIDTH AMPERSAND |
（ | U+FF08 | \x81\x69 | \xA1\xCA | FULLWIDTH LEFT PARENTHESIS |
） | U+FF09 | \x81\x6A | \xA1\xCB | FULLWIDTH RIGHT PARENTHESIS |
＊ | U+FF0A | \x81\x96 | \xA1\xF6 | FULLWIDTH ASTERISK |
＋ | U+FF0B | \x81\x7B | \xA1\xDC | FULLWIDTH PLUS SIGN |
， | U+FF0C | \x81\x43 | \xA1\xA4 | FULLWIDTH COMMA |
． | U+FF0E | \x81\x44 | \xA1\xA5 | FULLWIDTH FULL STOP |
／ | U+FF0F | \x81\x5E | \xA1\xBF | FULLWIDTH SOLIDUS |
： | U+FF1A | \x81\x46 | \xA1\xA7 | FULLWIDTH COLON |
； | U+FF1B | \x81\x47 | \xA1\xA8 | FULLWIDTH SEMICOLON |
＜ | U+FF1C | \x81\x83 | \xA1\xE3 | FULLWIDTH LESS-THAN SIGN |
＝ | U+FF1D | \x81\x81 | \xA1\xE1 | FULLWIDTH EQUALS SIGN |
＞ | U+FF1E | \x81\x84 | \xA1\xE4 | FULLWIDTH GREATER-THAN SIGN |
？ | U+FF1F | \x81\x48 | \xA1\xA9 | FULLWIDTH QUESTION MARK |
＠ | U+FF20 | \x81\x97 | \xA1\xF7 | FULLWIDTH COMMERCIAL AT |
［ | U+FF3B | \x81\x6D | \xA1\xCE | FULLWIDTH LEFT SQUARE BRACKET |
＼ | U+FF3C | \x81\x5F | \xA1\xC0 | FULLWIDTH REVERSE SOLIDUS |
］ | U+FF3D | \x81\x6E | \xA1\xCF | FULLWIDTH RIGHT SQUARE BRACKET |
＾ | U+FF3E | \x81\x4F | \xA1\xB0 | FULLWIDTH CIRCUMFLEX ACCENT |
＿ | U+FF3F | \x81\x51 | \xA1\xB2 | FULLWIDTH LOW LINE |
｀ | U+FF40 | \x81\x4D | \xA1\xAE | FULLWIDTH GRAVE ACCENT |
｛ | U+FF5B | \x81\x6F | \xA1\xD0 | FULLWIDTH LEFT CURLY BRACKET |
｜ | U+FF5C | \x81\x62 | \xA1\xC3 | FULLWIDTH VERTICAL LINE |
｝ | U+FF5D | \x81\x70 | \xA1\xD1 | FULLWIDTH RIGHT CURLY BRACKET |
｡ | U+FF61 | \xA1 | \x8E\xA1 | HALFWIDTH IDEOGRAPHIC FULL STOP |
｢ | U+FF62 | \xA2 | \x8E\xA2 | HALFWIDTH LEFT CORNER BRACKET |
｣ | U+FF63 | \xA3 | \x8E\xA3 | HALFWIDTH RIGHT CORNER BRACKET |
､ | U+FF64 | \xA4 | \x8E\xA4 | HALFWIDTH IDEOGRAPHIC COMMA |
･ | U+FF65 | \xA5 | \x8E\xA5 | HALFWIDTH KATAKANA MIDDLE DOT |
￣ | U+FFE3 | \x81\x50 | \xA1\xB1 | FULLWIDTH MACRON |
￥ | U+FFE5 | \x81\x8F | \xA1\xEF | FULLWIDTH YEN SIGN |
http://kirik.tea-nifty.com/diary/2007/05/post_2332.html