Listup Shift_JIS Class

概要

  • Shift_JIS 範囲内の文字が Unicode のどの文字クラスに当たるのかを調べリストアップする。

ソース

  • ListupShiftJISClass.zip
  • Program.cs
    すべてを展開 / すべてを収束
      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    
     
     
     
     
     
     
     
    -
    |
    -
    |
    |
    |
    |
    -
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    !
    |
    |
    |
    -
    |
    -
    |
    !
    |
    |
    -
    |
    |
    -
    |
    |
    !
    !
    |
    -
    |
    |
    !
    !
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    |
    -
    |
    |
    |
    !
    |
    |
    -
    |
    |
    -
    |
    |
    -
    |
    |
    !
    !
    |
    !
    !
    !
    
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
     
    namespace ListupShiftJISClass
    {
        /// <summary>
        /// Console tool that walks every UTF-16 code unit (U+0000..U+FFFF), keeps the
        /// ones that survive a Shift_JIS round trip, and prints the name of each
        /// .NET regex Unicode named block in which at least one such character falls.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Unicode named-block names accepted by .NET regular expressions
            /// (each is used as <c>\p{Is</c><i>name</i><c>}</c>).
            /// http://msdn.microsoft.com/ja-jp/library/20bw873z
            /// </summary>
            /// <remarks>
            /// Order matters: <see cref="GetClassName"/> reports the first entry that
            /// matches. NOTE(review): the .NET documentation lists some of these as two
            /// spellings of the same block (Greek / GreekandCoptic,
            /// CombiningDiacriticalMarksforSymbols / CombiningMarksforSymbols,
            /// PrivateUse / PrivateUseArea); if so, the second spelling of each pair can
            /// never be reported. The list is intentionally left unchanged so that the
            /// program output stays the same.
            /// </remarks>
            private static readonly string[] ClassNames = {
                "BasicLatin",
                "Latin-1Supplement",
                "LatinExtended-A",
                "LatinExtended-B",
                "IPAExtensions",
                "SpacingModifierLetters",
                "CombiningDiacriticalMarks",
                "Greek",
                "GreekandCoptic",
                "Cyrillic",
                "CyrillicSupplement",
                "Armenian",
                "Hebrew",
                "Arabic",
                "Syriac",
                "Thaana",
                "Devanagari",
                "Bengali",
                "Gurmukhi",
                "Gujarati",
                "Oriya",
                "Tamil",
                "Telugu",
                "Kannada",
                "Malayalam",
                "Sinhala",
                "Thai",
                "Lao",
                "Tibetan",
                "Myanmar",
                "Georgian",
                "HangulJamo",
                "Ethiopic",
                "Cherokee",
                "UnifiedCanadianAboriginalSyllabics",
                "Ogham",
                "Runic",
                "Tagalog",
                "Hanunoo",
                "Buhid",
                "Tagbanwa",
                "Khmer",
                "Mongolian",
                "Limbu",
                "TaiLe",
                "KhmerSymbols",
                "PhoneticExtensions",
                "LatinExtendedAdditional",
                "GreekExtended",
                "GeneralPunctuation",
                "SuperscriptsandSubscripts",
                "CurrencySymbols",
                "CombiningDiacriticalMarksforSymbols",
                "CombiningMarksforSymbols",
                "LetterlikeSymbols",
                "NumberForms",
                "Arrows",
                "MathematicalOperators",
                "MiscellaneousTechnical",
                "ControlPictures",
                "OpticalCharacterRecognition",
                "EnclosedAlphanumerics",
                "BoxDrawing",
                "BlockElements",
                "GeometricShapes",
                "MiscellaneousSymbols",
                "Dingbats",
                "MiscellaneousMathematicalSymbols-A",
                "SupplementalArrows-A",
                "BraillePatterns",
                "SupplementalArrows-B",
                "MiscellaneousMathematicalSymbols-B",
                "SupplementalMathematicalOperators",
                "MiscellaneousSymbolsandArrows",
                "CJKRadicalsSupplement",
                "KangxiRadicals",
                "IdeographicDescriptionCharacters",
                "CJKSymbolsandPunctuation",
                "Hiragana",
                "Katakana",
                "Bopomofo",
                "HangulCompatibilityJamo",
                "Kanbun",
                "BopomofoExtended",
                "KatakanaPhoneticExtensions",
                "EnclosedCJKLettersandMonths",
                "CJKCompatibility",
                "CJKUnifiedIdeographsExtensionA",
                "YijingHexagramSymbols",
                "CJKUnifiedIdeographs",
                "YiSyllables",
                "YiRadicals",
                "HangulSyllables",
                "HighSurrogates",
                "HighPrivateUseSurrogates",
                "LowSurrogates",
                "PrivateUse",
                "PrivateUseArea",
                "CJKCompatibilityIdeographs",
                "AlphabeticPresentationForms",
                "ArabicPresentationForms-A",
                "VariationSelectors",
                "CombiningHalfMarks",
                "CJKCompatibilityForms",
                "SmallFormVariants",
                "ArabicPresentationForms-B",
                "HalfwidthandFullwidthForms",
                "Specials",
            };

            /// <summary>
            /// One <c>\p{Is...}</c> regex per entry of <see cref="ClassNames"/>, keyed by
            /// the block name. Declared after <see cref="ClassNames"/> on purpose: static
            /// field initializers run in textual order, so the names exist when this
            /// table is built.
            /// </summary>
            private static readonly Dictionary<string, Regex> regCharClasses = BuildClassRegexes();

            /// <summary>
            /// Shift_JIS encoding, looked up once instead of twice per tested character.
            /// </summary>
            private static readonly Encoding ShiftJisEncoding = Encoding.GetEncoding("shift_jis");

            /// <summary>
            /// Enumerates U+0000..U+FFFF, groups the Shift_JIS-representable characters
            /// by block name, and writes each block name that occurred to standard
            /// output, one per line, in the order the blocks were first encountered.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                // Characters collected per block name.
                Dictionary<string, StringBuilder> result = new Dictionary<string, StringBuilder>();
                // First-seen order is tracked explicitly; Dictionary enumeration order
                // is not a documented guarantee.
                List<string> firstSeenOrder = new List<string>();

                for (int code = 0x00; code <= 0xffff; ++code)
                {
                    string c = ((char)code).ToString();
                    if (!IsShiftJIS(c))
                    {
                        continue;
                    }

                    string classname = GetClassName(c);
                    StringBuilder members;
                    if (!result.TryGetValue(classname, out members))
                    {
                        members = new StringBuilder();
                        result[classname] = members;
                        firstSeenOrder.Add(classname);
                    }
                    members.Append(c);
                }

                foreach (string classname in firstSeenOrder)
                {
                    Console.WriteLine(classname);
                    // Alternative output that also dumps the characters of each block:
                    //Console.WriteLine("{0}\n{1}\n", classname, result[classname]);
                }
            }

            /// <summary>
            /// Reports whether the given string consists only of characters that survive
            /// an encode/decode round trip through Shift_JIS.
            /// </summary>
            /// <remarks>
            /// The string is encoded to Shift_JIS bytes and decoded again; it is treated
            /// as Shift_JIS-representable only if the decoded text equals the input.
            /// http://acha-ya.cocolog-nifty.com/blog/2010/12/unicode-ef79.html
            /// </remarks>
            /// <param name="checkString">The string to check.</param>
            /// <returns>
            /// <list type="bullet">
            /// <item>true: the round trip reproduced the input exactly</item>
            /// <item>false: at least one character did not survive the round trip</item>
            /// </list>
            /// </returns>
            static bool IsShiftJIS(string checkString)
            {
                byte[] encoded = ShiftJisEncoding.GetBytes(checkString);
                return checkString == ShiftJisEncoding.GetString(encoded);
            }

            /// <summary>
            /// Returns the first name in <see cref="ClassNames"/> whose block regex
            /// matches the given text.
            /// </summary>
            /// <param name="c">The text to classify (the caller passes a single character).</param>
            /// <returns>The matching block name, or "-" when no listed block matches.</returns>
            static string GetClassName(string c)
            {
                foreach (string classname in ClassNames)
                {
                    if (regCharClasses[classname].IsMatch(c))
                    {
                        return classname;
                    }
                }
                return "-";    // Not found
            }

            /// <summary>
            /// Builds the block-name to regex lookup table from <see cref="ClassNames"/>.
            /// </summary>
            private static Dictionary<string, Regex> BuildClassRegexes()
            {
                Dictionary<string, Regex> table = new Dictionary<string, Regex>(ClassNames.Length);
                foreach (string classname in ClassNames)
                {
                    table[classname] = new Regex(@"\p{Is" + classname + "}");
                }
                return table;
            }
        }
    }

出力

BasicLatin
Latin-1Supplement
Greek
Cyrillic
GeneralPunctuation
LetterlikeSymbols
NumberForms
Arrows
MathematicalOperators
MiscellaneousTechnical
EnclosedAlphanumerics
BoxDrawing
GeometricShapes
MiscellaneousSymbols
CJKSymbolsandPunctuation
Hiragana
Katakana
EnclosedCJKLettersandMonths
CJKCompatibility
CJKUnifiedIdeographs
PrivateUse
CJKCompatibilityIdeographs
HalfwidthandFullwidthForms

添付ファイル: ListupShiftJISClass.zip 256件 [詳細]

リロード   新規 下位ページ作成 編集 凍結 差分 添付 コピー 名前変更   ホーム 一覧 検索 最終更新 バックアップ リンク元   ヘルプ   最終更新のRSS
Last-modified: Tue, 28 Aug 2012 08:15:06 JST (2184d)