CSharp to LowLevel

  • C#

Es gibt 10 Antworten in diesem Thema. Der letzte Beitrag () ist von Facebamm.

    CSharp to LowLevel

    Hallo,

    Die Hauptfrage ist, wie würdet ihr das angehen?

    Was ist mein Ziel?
    Mein Ziel ist es, eine neue Programmiersprache bzw. ein angepasstes CSharp mit mehr Low-Level-Zugriff und -Features zu "schreiben".
    Was stört mich momentan an CSharp?

    Spoiler anzeigen
    Naja, mittlerweile arbeite ich viel mit CSharp auf Geräten, mit denen ich gern viel mehr machen würde, zum Beispiel mit einem Raspberry Pi 4 (hieraus entstand auch alles).
    ... Mir fehlt da einfach das Low-Level ... In manchen Fällen würde ich gern Assembler ausführen, was in CSharp sehr schwer umzusetzen ist.

    Hier mal wie sowas aussehen könnte(windows)
    Spoiler anzeigen

    C#-Quellcode

    /// <summary>
    /// Helpers for turning a buffer of raw machine code into callable native memory.
    /// Windows only: relies on <c>kernel32!VirtualProtectEx</c>.
    /// </summary>
    static unsafe class PFunction
    {
        /// <summary>Protection flag passed to <c>VirtualProtectEx</c> (0x40, PAGE_EXECUTE_READWRITE).</summary>
        private const uint PageExecuteReadWrite = 0x40;

        /// <summary>
        /// Copies <paramref name="funcSrc"/> into unmanaged memory and makes that memory executable.
        /// </summary>
        /// <param name="funcSrc">Machine code of the function; must contain at least one byte.</param>
        /// <returns>Pointer to the executable copy. The allocation is not freed by this class.</returns>
        /// <exception cref="ArgumentException"><paramref name="funcSrc"/> is null or empty.</exception>
        /// <exception cref="InvalidOperationException">The memory protection could not be changed.</exception>
        public static void* LoadMemoryFunction(byte[] funcSrc)
        {
            if (funcSrc is null || funcSrc.Length == 0)
            {
                throw new ArgumentException("Machine code buffer must not be null or empty.", nameof(funcSrc));
            }

            int srcLen = funcSrc.Length;
            IntPtr buffer = Marshal.AllocHGlobal(srcLen);

            // Marshal.Copy handles the managed array safely; taking a raw pointer to
            // funcSrc[0] without pinning would let the GC move the array mid-copy.
            Marshal.Copy(funcSrc, 0, buffer, srcLen);

            bool isExecutable = VirtualProtectEx(
                Process.GetCurrentProcess().Handle,
                (void*)buffer,
                (UIntPtr)(uint)srcLen,
                PageExecuteReadWrite,
                out _);

            if (!isExecutable)
            {
                // Read the error code before any other call can overwrite it, then
                // release the buffer so a failed load does not leak.
                int error = Marshal.GetLastWin32Error();
                Marshal.FreeHGlobal(buffer);
                throw new InvalidOperationException($"VirtualProtectEx failed with Win32 error {error}.");
            }

            return (void*)buffer;
        }

        [DllImport("kernel32.dll", SetLastError = true)]
        static extern bool VirtualProtectEx(IntPtr hProcess,
            void* lpAddress,
            UIntPtr dwSize,
            uint flNewProtect,
            out uint lpflOldProtect);
    }
    // Not even sure whether this still works.

    /// <summary>Raw machine code that <see cref="Init"/> loads into executable memory.</summary>
    private static readonly byte[] asmCode = new byte[]
    {
        0x89, 0xD0,
        0x44, 0x29, 0xC0,
        0x89, 0xC1,
        0xC1, 0xF9, 0x1F,
        0x21, 0xC1,
        0x89, 0xD0,
        0x29, 0xC8,
        0xC3,
    };

    private byte* asmCodeBuffer;

    /// <summary>Function pointer to the loaded copy of <see cref="asmCode"/>.</summary>
    private delegate* <int, int, int> MinMaxASM;

    /// <summary>Function pointer to the managed method <c>MinMax2</c>.</summary>
    private delegate* <int, int, int> MinMax;

    /// <summary>Loads the machine code and wires up both function pointers.</summary>
    public void Init()
    {
        void* loaded = PFunction.LoadMemoryFunction(asmCode);
        MinMaxASM = (delegate* <int, int, int>) loaded;
        MinMax = &MinMax2;
    }


    - oder einfach ein paar NOPs als Timing einzubauen ist recht "aufwendig"
    Spoiler anzeigen

    C#-Quellcode

    /// <summary>Parameterless delegate type for the generated delay methods.</summary>
    public delegate void TimerInvoker();

    /// <summary>
    /// Builds a dynamic method whose body is <paramref name="cycleCount"/> IL <c>nop</c>
    /// instructions followed by <c>ret</c>.
    /// </summary>
    /// <param name="cycleCount">Number of <c>nop</c> opcodes to emit; values of zero or less emit none.</param>
    /// <returns>A delegate bound to the generated method.</returns>
    public static TimerInvoker CreateNopTimer(int cycleCount)
    {
        string methodName = $"Delay{cycleCount}";

        // A null return type and null parameter list declare a "void Name()" method.
        var delayMethod = new DynamicMethod(methodName, null, null, typeof(Timer).Module);
        ILGenerator generator = delayMethod.GetILGenerator();

        for (int remaining = cycleCount; remaining > 0; remaining--)
        {
            generator.Emit(OpCodes.Nop);
        }

        generator.Emit(OpCodes.Ret);

        Delegate created = delayMethod.CreateDelegate(typeof(TimerInvoker));
        return (TimerInvoker)created;
    }

    - oder warum muss ich bei einer IF-Abfrage immer ein bool haben, warum nicht 0 := 0 und >0 := 1?
    Hier beim auslesen vom status im speicher
    Spoiler anzeigen

    C#-Quellcode

    /// <summary>
    /// Reads the value behind <c>_levRegisterOffset</c> and reports whether all bits of
    /// <c>_levValueMask</c> are set in it.
    /// </summary>
    /// <returns><c>true</c> when the masked value equals the mask itself.</returns>
    public unsafe bool GetValueBool()
    {
        var masked = *_levRegisterOffset & _levValueMask;
        return masked == _levValueMask;
    }

    Warum die abfrage ob das identisch ist um einen bool zu erhalten?

    - oder warum kann ich keinen Datentyp haben, der ab dem 3. Bit schreibt, sondern nur in Byte-Schritten [FieldOffset(byteoffset)]?

    - dann fehlen mir die macros
    Spoiler anzeigen

    C#-Quellcode

    1. // The source is still being translated from C++ to C#, so do not be surprised by the sizeof(struct v4l2_streamparm)
    2. /// <summary>_IOWR('V', 15, struct v4l2_buffer)</summary>
    3. public const uint VIDIOC_QBUF = (_IOWR << (int) _IOC_DIRSHIFT)
    4. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    5. | (15U << (int) _IOC_NRSHIFT)
    6. | ((uint) sizeof(struct v4l2_buffer) << (int) _IOC_SIZESHIFT);
    7. /// <summary>_IOWR('V', 17, struct v4l2_buffer)</summary>
    8. public const uint VIDIOC_DQBUF = (_IOWR << (int) _IOC_DIRSHIFT)
    9. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    10. | (17U << (int) _IOC_NRSHIFT)
    11. | ((uint) sizeof(struct v4l2_buffer) << (int) _IOC_SIZESHIFT);
    12. /// <summary>_IOW('V', 18, int)</summary>
    13. public const uint VIDIOC_STREAMON = (_IOW << (int) _IOC_DIRSHIFT)
    14. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    15. | (18U << (int) _IOC_NRSHIFT)
    16. | ((uint) sizeof(int) << (int) _IOC_SIZESHIFT);
    17. /// <summary>_IOW('V', 19, int)</summary>
    18. public const uint VIDIOC_STREAMOFF = (_IOW << (int) _IOC_DIRSHIFT)
    19. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    20. | (19U << (int) _IOC_NRSHIFT)
    21. | ((uint) sizeof(int) << (int) _IOC_SIZESHIFT);
    22. /// <summary>_IOWR('V', 21, struct v4l2_streamparm)</summary>
    23. public const uint VIDIOC_G_PARM = (_IOWR << (int) _IOC_DIRSHIFT)
    24. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    25. | (21U << (int) _IOC_NRSHIFT)
    26. | ((uint) sizeof(struct v4l2_streamparm) << (int) _IOC_SIZESHIFT);
    27. /// <summary>_IOWR('V', 22, struct v4l2_streamparm)</summary>
    28. public const uint VIDIOC_S_PARM = (_IOWR << (int) _IOC_DIRSHIFT)
    29. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    30. | (22U << (int) _IOC_NRSHIFT)
    31. | ((uint) sizeof(struct v4l2_streamparm) << (int) _IOC_SIZESHIFT);
    32. /// <summary>_IOR('V', 23, v4l2_std_id)</summary>
    33. public const uint VIDIOC_G_STD = (_IOR << (int) _IOC_DIRSHIFT)
    34. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    35. | (23U << (int) _IOC_NRSHIFT)
    36. | ((uint) sizeof(v4l2_std_id) << (int) _IOC_SIZESHIFT);
    37. /// <summary>_IOW('V', 24, v4l2_std_id)</summary>
    38. public const uint VIDIOC_S_STD = (_IOW << (int) _IOC_DIRSHIFT)
    39. | ((uint) 'V' << (int) _IOC_TYPESHIFT)
    40. | (24U << (int) _IOC_NRSHIFT)
    41. | ((uint) sizeof(v4l2_std_id) << (int) _IOC_SIZESHIFT);



    Dann stört es mich, dass ich keine kontextfreien Methoden schreiben kann.
    Also ich brauch immer eine Klasse.
    Ich weiß ich kann using static xyz; nutzen, aber das ist doch nicht sinn der sache.

    Ich weiß das es für VS ein plugin gibt mit dem direkt il geschrieben werden kann, aber selbst das ist nicht so schön. (Mein empfinden)


    Was ich damit sagen will: Immer wird gesagt, man kann kein OS in CSharp schreiben (Was schon probiert wurde und auch ziemlich gut klappte, soweit ich weiß) und das find ich eig traurig,
    denn ich find CSharp eine wunderschöne sprache, denn sie liest sich (für mich) wie ein Buch und ich find auch den Style sehr schön in CSharp.

    Kurz um drum: Mir fehlen Feature ... und die Code-Gen Ka*** macht es nicht besser. Ist schön aber jedes mal Visual Studio neustarten nach einer änderung macht kein Spaß :thumbdown:
    PS: ReSharper ist da nicht besser.


    So wie würdet ihr das angehen?
    - Eigenen Parser mit Lexer, Grammatik und was da noch dazu gehört (nicht Compiler, der muss eh neu geschrieben werden)
    - Den Kram von Roslyn nutzen, der sehr aufwendig geschrieben ist und erst studiert werden muss?
    - Microsoft.CodeAnalysis.CSharp- Nuget nutzen, damit ich mir lexing und parsing spare und "nur noch" die nodes analysieren muss?
    - oder ganz anders?


    MfG

    Facebamm
    Ich kann dir nicht folgen?
    Eine neue Sprache angelehnt an C#? Oder Erweiterungen wie Nuget-Pakete?

    Was ist für dich LowLevel? LowLevel als Einstiegshürde? Als Hardwarenah?

    Was passt dir jetzt an Visual Studio nicht?
    Reicht VS-Code?
    Die deutsche Sprache ist Freeware, du kannst sie benutzen, ohne dafür zu bezahlen. Sie ist aber nicht Open Source, also darfst du sie nicht verändern, wie es dir gerade passt.
    Eher gesagt ich will die ganze .Net Runtime los werden. Also wie C oder Cpp nur ohne den Linker kram und der komischen Sprachen.

    Zudem möchte ich sprachspezifische Features hinzufügen wie:
    - asm-inline
    - macros
    - BitPos, also FieldOffset bloß für Bits und das ohne Getter, Setter und Masking
    - Circular shift <<<, >>>
    - if-abfragen bei den ich direkt nen wert rein geben kann und es nur geprüft wird ob der wert >0 := true und 0 := false

    Aber dazu brauche ich einen eigenen Compiler.
    Und mein erster Schritt dahin sind der Lexer, Parser etc.

    Nun ist die Frage wie ich das am besten angehe.
    Dazu hab ich mir verschiedene gedanken gemacht wie ich das machen könnte um erstmal einen Parser zu bekommen:
    - Eigenen Parser mit Lexer, Grammatik und was da noch dazu gehört (nicht Compiler, der muss eh neu geschrieben werden)
    - Den Kram von Roslyn nutzen, der sehr aufwendig geschrieben ist und erst studiert werden muss?
    - Microsoft.CodeAnalysis.CSharp- Nuget nutzen, damit ich mir lexing und parsing spare und "nur noch" die nodes analysieren muss?
    - oder ganz anders?

    BTW. der Parser muss nicht in Cpp oder C geschrieben werden, kann das ganze auch in CSharp machen, aber das was am ende rauskommen soll, sollte unabhängig von der .Net Runtime sein.
    Das ist ja ein absolutes Mammutprojekt, das wird dich viele Jahre kosten denke ich.
    Da ist es vermutlich besser sich eine ganz neue Sprache zu suchen, wenn dich wirklich so viel an dem managed in C# stört? Ist halt absichtlich keine LowLevel-Sprache. Es gibt ja noch Unmengen alternativen.
    Aber vielleicht kannst du ja mal schauen was Unity da gemacht hat. Die haben ja auch einen IL2Cpp gemacht um den C# code nach C++ und dann native zu kompilieren:

    IL2CPP
    LLILC Reader

    Fakiz schrieb:

    UWP Apps werden zu Nativen Code kompiliert Stichwort .Net Native.

    Darum geht es hier aber nicht!

    und Btw.: Universal Windows Platform (UWP) apps[1] (formerly Windows Store apps and Metro-style apps)[2] are applications that can be used across all compatible Microsoft Windows devices, including personal computers (PCs), tablets, smartphones, Xbox One, Microsoft HoloLens, and Internet of Things. - wiki

    Was mit ATMega Chips?
    Wo sind da die Feature?
    So wie ich das sehe, ist das Kernstück deiner Anwendung die Entwicklung des Compilers; Syntax und Schlüsselwörter existieren weitestgehend. Wenn du einmal ganz ungeniert .Net Native (Stichwort .Net Native, nicht UWP) in deine bevorzugte Suchmaschine eingibst, stolperst du zwangsläufig über den MS DevBlog zum .Net-Native-Compiler. Dort erklärt MS anschaulich, wie dieser funktioniert und welche Bestandteile er hat. Das sollte neben dem Lernen einer Assemblersprache ein guter Anfang sein.
    Okay, ... hast du für mich einen direkten link, denn meine Suchmaschine bringt mir diese Beiträge nicht


    Es wäre schon cool, wenn ich da auch einiges "klauen" könnte, so muss ich nicht alles neu schreiben.

    Edit:
    bzw.
    Hier mal mein Lexer :D btw ... ist noch nicht alles drin was ich haben will :w
    was mich auch stört ist, das ich immer den Source mit gebe

    Wird irgendwann noch auf git kommen das ganze :D

    LexerLevel1.cs
    Spoiler anzeigen

    C#-Quellcode

    1. namespace CNative;
    2. using System.Collections.Concurrent;
    3. using System.Runtime.CompilerServices;
    4. using static LexerNodeType;
    /// <summary>Coarse character classes assigned by the first lexer pass.</summary>
    internal enum LexerNodeType
    {
        /// <summary>A single whitespace character.</summary>
        Space = 0,

        /// <summary>A run of ASCII letters.</summary>
        Word = 1,

        /// <summary>A run of decimal digits.</summary>
        Number = 2,

        /// <summary>Any other single character.</summary>
        Symbol = 3,
    }
    /// <summary>
    /// A level-1 lexer node: a character class plus the half-open source range
    /// [<see cref="Start"/>, <see cref="End"/>) it covers.
    /// </summary>
    /// <remarks>
    /// Declared <c>readonly</c> because every field is already read-only; this lets the
    /// compiler skip defensive copies when the struct is accessed through <c>in</c> or
    /// read-only references.
    /// </remarks>
    internal readonly struct LexerNode
    {
        /// <summary>Character class of the covered text.</summary>
        public readonly LexerNodeType Type;

        /// <summary>Index of the first covered character.</summary>
        public readonly int Start;

        /// <summary>Index one past the last covered character.</summary>
        public readonly int End;

        /// <summary>Creates a node of the given type covering [start, end).</summary>
        public LexerNode(LexerNodeType type, int start, int end)
        {
            Type = type;
            Start = start;
            End = end;
        }
    }
    /// <summary>
    /// First lexer pass: splits the source text into whitespace, word, number and
    /// single-character symbol nodes and enqueues them for the next pass.
    /// </summary>
    internal class LexerLevel1
    {
        private readonly string _src;
        private readonly ConcurrentQueue<LexerNode> _queue;

        /// <summary>Creates the pass for one source text.</summary>
        /// <param name="src">Source text to split.</param>
        /// <param name="level1LexerQueue">Queue that receives the produced nodes.</param>
        public LexerLevel1(
            string src,
            ref ConcurrentQueue<LexerNode> level1LexerQueue)
        {
            _src = src;
            _queue = level1LexerQueue;
        }

        /// <summary>
        /// Scans the whole source on a thread-pool thread and enqueues one node per
        /// recognised run or symbol.
        /// </summary>
        /// <param name="cts">
        /// Polled to stop early; it is also cancelled by this method once scanning has
        /// finished, unless <paramref name="ct"/> is already cancelled.
        /// </param>
        /// <param name="ct">Token handed to <see cref="Task.Run(Action, CancellationToken)"/>.</param>
        public async Task AnalyseAsync(CancellationTokenSource cts, CancellationToken ct)
        {
            await Task.Run(() =>
            {
                // A span view over the string avoids copying the source into a char[].
                ReadOnlySpan<char> sourceCode = _src;
                int i = 0;
                while (!cts.IsCancellationRequested && i < sourceCode.Length)
                {
                    LexerNode node = default;
                    bool classified = IsSpace(sourceCode, in i, ref node)
                        || IsDigit(sourceCode, in i, ref node)
                        || IsWord(sourceCode, in i, ref node);

                    if (!classified)
                    {
                        // Anything that is not whitespace, digit or letter becomes a one-character symbol.
                        node = new LexerNode(LexerNodeType.Symbol, i, i + 1);
                    }

                    _queue.Enqueue(node);
                    i = node.End;
                }

                if (!ct.IsCancellationRequested)
                {
                    cts.Cancel();
                }
            }, ct);
        }

        /// <summary>Recognises a single space, tab, carriage return or line feed at <paramref name="i"/>.</summary>
        private static bool IsSpace(ReadOnlySpan<char> src, in int i, ref LexerNode node)
        {
            if (src[i] is ' ' or '\t' or '\r' or '\n')
            {
                node = new LexerNode(LexerNodeType.Space, i, i + 1);
                return true;
            }

            return false;
        }

        /// <summary>Recognises a run of the digits 0-9 starting at <paramref name="i"/>.</summary>
        private static bool IsDigit(ReadOnlySpan<char> src, in int i, ref LexerNode node)
        {
            if (src[i] is not (>= '0' and <= '9'))
            {
                return false;
            }

            int end = i + 1;
            while (end < src.Length && src[end] is >= '0' and <= '9')
            {
                end++;
            }

            node = new LexerNode(LexerNodeType.Number, i, end);
            return true;
        }

        /// <summary>Recognises a run of the letters A-Z / a-z starting at <paramref name="i"/>.</summary>
        private static bool IsWord(ReadOnlySpan<char> src, in int i, ref LexerNode node)
        {
            if (src[i] is not ((>= 'A' and <= 'Z') or (>= 'a' and <= 'z')))
            {
                return false;
            }

            int end = i + 1;
            while (end < src.Length && src[end] is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z'))
            {
                end++;
            }

            node = new LexerNode(LexerNodeType.Word, i, end);
            return true;
        }
    }

    LexerLevel2.cs
    Spoiler anzeigen

    Quellcode

    1. namespace CNative;
    2. using System.Collections.Concurrent;
    3. using System.Diagnostics;
    4. using static LexerTokenType;
    5. public enum LexerTokenType
    6. {
    7. // keyword
    8. /// <summary> byte </summary>
    9. Byte,
    10. /// <summary> bool </summary>
    11. Bool,
    12. /// <summary> class </summary>
    13. Class,
    14. /// <summary> char </summary>
    15. Char,
    16. /// <summary> do </summary>
    17. Do,
    18. /// <summary> double </summary>
    19. Double,
    20. /// <summary> enum </summary>
    21. Enum,
    22. /// <summary> false </summary>
    23. False,
    24. /// <summary> for </summary>
    25. For,
    26. /// <summary> foreach </summary>
    27. Foreach,
    28. /// <summary> float </summary>
    29. Float,
    30. /// <summary> get </summary>
    31. Get,
    32. /// <summary> interface </summary>
    33. Interface,
    34. /// <summary> if </summary>
    35. If,
    36. /// <summary> null </summary>
    37. Null,
    38. /// <summary> public </summary>
    39. Public,
    40. /// <summary> private </summary>
    41. Private,
    42. /// <summary> return </summary>
    43. Return,
    44. /// <summary> set </summary>
    45. Set,
    46. /// <summary> struct </summary>
    47. Struct,
    48. /// <summary> sbyte </summary>
    49. SByte,
    50. /// <summary> stackalloc </summary>
    51. Stackalloc,
    52. /// <summary> static </summary>
    53. Static,
    54. /// <summary> true </summary>
    55. True,
    56. /// <summary> this </summary>
    57. This,
    58. /// <summary> ushort </summary>
    59. UShort,
    60. /// <summary> uint </summary>
    61. UInt,
    62. /// <summary> ulong </summary>
    63. Ulong,
    64. /// <summary> using </summary>
    65. Using,
    66. /// <summary> var </summary>
    67. Var,
    68. /// <summary> void </summary>
    69. Void,
    70. /// <summary> while </summary>
    71. While,
    72. /// <summary> #define </summary>
    73. Define,
    74. /// <summary> [_A-Za-z][_A-Za-z0-9]* </summary>
    75. Identifier,
    76. /// <summary> \s </summary>
    77. Space,
    78. /// <summary> \t </summary>
    79. Tab,
    80. /// <summary> \n or \r\n </summary>
    81. Newline,
    82. // Comparison operators
    83. /// <summary> &lt;= </summary>
    84. LessThen,
    85. /// <summary> &gt;= </summary>
    86. GreaterThen,
    87. /// <summary> == </summary>
    88. EqualsThen,
    89. /// <summary> != </summary>
    90. NotEqualsThen,
    91. // Assign
    92. /// <summary> = </summary>
    93. Assign,
    94. // BinaryAssign
    95. /// <summary> &amp;= </summary>
    96. AndEquals,
    97. /// <summary> |= </summary>
    98. OrEquals,
    99. /// <summary> ^= </summary>
    100. XorEquals,
    101. /// <summary> ~= </summary>
    102. InvertEquals,
    103. // MathematicalAssign
    104. /// <summary> += </summary>
    105. PlusEquals,
    106. /// <summary> -= </summary>
    107. MinusEquals,
    108. /// <summary> *= </summary>
    109. MultiplicationEquals,
    110. /// <summary> **= </summary>
    111. PowerEquals,
    112. /// <summary> /= </summary>
    113. DivisionEquals,
    114. /// <summary> %= </summary>
    115. ModuloEquals,
    116. // BinaryOperator
    117. /// <summary> ~ </summary>
    118. Invert,
    119. /// <summary> ^ </summary>
    120. Xor,
    121. /// <summary> | </summary>
    122. Or,
    123. /// <summary> &amp; </summary>
    124. And,
    125. // MathematicalOperator
    126. /// <summary> + </summary>
    127. Plus,
    128. /// <summary> - </summary>
    129. Minus,
    130. /// <summary> * </summary>
    131. Multiplication,
    132. /// <summary> ** </summary>
    133. Power,
    134. /// <summary> / </summary>
    135. Division,
    136. /// <summary> % </summary>
    137. Modulo,
    138. // Symbols
    139. /// <summary> . </summary>
    140. Dot,
    141. /// <summary> , </summary>
    142. Comma,
    143. /// <summary> ' </summary>
    144. SingleQuotationMark,
    145. /// <summary> " </summary>
    146. DoubleQuotationMark,
    147. /// <summary> : </summary>
    148. Colon,
    149. /// <summary> ; </summary>
    150. Semicolon,
    151. /// <summary> ! </summary>
    152. ExclamationMark,
    153. /// <summary> ( </summary>
    154. OpenParenthesis,
    155. /// <summary> ) </summary>
    156. CloseParenthesis,
    157. /// <summary> [ </summary>
    158. OpenBrackets,
    159. /// <summary> ] </summary>
    160. CloseBrackets,
    161. /// <summary> { </summary>
    162. OpenBraces,
    163. /// <summary> } </summary>
    164. CloseBraces,
    165. /// <summary> [0-9]+ </summary>
    166. Numbers,
    167. /// <summary> //[^$]* </summary>
    168. SingleLineComment,
    169. /// <summary> /* </summary>
    170. InlineCommentStart,
    171. /// <summary> */ </summary>
    172. InlineCommentEnd,
    173. Unknown = -1
    174. }
    /// <summary>
    /// A level-2 lexer token: a token type plus the half-open source range
    /// [<see cref="Start"/>, <see cref="End"/>) it covers.
    /// </summary>
    /// <remarks>
    /// Declared <c>readonly</c> because every field is already read-only; this lets the
    /// compiler skip defensive copies on read-only access.
    /// </remarks>
    [DebuggerDisplay("Type = {Type}")]
    internal readonly struct LexerToken
    {
        /// <summary>Classified type of the covered text.</summary>
        public readonly LexerTokenType Type;

        /// <summary>Index of the first covered character.</summary>
        public readonly int Start;

        /// <summary>Index one past the last covered character.</summary>
        public readonly int End;

        /// <summary>Creates a token of the given type covering [start, end).</summary>
        public LexerToken(LexerTokenType type, int start, int end)
        {
            Type = type;
            Start = start;
            End = end;
        }
    }
    188. internal class LexerLevel2
    189. {
    190. private string _src;
    191. private ConcurrentQueue<LexerNode> _level1Queue;
    192. private ConcurrentQueue<LexerToken> _level2Queue;
    /// <summary>Creates the second lexer pass for one source text.</summary>
    /// <param name="src">Source text that the level-1 node ranges refer to.</param>
    /// <param name="level1LexerQueue">Queue the level-1 nodes are read from.</param>
    /// <param name="level2LexerQueue">Queue the produced tokens are written to.</param>
    public LexerLevel2(
        string src,
        ref ConcurrentQueue<LexerNode> level1LexerQueue,
        ref ConcurrentQueue<LexerToken> level2LexerQueue)
    {
        (_src, _level1Queue, _level2Queue) = (src, level1LexerQueue, level2LexerQueue);
    }
    /// <summary>
    /// Second lexer pass: takes level-1 nodes from the input queue, classifies them and
    /// enqueues the resulting tokens.
    /// </summary>
    /// <param name="ct">
    /// Cancellation here means "no more level-1 nodes will arrive". The pass keeps
    /// draining the queue after cancellation and returns once it is empty. With a token
    /// that is never cancelled the pass keeps waiting for nodes and does not return.
    /// </param>
    /// <returns>A task that completes when the queue has been drained after cancellation.</returns>
    /// <exception cref="NotSupportedException">A node carries an unhandled <see cref="LexerNodeType"/>.</exception>
    public async Task AnalyseAsync(CancellationToken ct = default)
    {
        // The token is deliberately NOT passed to Task.Run: if the producer has already
        // finished (token cancelled) before this work item starts, Task.Run would cancel
        // the task without running it and every queued node would be lost.
        await Task.Run(() =>
        {
            // A span view over the string avoids copying the source into a char[].
            ReadOnlySpan<char> src = _src;

            while (TryGetNextLvl1Node(ct, out LexerNode node))
            {
                int end;
                LexerTokenType type;

                // TODO: out params and if's
                switch (node.Type)
                {
                    case LexerNodeType.Space:
                        if (!IsSpace(src, node.Start, out end, out type))
                        {
                            type = LexerTokenType.Unknown;
                        }
                        AddLevel2Token(node.Start, node.End, type);
                        break;

                    case LexerNodeType.Word:
                        // A keyword only counts when it spans the whole word ("do" vs. "done").
                        // NOTE(review): other words are emitted as Unknown, not Identifier — confirm intent.
                        if (!(IsKeyword(src, node.Start, out end, out type) && node.End == end))
                        {
                            type = LexerTokenType.Unknown;
                        }
                        AddLevel2Token(node.Start, node.End, type);
                        break;

                    case LexerNodeType.Number:
                        AddLevel2Token(node.Start, node.End, LexerTokenType.Numbers);
                        break;

                    case LexerNodeType.Symbol:
                        if (IsSymbol(src, node.Start, out end, out type))
                        {
                            // Drop the level-1 nodes already covered by this token. Tracking
                            // the covered position equals the old per-character countdown for
                            // one-character nodes and stays correct for longer nodes.
                            int coveredUntil = node.End;
                            while (coveredUntil < end && TryGetNextLvl1Node(ct, out LexerNode skipped))
                            {
                                coveredUntil = skipped.End;
                            }

                            AddLevel2Token(node.Start, end, type);
                        }
                        else
                        {
                            AddLevel2Token(node.Start, node.End, LexerTokenType.Unknown);
                        }
                        break;

                    default:
                        throw new NotSupportedException(node.Type.ToString());
                }
            }
        });
    }

    /// <summary>
    /// Waits for the next level-1 node, giving up once <paramref name="ct"/> is cancelled
    /// and the queue is empty.
    /// </summary>
    /// <param name="ct">Cancelled when no further nodes will be produced.</param>
    /// <param name="node">The dequeued node on success; default otherwise.</param>
    /// <returns><c>true</c> when a node was dequeued; <c>false</c> when the input is exhausted.</returns>
    private bool TryGetNextLvl1Node(CancellationToken ct, out LexerNode node)
    {
        SpinWait spinner = default;
        while (!_level1Queue.TryDequeue(out node))
        {
            if (ct.IsCancellationRequested)
            {
                // LexerLevel1 enqueues its last node before it cancels, so one more
                // attempt after seeing the cancellation cannot miss a node.
                return _level1Queue.TryDequeue(out node);
            }

            spinner.SpinOnce();
        }

        return true;
    }

    /// <summary>Blocks until a level-1 node is available and returns it.</summary>
    /// <remarks>Never returns if no further node is enqueued.</remarks>
    private LexerNode GetNextLvl1Node()
    {
        // TODO:
        LexerNode node;
        SpinWait spinner = default;
        while (!_level1Queue.TryDequeue(out node))
        {
            // Back off instead of burning a core in a hot empty loop.
            spinner.SpinOnce();
        }

        return node;
    }
    /// <summary>Wraps the given range and type in a <see cref="LexerToken"/> and enqueues it.</summary>
    /// <param name="start">Index of the first covered character.</param>
    /// <param name="end">Index one past the last covered character.</param>
    /// <param name="type">Token type to record.</param>
    private void AddLevel2Token(int start, int end, LexerTokenType type)
    {
        var token = new LexerToken(type, start, end);
        _level2Queue.Enqueue(token);
    }
    /// <summary>
    /// Tries to recognise a keyword starting at <paramref name="i"/> by dispatching on its
    /// first character to the matching helper.
    /// </summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the candidate keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when the text at <paramref name="i"/> starts with a known keyword.</returns>
    public static bool IsKeyword(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // An index outside the span cannot start a keyword: report "no match" rather
        // than letting src[i] throw.
        if ((uint)i >= (uint)src.Length)
        {
            end = 0;
            type = LexerTokenType.Unknown;
            return false;
        }

        switch (src[i])
        {
            case 'b': // byte, bool
                return IsByteOrBool(src, i, out end, out type);
            case 'c': // class, char
                return IsClassOrChar(src, i, out end, out type);
            case 'd': // do...while, double
                return IsDoOrDouble(src, i, out end, out type);
            case 'e': // enum
                return IsEnum(src, i, out end, out type);
            case 'f': // false, for, foreach, float
                return IsFalseOrForOrForeachOrFloat(src, i, out end, out type);
            case 'g': // get
                return IsGet(src, i, out end, out type);
            case 'i': // interface, if
                return IsInterfaceOrIf(src, i, out end, out type);
            case 'n': // null
                return IsNull(src, i, out end, out type);
            case 'p': // public, private
                return IsPublicOrPrivate(src, i, out end, out type);
            case 'r': // return
                return IsReturn(src, i, out end, out type);
            case 's': // set, struct, sbyte, stackalloc, static
                return IsSetOrStructOrStackallocOrStaticOrSByte(src, i, out end, out type);
            case 't': // true, this
                return IsTrueOrThis(src, i, out end, out type);
            case 'u': // ushort, uint, ulong, using [union]
                return IsUsingOrUShortOrUIntOrULong(src, i, out end, out type);
            case 'v': // var, void
                return IsVarOrVoid(src, i, out end, out type);
            case 'w': // while
                return IsWhile(src, i, out end, out type);
            case '#': // preprocessor-style directive
                return IsPragma(src, i, out end, out type);
            default:
                end = 0;
                type = LexerTokenType.Unknown;
                return false;
        }
    }
    /// <summary>Matches the keywords <c>byte</c> and <c>bool</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsByteOrBool(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the
        // span, unlike per-character look-ahead on a word at the very end of the source.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("byte", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.Byte;
            return true;
        }

        if (rest.StartsWith("bool", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.Bool;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keywords <c>class</c> and <c>char</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsClassOrChar(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("class", StringComparison.Ordinal))
        {
            end = i + 5;
            type = LexerTokenType.Class;
            return true;
        }

        if (rest.StartsWith("char", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.Char;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keywords <c>double</c> and <c>do</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsDoOrDouble(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the
        // span (previously a trailing "do" read beyond the source).
        ReadOnlySpan<char> rest = src.Slice(i);

        // The longer keyword is tested first because "do" is a prefix of "double".
        if (rest.StartsWith("double", StringComparison.Ordinal))
        {
            end = i + 6;
            type = LexerTokenType.Double;
            return true;
        }

        if (rest.StartsWith("do", StringComparison.Ordinal))
        {
            end = i + 2;
            type = LexerTokenType.Do;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keyword <c>enum</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when the keyword starts at <paramref name="i"/>.</returns>
    private static bool IsEnum(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        if (src.Slice(i).StartsWith("enum", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.Enum;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>
    /// Matches the keywords <c>false</c>, <c>foreach</c>, <c>for</c> and <c>float</c> at
    /// <paramref name="i"/>.
    /// </summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsFalseOrForOrForeachOrFloat(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the
        // span (previously a trailing "for" read beyond the source).
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("false", StringComparison.Ordinal))
        {
            end = i + 5;
            type = LexerTokenType.False;
            return true;
        }

        // The longer keyword is tested first because "for" is a prefix of "foreach".
        if (rest.StartsWith("foreach", StringComparison.Ordinal))
        {
            end = i + 7;
            type = LexerTokenType.Foreach;
            return true;
        }

        if (rest.StartsWith("for", StringComparison.Ordinal))
        {
            end = i + 3;
            type = LexerTokenType.For;
            return true;
        }

        if (rest.StartsWith("float", StringComparison.Ordinal))
        {
            end = i + 5;
            type = LexerTokenType.Float;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keyword <c>get</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when the keyword starts at <paramref name="i"/>.</returns>
    private static bool IsGet(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        if (src.Slice(i).StartsWith("get", StringComparison.Ordinal))
        {
            end = i + 3;
            type = LexerTokenType.Get;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keywords <c>interface</c> and <c>if</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsInterfaceOrIf(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("interface", StringComparison.Ordinal))
        {
            end = i + 9;
            type = LexerTokenType.Interface;
            return true;
        }

        if (rest.StartsWith("if", StringComparison.Ordinal))
        {
            end = i + 2;
            type = LexerTokenType.If;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keyword <c>null</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when the keyword starts at <paramref name="i"/>.</returns>
    private static bool IsNull(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        if (src.Slice(i).StartsWith("null", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.Null;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keywords <c>public</c> and <c>private</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsPublicOrPrivate(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("public", StringComparison.Ordinal))
        {
            end = i + 6;
            type = LexerTokenType.Public;
            return true;
        }

        if (rest.StartsWith("private", StringComparison.Ordinal))
        {
            end = i + 7;
            type = LexerTokenType.Private;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keyword <c>return</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when the keyword starts at <paramref name="i"/>.</returns>
    private static bool IsReturn(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        if (src.Slice(i).StartsWith("return", StringComparison.Ordinal))
        {
            end = i + 6;
            type = LexerTokenType.Return;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>
    /// Matches the keywords <c>set</c>, <c>struct</c>, <c>stackalloc</c>, <c>static</c> and
    /// <c>sbyte</c> at <paramref name="i"/>.
    /// </summary>
    /// <remarks>
    /// The earlier character-by-character version compared the character after "st" with
    /// both 'r' and 'u', so <c>struct</c> could never match; whole-keyword prefix
    /// comparison removes that class of offset mistakes.
    /// </remarks>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsSetOrStructOrStackallocOrStaticOrSByte(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("set", StringComparison.Ordinal))
        {
            end = i + 3;
            type = LexerTokenType.Set;
            return true;
        }

        if (rest.StartsWith("stackalloc", StringComparison.Ordinal))
        {
            end = i + 10;
            type = LexerTokenType.Stackalloc;
            return true;
        }

        if (rest.StartsWith("static", StringComparison.Ordinal))
        {
            end = i + 6;
            type = LexerTokenType.Static;
            return true;
        }

        if (rest.StartsWith("struct", StringComparison.Ordinal))
        {
            end = i + 6;
            type = LexerTokenType.Struct;
            return true;
        }

        if (rest.StartsWith("sbyte", StringComparison.Ordinal))
        {
            end = i + 5;
            type = LexerTokenType.SByte;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>Matches the keywords <c>true</c> and <c>this</c> at <paramref name="i"/>.</summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsTrueOrThis(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("true", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.True;
            return true;
        }

        if (rest.StartsWith("this", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.This;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>
    /// Matches the keywords <c>using</c>, <c>ushort</c>, <c>uint</c> and <c>ulong</c> at
    /// <paramref name="i"/>.
    /// </summary>
    /// <param name="src">Source text being lexed.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">Exclusive end index of the keyword on success; 0 otherwise.</param>
    /// <param name="type">Matched token type on success; <c>Unknown</c> otherwise.</param>
    /// <returns><c>true</c> when one of the keywords starts at <paramref name="i"/>.</returns>
    private static bool IsUsingOrUShortOrUIntOrULong(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Prefix comparison on the remaining text cannot index past the end of the span.
        ReadOnlySpan<char> rest = src.Slice(i);

        if (rest.StartsWith("using", StringComparison.Ordinal))
        {
            end = i + 5;
            type = LexerTokenType.Using;
            return true;
        }

        if (rest.StartsWith("ushort", StringComparison.Ordinal))
        {
            end = i + 6;
            type = LexerTokenType.UShort;
            return true;
        }

        if (rest.StartsWith("uint", StringComparison.Ordinal))
        {
            end = i + 4;
            type = LexerTokenType.UInt;
            return true;
        }

        if (rest.StartsWith("ulong", StringComparison.Ordinal))
        {
            end = i + 5;
            type = LexerTokenType.Ulong;
            return true;
        }

        end = 0;
        type = LexerTokenType.Unknown;
        return false;
    }
    /// <summary>
    /// Tries to match the remainder of the keyword <c>var</c> or <c>void</c>.
    /// </summary>
    /// <remarks>
    /// The character at <paramref name="i"/> is skipped without being inspected; presumably the
    /// caller has already dispatched on a leading 'v' (verify against the call site).
    /// No word-boundary check is made after the keyword.
    /// </remarks>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">On success, the index just past the keyword; otherwise 0.</param>
    /// <param name="type">On success, <c>Var</c> or <c>Void</c>; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when one of the two keywords was matched.</returns>
    private static bool IsVarOrVoid(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        int next = i + 1;

        // Slice-based comparison keeps a truncated keyword at the end of the input from
        // indexing past the end of the span.
        if (next < src.Length)
        {
            ReadOnlySpan<char> tail = src.Slice(next);
            if (tail.StartsWith("ar")) // var
            {
                end = next + 2;
                type = Var;
                return true;
            }
            // The previous implementation compared the last letter against 't' ("voit"),
            // so the keyword "void" was never recognised.
            if (tail.StartsWith("oid")) // void
            {
                end = next + 3;
                type = Void;
                return true;
            }
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Tries to match the remainder of the keyword <c>while</c>.
    /// </summary>
    /// <remarks>
    /// The character at <paramref name="i"/> is skipped without being inspected; presumably the
    /// caller has already dispatched on a leading 'w' (verify against the call site).
    /// No word-boundary check is made after the keyword.
    /// </remarks>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the keyword's first character.</param>
    /// <param name="end">On success, the index just past the keyword; otherwise 0.</param>
    /// <param name="type">On success, <c>While</c>; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when the keyword was matched.</returns>
    private static bool IsWhile(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        int next = i + 1;

        // The length guard plus StartsWith replaces four unguarded indexer reads that
        // threw when fewer than four characters followed the first one.
        if (next < src.Length && src.Slice(next).StartsWith("hile"))
        {
            end = next + 4;
            type = While;
            return true;
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Tries to match the word <c>pragma</c> starting one character after <paramref name="i"/>.
    /// </summary>
    /// <remarks>
    /// The character at <paramref name="i"/> is skipped without being inspected; presumably it
    /// is the directive's leading '#' (verify against the call site).
    /// </remarks>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the character preceding the word.</param>
    /// <param name="end">On success, the index just past <c>pragma</c>; otherwise 0.</param>
    /// <param name="type">On success, <c>Define</c>; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when the word was matched.</returns>
    private static bool IsPragma(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        int next = i + 1;

        // The length guard plus StartsWith replaces six unguarded indexer reads that
        // threw when the input ended inside the word.
        if (next < src.Length && src.Slice(next).StartsWith("pragma"))
        {
            end = next + 6;
            // NOTE(review): a pragma is reported with the Define token type, which looks like
            // a copy/paste leftover. Kept as-is because the token enum is not visible here -
            // confirm whether a dedicated Pragma member exists.
            type = Define;
            return true;
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Recognises a single whitespace token at <paramref name="i"/>: first a space or tab,
    /// then, if that fails, a newline.
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the character to classify.</param>
    /// <param name="end">Set by whichever helper ran last (index past the token, or 0).</param>
    /// <param name="type">Set by whichever helper ran last (token type, or <c>Unknown</c>).</param>
    /// <returns><c>true</c> when either helper matched.</returns>
    private static bool IsSpace(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        if (IsSpaceOrTab(src, i, out end, out type))
        {
            return true;
        }

        // The newline check overwrites the failure values written by the first helper.
        return IsNewline(src, i, out end, out type);
    }
    /// <summary>
    /// Recognises a single blank (' ') or horizontal tab ('\t') at <paramref name="i"/>.
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the character to classify.</param>
    /// <param name="end">On success, <c>i + 1</c>; otherwise 0.</param>
    /// <param name="type">On success, <c>Space</c> or <c>Tab</c>; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when the character is a blank or a tab.</returns>
    private static bool IsSpaceOrTab(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        char current = src[i];

        if (current is ' ')
        {
            type = Space;
        }
        else if (current is '\t')
        {
            type = Tab;
        }
        else
        {
            end = 0;
            type = Unknown;
            return false;
        }

        // Both accepted characters are exactly one char wide.
        end = i + 1;
        return true;
    }
    /// <summary>
    /// Recognises a line break at <paramref name="i"/>: either "\r\n" (two characters)
    /// or a single '\n'.
    /// </summary>
    /// <remarks>
    /// A '\r' that is not followed by '\n' is not treated as a newline.
    /// </remarks>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the character to classify.</param>
    /// <param name="end">On success, the index just past the line break; otherwise 0.</param>
    /// <param name="type">On success, <c>Newline</c>; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when a line break starts at <paramref name="i"/>.</returns>
    private static bool IsNewline(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        char current = src[i];

        if (current is '\n')
        {
            end = i + 1;
            type = Newline;
            return true;
        }

        // The length guard keeps a '\r' as the very last character of the input from
        // reading src[i + 1] out of range.
        if (current is '\r' && i + 1 < src.Length && src[i + 1] is '\n')
        {
            end = i + 2;
            type = Newline;
            return true;
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Recognises a punctuation or operator token starting at <paramref name="i"/>.
    /// </summary>
    /// <remarks>
    /// Order of attempts: the two-character comparisons "==" and "!=", then the single-character
    /// punctuation marks, then assignment operators, other operators, and finally the remaining
    /// comparison operators.
    /// </remarks>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the first character of the candidate token.</param>
    /// <param name="end">On success, the index just past the token; otherwise 0.</param>
    /// <param name="type">On success, the matched token type; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when a symbol was recognised.</returns>
    private static bool IsSymbol(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        char current = src[i];

        // "==" and "!=" have to be claimed first. Otherwise '!' is consumed by the
        // single-character switch below and '=' by IsAssignOperator, which made the
        // EqualsThen / NotEqualsThen branches of IsCompareOperator unreachable.
        if ((current is '=' or '!')
            && i + 1 < src.Length
            && src[i + 1] is '='
            && IsCompareOperator(src, i, out end, out type))
        {
            return true;
        }

        switch (current)
        {
            case '.': type = Dot; break;
            case ',': type = Comma; break;
            case '\'': type = SingleQuotationMark; break;
            case '"': type = DoubleQuotationMark; break;
            case ':': type = Colon; break;
            case ';': type = Semicolon; break;
            case '!': type = ExclamationMark; break;
            case '(': type = OpenParenthesis; break;
            case ')': type = CloseParenthesis; break;
            case '[': type = OpenBrackets; break;
            case ']': type = CloseBrackets; break;
            case '{': type = OpenBraces; break;
            case '}': type = CloseBraces; break;
            default:
                // If all three helpers fail, the last one leaves end = 0 and type = Unknown.
                return IsAssignOperator(src, i, out end, out type)
                    || IsOperator(src, i, out end, out type)
                    || IsCompareOperator(src, i, out end, out type);
        }

        // Every punctuation mark handled by the switch is exactly one character wide.
        end = i + 1;
        return true;
    }
    /// <summary>
    /// Recognises an assignment operator at <paramref name="i"/>: a compound bitwise
    /// assignment, a compound arithmetic assignment, or a plain '='.
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the operator's first character.</param>
    /// <param name="end">On success, the index just past the operator; otherwise 0.</param>
    /// <param name="type">On success, the matched assignment type; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when an assignment operator was recognised.</returns>
    private static bool IsAssignOperator(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Compound forms are tried before the bare '='.
        bool isCompound = IsBinaryAssign(src, i, out end, out type)
            || IsMathmaticalAssign(src, i, out end, out type);
        if (isCompound)
        {
            return true;
        }

        if (src[i] is not '=')
        {
            end = 0;
            type = Unknown;
            return false;
        }

        end = i + 1;
        type = Assign;
        return true;
    }
    /// <summary>
    /// Recognises a two-character bitwise compound assignment at <paramref name="i"/>:
    /// "&amp;=", "|=", "^=" or "~=".
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the operator's first character.</param>
    /// <param name="end">On success, <c>i + 2</c>; otherwise 0.</param>
    /// <param name="type">
    /// On success, <c>AndEquals</c>, <c>OrEquals</c>, <c>XorEquals</c> or <c>InvertEquals</c>;
    /// otherwise <c>Unknown</c>.
    /// </param>
    /// <returns><c>true</c> when one of the operators was matched.</returns>
    private static bool IsBinaryAssign(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // The length guard keeps the look-ahead from reading past the end when src[i]
        // is the last character of the input.
        if (i + 1 < src.Length && src[i + 1] is '=')
        {
            switch (src[i])
            {
                case '&':
                    end = i + 2;
                    type = AndEquals;
                    return true;
                case '|':
                    end = i + 2;
                    type = OrEquals;
                    return true;
                case '^':
                    end = i + 2;
                    type = XorEquals;
                    return true;
                case '~':
                    end = i + 2;
                    type = InvertEquals;
                    return true;
            }
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Recognises an arithmetic compound assignment at <paramref name="i"/>:
    /// "+=", "-=", "*=", "/=", "%=" (two characters) or "**=" (three characters).
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the operator's first character.</param>
    /// <param name="end">On success, the index just past the operator; otherwise 0.</param>
    /// <param name="type">On success, the matching <c>...Equals</c> type; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when one of the operators was matched.</returns>
    private static bool IsMathmaticalAssign(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // All look-ahead reads are guarded so an operator character at the very end of
        // the input yields "no match" instead of an out-of-range access.
        if (i + 1 < src.Length)
        {
            char first = src[i];
            char second = src[i + 1];

            if (second is '=')
            {
                switch (first)
                {
                    case '+':
                        end = i + 2;
                        type = PlusEquals;
                        return true;
                    case '-':
                        end = i + 2;
                        type = MinusEquals;
                        return true;
                    case '*':
                        end = i + 2;
                        type = MultiplicationEquals;
                        return true;
                    case '/':
                        end = i + 2;
                        type = DivisionEquals;
                        return true;
                    case '%':
                        end = i + 2;
                        type = ModuloEquals;
                        return true;
                }
            }
            else if (first is '*' && second is '*' && i + 2 < src.Length && src[i + 2] is '=')
            {
                // "**=" (power assignment)
                end = i + 3;
                type = PowerEquals;
                return true;
            }
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Recognises a non-assigning operator at <paramref name="i"/>: first a bitwise operator,
    /// then, if that fails, an arithmetic operator or comment marker.
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the operator's first character.</param>
    /// <param name="end">Set by whichever helper ran last (index past the token, or 0).</param>
    /// <param name="type">Set by whichever helper ran last (token type, or <c>Unknown</c>).</param>
    /// <returns><c>true</c> when either helper matched.</returns>
    private static bool IsOperator(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        if (IsBinaryOperator(src, i, out end, out type))
        {
            return true;
        }

        return IsMathmaticalOperatorOrComment(src, i, out end, out type);
    }
    /// <summary>
    /// Recognises a single-character bitwise operator at <paramref name="i"/>:
    /// '~', '^', '|' or '&amp;'.
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the character to classify.</param>
    /// <param name="end">On success, <c>i + 1</c>; otherwise 0.</param>
    /// <param name="type">
    /// On success, <c>Invert</c>, <c>Xor</c>, <c>Or</c> or <c>And</c>; otherwise <c>Unknown</c>.
    /// </param>
    /// <returns><c>true</c> when the character is one of the four operators.</returns>
    private static bool IsBinaryOperator(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        switch (src[i])
        {
            case '~': type = Invert; break;
            case '^': type = Xor; break;
            case '|': type = Or; break;
            case '&': type = And; break;
            default:
                end = 0;
                type = Unknown;
                return false;
        }

        // Every operator handled above is one character wide.
        end = i + 1;
        return true;
    }
    /// <summary>
    /// Recognises an arithmetic operator or a comment marker at <paramref name="i"/>:
    /// '+', '-', '*', '/', '%', the power operator "**", and the comment markers
    /// "/*", "*/" and "//".
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the token's first character.</param>
    /// <param name="end">On success, the index just past the token; otherwise 0.</param>
    /// <param name="type">On success, the matched token type; otherwise <c>Unknown</c>.</param>
    /// <returns><c>true</c> when an operator or comment marker was matched.</returns>
    private static bool IsMathmaticalOperatorOrComment(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // Guarded look-ahead: '\0' stands in for "no next character", so a '*' or '/' as
        // the last character of the input falls through to the single-character token
        // instead of reading src[i + 1] out of range.
        char lookahead = i + 1 < src.Length ? src[i + 1] : '\0';

        switch (src[i])
        {
            case '+':
                end = i + 1;
                type = Plus;
                return true;
            case '-':
                end = i + 1;
                type = Minus;
                return true;
            case '*':
                switch (lookahead)
                {
                    case '*': // power
                        end = i + 2;
                        type = Power;
                        return true;
                    case '/': // end of an inline comment
                        end = i + 2;
                        type = InlineCommentEnd;
                        return true;
                }
                end = i + 1;
                type = Multiplication;
                return true;
            case '/':
                switch (lookahead)
                {
                    case '*': // start of an inline comment
                        end = i + 2;
                        type = InlineCommentStart;
                        return true;
                    case '/': // single-line comment
                        end = i + 2;
                        type = SingleLineComment;
                        return true;
                }
                end = i + 1;
                type = Division;
                return true;
            case '%':
                end = i + 1;
                type = Modulo;
                return true;
        }

        end = 0;
        type = Unknown;
        return false;
    }
    /// <summary>
    /// Recognises a two-character comparison operator at <paramref name="i"/>:
    /// "&lt;=", "&gt;=", "==" or "!=".
    /// </summary>
    /// <remarks>
    /// NOTE(review): a lone '&lt;' or '&gt;' (not followed by '=') is not recognised here -
    /// confirm whether strict comparisons are handled elsewhere.
    /// </remarks>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the operator's first character.</param>
    /// <param name="end">On success, <c>i + 2</c>; otherwise 0.</param>
    /// <param name="type">
    /// On success, <c>LessThen</c>, <c>GreaterThen</c>, <c>EqualsThen</c> or <c>NotEqualsThen</c>;
    /// otherwise <c>Unknown</c>.
    /// </param>
    /// <returns><c>true</c> when one of the operators was matched.</returns>
    private static bool IsCompareOperator(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // The length guard keeps the look-ahead from reading past the end when src[i]
        // is the last character of the input.
        if (i + 1 < src.Length && src[i + 1] is '=')
        {
            switch (src[i])
            {
                case '<':
                    end = i + 2;
                    type = LessThen;
                    return true;
                case '>':
                    end = i + 2;
                    type = GreaterThen;
                    return true;
                case '=':
                    end = i + 2;
                    type = EqualsThen;
                    return true;
                case '!':
                    end = i + 2;
                    type = NotEqualsThen;
                    return true;
            }
        }

        end = 0;
        type = Unknown;
        return false;
    }
    // TODO: move into the parser
    /// <summary>
    /// Scans an identifier of the form <c>[A-Za-z_][A-Za-z0-9_]*</c> starting at
    /// <paramref name="i"/>.
    /// </summary>
    /// <param name="src">Source text being scanned.</param>
    /// <param name="i">Index of the identifier's first character.</param>
    /// <param name="end">On success, the index just past the identifier; otherwise 0.</param>
    /// <param name="type">Always <c>Unknown</c>, including on success.</param>
    /// <returns><c>true</c> when an identifier starts at <paramref name="i"/>.</returns>
    private static bool IsIdentifier(ReadOnlySpan<char> src, int i, out int end, out LexerTokenType type)
    {
        // First character: a letter or an underscore (digits are not allowed here).
        bool startsIdentifier = src[i] is '_' or (>= 'A' and <= 'Z') or (>= 'a' and <= 'z');
        if (!startsIdentifier)
        {
            end = 0;
            type = Unknown;
            return false;
        }

        // Remaining characters: letters, digits or underscores, up to the end of the input.
        int cursor = i + 1;
        while (cursor < src.Length)
        {
            char c = src[cursor];
            bool isIdentifierChar = c is '_' or (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or (>= '0' and <= '9');
            if (!isIdentifierChar)
            {
                break;
            }
            cursor++;
        }

        end = cursor;
        type = Unknown;
        return true;
    }
    1036. }

    Lexer.cs
    Spoiler anzeigen

    C#-Quellcode

    1. using System.Collections.Concurrent;
    2. namespace CNative;
    /// <summary>
    /// Front end that runs the two lexer stages (<c>LexerLevel1</c> and <c>LexerLevel2</c>)
    /// over one source string and exposes their output queues.
    /// </summary>
    internal class Lexer
    {
        private string _src;

        // Output of the first stage; also handed to the second stage.
        public ConcurrentQueue<LexerNode> level1LexerQueue;

        // Output of the second stage.
        public ConcurrentQueue<LexerToken> level2LexerQueue;

        /// <summary>Creates a lexer for the given source text.</summary>
        /// <param name="src">The source text to analyse.</param>
        public Lexer(string src) => _src = src;

        /// <summary>
        /// Creates fresh queues, starts both lexer stages and waits for the first stage
        /// and then the second stage to complete.
        /// </summary>
        /// <returns>A task that completes once both stage tasks have been awaited.</returns>
        public async Task AnalyseAsync()
        {
            level1LexerQueue = new ConcurrentQueue<LexerNode>();
            level2LexerQueue = new ConcurrentQueue<LexerToken>();

            LexerLevel1 firstStage = new LexerLevel1(_src, ref level1LexerQueue);
            LexerLevel2 secondStage = new LexerLevel2(_src, ref level1LexerQueue, ref level2LexerQueue);

            // The first stage receives the source itself in addition to the token;
            // presumably it signals the second stage through it when it is done - verify
            // against LexerLevel1.
            // NOTE(review): the CancellationTokenSource is never disposed.
            CancellationTokenSource stageCancellation = new CancellationTokenSource();
            Task firstStageRun = firstStage.AnalyseAsync(stageCancellation, stageCancellation.Token);
            Task secondStageRun = secondStage.AnalyseAsync(stageCancellation.Token);

            await firstStageRun;
            await secondStageRun;
        }

        /// <summary>Checks whether the character at <paramref name="i"/> is an ASCII letter.</summary>
        /// <param name="src">Source text being scanned.</param>
        /// <param name="i">Index of the character to classify.</param>
        /// <param name="end">On success, <c>i + 1</c>; otherwise 0.</param>
        /// <returns><c>true</c> for 'A'-'Z' and 'a'-'z'.</returns>
        private static bool IsAlphabetic(ReadOnlySpan<char> src, int i, out int end)
        {
            bool isLetter = src[i] is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z');
            end = isLetter ? i + 1 : 0;
            return isLetter;
        }

        /// <summary>Checks whether the character at <paramref name="i"/> is an ASCII digit.</summary>
        /// <param name="src">Source text being scanned.</param>
        /// <param name="i">Index of the character to classify.</param>
        /// <param name="end">On success, <c>i + 1</c>; otherwise 0.</param>
        /// <returns><c>true</c> for '0'-'9'.</returns>
        private static bool IsNumericLiteral(ReadOnlySpan<char> src, int i, out int end)
        {
            bool isDigit = src[i] is >= '0' and <= '9';
            end = isDigit ? i + 1 : 0;
            return isDigit;
        }
    }

    Example
    Spoiler anzeigen

    C#-Quellcode

    1. using CNative;
    2. Console.WriteLine("Hello, World!");
    3. string example = @"public static double[,] Transpose(this double[,] self)
    4. {
    5. double[,] trans = new double[self.Cols(), self.Rows()];
    6. for (int row = 0; row < self.Rows(); row++)
    7. {
    8. for (int col = 0; col < self.Cols(); col++)
    9. {
    10. trans[col, row] = self[row, col];
    11. }
    12. }
    13. return trans;
    14. }";
    15. Lexer lexer = new Lexer(example);
    16. await lexer.AnalyseAsync();
    17. ; // break me