| | 1 | | /* |
| | 2 | | * Copyright 2017 Stanislav Muhametsin. All rights Reserved. |
| | 3 | | * |
| | 4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
| | 5 | | * you may not use this file except in compliance with the License. |
| | 6 | | * You may obtain a copy of the License at |
| | 7 | | * |
| | 8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
| | 9 | | * |
| | 10 | | * Unless required by applicable law or agreed to in writing, software |
| | 11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
| | 12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| | 13 | | * implied. |
| | 14 | | * |
| | 15 | | * See the License for the specific language governing permissions and |
| | 16 | | * limitations under the License. |
| | 17 | | */ |
| | 18 | | using System; |
| | 19 | | using System.Collections.Generic; |
| | 20 | | using System.IO; |
| | 21 | | using System.Text; |
| | 22 | | using System.Threading.Tasks; |
| | 23 | | using UtilPack; |
| | 24 | |
|
| | 25 | | using TReader = UtilPack.PeekablePotentiallyAsyncReader<System.Char?>; |
| | 26 | |
|
| | 27 | | namespace CBAM.SQL.PostgreSQL.Implementation |
| | 28 | | { |
| | 29 | | internal static class Parser |
| | 30 | | { |
// Helper class which keeps track of how many characters have been read from the underlying reader.
// Only Read() advances the counter; Peek() is forwarded without counting.
private sealed class TextReaderWrapper : TextReader
{
   private readonly TextReader _reader;
   private Int32 _charsRead;

   // Wraps the given reader; the argument is validated to be non-null.
   internal TextReaderWrapper( TextReader reader )
   {
      ArgumentValidator.ValidateNotNull( "Reader", reader );

      this._reader = reader;
   }

   // Amount of characters successfully read through this wrapper so far.
   public Int32 CharsRead => this._charsRead;

   // Reads one character from the underlying reader, counting it unless the underlying reader returned -1.
   public override Int32 Read()
   {
      var result = this._reader.Read();
      if ( result == -1 )
      {
         return result;
      }

      ++this._charsRead;
      return result;
   }

   // Peeking is forwarded as-is and never affects the counter.
   public override Int32 Peek()
   {
      return this._reader.Peek();
   }

   protected override void Dispose( bool disposing )
   {
      // Intentionally empty - the underlying reader must stay open.
   }
}
| | 73 | |
|
// Reads characters from the reader until a ';' is met outside of parentheses, or until the reader runs out of characters.
// String literals, quoted identifiers, comments and dollar-quoted strings are skipped by the corresponding Parse* methods.
// For every '?' seen outside of those constructs, onParameter (when it is not null) is invoked and its result recorded.
// Returns the recorded values in encounter order, or null if nothing was recorded.
internal static async ValueTask<Int32[]> ParseStringForNextSQLStatement(
   TReader reader,
   Boolean standardConformingStrings,
   Func<Int32> onParameter
   )
{
   List<Int32> recordedParameters = null;
   var openParentheses = 0;
   var statementEnded = false;
   Char? previous = null, beforePrevious = null;

   while ( !statementEnded )
   {
      Char? current = await reader.TryReadNextAsync();
      if ( !current.HasValue )
      {
         // Out of characters
         break;
      }

      if ( current == '\'' )
      {
         await ParseSingleQuotes( reader, standardConformingStrings, previous, beforePrevious );
      }
      else if ( current == '"' )
      {
         await ParseDoubleQuotes( reader );
      }
      else if ( current == '-' )
      {
         await ParseLineComment( reader );
      }
      else if ( current == '/' )
      {
         await ParseBlockComment( reader );
      }
      else if ( current == '$' )
      {
         await ParseDollarQuotes( reader, previous );
      }
      else if ( current == '(' )
      {
         ++openParentheses;
      }
      else if ( current == ')' )
      {
         --openParentheses;
      }
      else if ( current == '?' )
      {
         if ( onParameter != null )
         {
            if ( recordedParameters == null )
            {
               recordedParameters = new List<Int32>();
            }
            recordedParameters.Add( onParameter() );
         }
      }
      else if ( current == ';' )
      {
         // A semicolon inside parentheses does not end the statement
         statementEnded = openParentheses == 0;
      }

      // Remember the two most recently read characters for the quote parsers
      beforePrevious = previous;
      previous = current;
   }

   return recordedParameters?.ToArray();
}
| | 137 | |
|
| | 138 | |
|
// Consumes the remainder of a single-quoted string literal; the opening quote has already been read by the caller.
// prev1 is the character right before the opening quote and prev2 the one before that - they are used to detect
// the E'...' escape string syntax.
// Backslash is treated as escape character when the literal is an escape string (E'...'), or when
// standardConformingStrings is false. Otherwise backslash is an ordinary character.
// See http://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html for String Constants with C-style Escapes.
// Always returns true.
internal static async ValueTask<Boolean> ParseSingleQuotes(
   TReader reader,
   Boolean standardConformingStrings,
   Char? prev1,
   Char? prev2
   )
{
   var isEscapeString = prev1.HasValue
      && prev2.HasValue
      && ( prev1 == 'e' || prev1 == 'E' )
      && CharTerminatesIdentifier( prev2.Value );

   Char? c;
   if ( isEscapeString || !standardConformingStrings )
   {
      // C-Style escaping
      // Treat backslashes as escape character
      // The flag is cleared after the escaped character, so that in "\\'" the quote is not seen as escaped.
      var escaped = false;
      while ( ( c = await reader.TryReadNextAsync() ).HasValue )
      {
         if ( escaped )
         {
            // This character is consumed by the preceding backslash
            escaped = false;
         }
         else if ( c == '\\' )
         {
            escaped = true;
         }
         else if ( await CheckSingleQuote( reader, c.Value ) )
         {
            break;
         }
      }
   }
   else
   {
      // Don't treat backslashes as escape character
      while ( ( c = await reader.TryReadNextAsync() ).HasValue && !await CheckSingleQuote( reader, c.Value ) )
      {
      }
   }

   return true;
}
| | 176 | |
|
// Consumes the remainder of a double-quoted section; the opening double quote has already been read by the caller.
// Two consecutive double quotes are treated as an escaped quote and do not end the section.
// Stops after the closing double quote, or when the reader runs out of characters. Always returns true.
internal static async ValueTask<Boolean> ParseDoubleQuotes(
   TReader reader
   )
{
   while ( true )
   {
      Char? current = await reader.TryReadNextAsync();
      if ( !current.HasValue )
      {
         break;
      }

      if ( current != '"' )
      {
         continue;
      }

      // A double quote was read: it either starts a double-doublequote, or ends the section
      Char? lookahead = await reader.TryPeekAsync();
      if ( !lookahead.IsOfValue( '"' ) )
      {
         break;
      }

      await reader.ReadNextAsync();
   }

   return true;
}
| | 200 | |
|
// Called after a '-' has been read. If the next character is also '-', this is a line comment, and characters are
// consumed up to and including the first '\r' or '\n', or until the reader runs out of characters.
// If the next character is something else, nothing is consumed. Always returns true.
internal static async ValueTask<Boolean> ParseLineComment(
   TReader reader
   )
{
   Char? lookahead = await reader.TryPeekAsync();
   if ( lookahead.IsOfValue( '-' ) )
   {
      // Line comment starting
      while ( true )
      {
         Char? consumed = await reader.TryReadNextAsync();
         if ( !consumed.HasValue || consumed == '\r' || consumed == '\n' )
         {
            break;
         }
      }
   }

   return true;
}
| | 213 | |
|
| | 214 | |
|
// Called after a '/' has been read. If the next character is '*', this is a block comment, and characters are
// consumed until the comment - including any nested block comments - has been closed, or until the reader runs out
// of characters. If the next character is something else, nothing is consumed. Always returns true.
internal static async ValueTask<Boolean> ParseBlockComment(
   TReader reader
   )
{
   if ( ( await reader.TryPeekAsync() ).IsOfValue( '*' ) )
   {
      // Block comment starting
      // SQL spec says block comments nest
      var level = 1;
      // Consume the '*' which was just peeked
      await reader.ReadNextAsync();
      Char? prev = null;
      Char? cur;
      // TryReadNextAsync is used here, like in the other scanning loops of this class, so that the HasValue check
      // really observes the end of input (an unterminated comment then simply ends the scan).
      while ( level != 0 && ( cur = await reader.TryReadNextAsync() ).HasValue )
      {
         if ( prev == '*' && cur == '/' )
         {
            // Block comment ending
            --level;
            // Forget the pair, so that '*/*' is not processed twice
            prev = null;
         }
         else if ( prev == '/' && cur == '*' )
         {
            // Nested block comment
            ++level;
            // Forget the pair, so that '/*/' is not processed twice
            prev = null;
         }
         else
         {
            prev = cur;
         }
      }
   }

   return true;
}
| | 255 | |
|
// Called after a '$' has been read; prev is the character which preceded that '$' (null if there was none).
// If the '$' starts a dollar-quoted string ( $$...$$ or $tag$...$tag$ ), consumes everything up to and including the
// closing delimiter, or until the reader runs out of characters.
// The closing delimiter is recognized only in its full form, i.e. '$' + tag + '$'.
// If the '$' follows an identifier character, nothing is consumed. If the characters after '$' do not form
// a complete opening delimiter, only the tag characters read so far have been consumed.
// See http://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html for dollar quote spec.
// Always returns true.
internal static async ValueTask<Boolean> ParseDollarQuotes(
   TReader reader,
   Char? prev
   )
{
   Char? c = await reader.TryPeekAsync();
   if ( !c.HasValue || ( prev.HasValue && IsIdentifierContinuationCharacter( prev.Value ) ) )
   {
      // Either nothing follows, or this '$' is part of an identifier
      return true;
   }

   // Will hold '$' + tag, i.e. the closing delimiter without its final '$'
   List<Char> delimiterStart = null;
   if ( c == '$' )
   {
      // Empty tag
      delimiterStart = new List<Char>() { '$' };
   }
   else if ( IsDollarQuoteTagStartCharacter( c.Value ) )
   {
      var candidate = new List<Char>() { '$' };
      while ( ( c = await reader.TryPeekAsync() ).HasValue )
      {
         if ( c == '$' )
         {
            delimiterStart = candidate;
            break;
         }
         else if ( !IsDollarQuoteTagContinuationCharacter( c.Value ) )
         {
            // Not a dollar quote after all
            break;
         }
         else
         {
            candidate.Add( await reader.ReadNextAsync() );
         }
      }
   }

   if ( delimiterStart == null )
   {
      return true;
   }

   // Read the tag-ending dollar sign
   await reader.ReadNextAsync();

   var reference = delimiterStart.ToArray();
   var refLen = reference.Length;
   // Circular buffer of the last refLen characters of the quoted content. It starts zero-filled, which can never
   // match the reference, so the opening delimiter is not re-used as a part of the closing one.
   var window = new Char[refLen];
   var windowIdx = refLen - 1;
   while ( ( c = await reader.TryReadNextAsync() ).HasValue )
   {
      // On '$', the literal ends if the characters right before it are '$' + tag
      if ( c == '$' && CheckForCircularlyFilledArray( reference, refLen, window, windowIdx ) )
      {
         break;
      }

      windowIdx = ( windowIdx + 1 ) % refLen;
      window[windowIdx] = c.Value;
   }

   return true;
}
| | 338 | |
|
// Returns true if the given character, which has just been read from inside a string literal, ends that literal.
// Returns false if prevChar is not a single quote.
// When prevChar is a single quote, the literal continues (and false is returned) in two cases:
// - the quote is immediately followed by another quote (doubled quote), or
// - the quote is followed by one or more line breaks and then a quote (newline-separated string literal, see
//   http://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html ).
// In both cases the continuing quote is consumed. Line breaks following the quote are consumed in any case, but the
// first character after them is left in the reader unless it is a quote.
// NOTE: spaces or tabs between the line break and the continuing quote are not skipped, so such literal is seen by
// the caller as ending here.
private static async ValueTask<Boolean> CheckSingleQuote(
   TReader reader,
   Char prevChar
   )
{
   if ( prevChar != '\'' )
   {
      return false;
   }

   // Only peek ahead - the character after the line breaks belongs to the caller unless it is a quote
   Char? peek = await reader.TryPeekAsync();
   while ( peek == '\n' || peek == '\r' )
   {
      await reader.ReadNextAsync();
      peek = await reader.TryPeekAsync();
   }

   if ( peek == '\'' )
   {
      // Doubled quote, or quote continuing the literal after line break(s)
      await reader.ReadNextAsync();
      return false;
   }

   return true;
}
| | 375 | |
|
// Returns true if the character terminates an identifier in backend parser: it is a double quote, a space
// character (see IsSpace), or an operator character (see IsOperatorChar).
private static Boolean CharTerminatesIdentifier( Char c )
{
   if ( c == '"' )
   {
      return true;
   }

   return IsSpace( c ) || IsOperatorChar( c );
}
| | 381 | |
|
| | 382 | | // The functions below must be kept in sync with logic of pgsql/src/backend/parser/scan.l |
| | 383 | |
|
// Returns true if the character is treated as space character in backend parser:
// space, tab, line feed, carriage return, or form feed.
internal static Boolean IsSpace( Char c )
{
   switch ( c )
   {
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '\f':
         return true;
      default:
         return false;
   }
}
| | 389 | |
|
// Returns true if the given character is a valid character for an operator in backend parser.
private static Boolean IsOperatorChar( Char c )
{
   // The set of characters is extracted from operators defined by {self} and {op_chars}
   // in pgsql/src/backend/parser/scan.l.
   const String OperatorCharacters = ",()[].;:+-*/%^<>=~!@#&|`?";
   return OperatorCharacters.IndexOf( c ) >= 0;
}
| | 399 | |
|
// Checks whether the character is valid as second or later character of an identifier:
// ASCII letter, ASCII digit, underscore, dollar sign, or any character above 127.
private static Boolean IsIdentifierContinuationCharacter( Char c )
{
   if ( c == '_' || c == '$' || c > 127 )
   {
      return true;
   }

   var isLowerCaseLetter = c >= 'a' && c <= 'z';
   var isUpperCaseLetter = c >= 'A' && c <= 'Z';
   var isDigit = c >= '0' && c <= '9';
   return isLowerCaseLetter || isUpperCaseLetter || isDigit;
}
| | 410 | |
|
// Checks whether the character is valid as first character of dollar quote tag:
// ASCII letter, underscore, or any character above 127.
private static Boolean IsDollarQuoteTagStartCharacter( Char c )
{
   if ( c == '_' || c > 127 )
   {
      return true;
   }

   var isLowerCaseLetter = c >= 'a' && c <= 'z';
   var isUpperCaseLetter = c >= 'A' && c <= 'Z';
   return isLowerCaseLetter || isUpperCaseLetter;
}
| | 419 | |
|
// Checks whether the character is valid as second or later character of dollar quote tag:
// ASCII letter, ASCII digit, underscore, or any character above 127.
private static Boolean IsDollarQuoteTagContinuationCharacter( Char c )
{
   if ( c == '_' || c > 127 )
   {
      return true;
   }

   var isLowerCaseLetter = c >= 'a' && c <= 'z';
   var isUpperCaseLetter = c >= 'A' && c <= 'Z';
   var isDigit = c >= '0' && c <= '9';
   return isLowerCaseLetter || isUpperCaseLetter || isDigit;
}
| | 430 | |
|
// Checks whether the most recently written refLen elements of the circularly filled auxArray are equal to the
// first refLen elements of referenceDataArray. The comparison is done backwards, starting from the newest element.
// auxArrayIndex = index of last set character in auxArray
internal static Boolean CheckForCircularlyFilledArray( Char[] referenceDataArray, Int32 refLen, Char[] auxArray, Int32 auxArrayIndex )
{
   var min = auxArrayIndex + 1 - refLen;
   var refIdx = refLen - 1;
   var auxIdx = auxArrayIndex;
   if ( min < 0 )
   {
      // The compared elements wrap around the start of auxArray: first walk down to index 0...
      while ( auxIdx >= 0 )
      {
         if ( referenceDataArray[refIdx] != auxArray[auxIdx] )
         {
            return false;
         }
         --auxIdx;
         --refIdx;
      }

      // ...and then continue from the end of auxArray
      auxIdx = auxArray.Length - 1;
   }

   // Compare whatever is left of the reference data
   while ( refIdx >= 0 )
   {
      if ( referenceDataArray[refIdx] != auxArray[auxIdx] )
      {
         return false;
      }
      --refIdx;
      --auxIdx;
   }

   return true;
}
| | 469 | | } |
| | 470 | | } |
| | 471 | |
|
public static partial class E_CBAM
{
   // Returns true only if the nullable character has a value, and that value is equal to the given character.
   internal static Boolean IsOfValue( this Char? nullable, Char value )
   {
      // Lifted comparison: evaluates to false when the nullable has no value
      return nullable == value;
   }
}