| | | 1 | | /* |
| | | 2 | | * Copyright 2017 Stanislav Muhametsin. All rights Reserved. |
| | | 3 | | * |
| | | 4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
| | | 5 | | * you may not use this file except in compliance with the License. |
| | | 6 | | * You may obtain a copy of the License at |
| | | 7 | | * |
| | | 8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
| | | 9 | | * |
| | | 10 | | * Unless required by applicable law or agreed to in writing, software |
| | | 11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
| | | 12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| | | 13 | | * implied. |
| | | 14 | | * |
| | | 15 | | * See the License for the specific language governing permissions and |
| | | 16 | | * limitations under the License. |
| | | 17 | | */ |
| | | 18 | | using System; |
| | | 19 | | using System.Collections.Generic; |
| | | 20 | | using System.IO; |
| | | 21 | | using System.Text; |
| | | 22 | | using System.Threading.Tasks; |
| | | 23 | | using UtilPack; |
| | | 24 | | |
| | | 25 | | using TReader = UtilPack.PeekablePotentiallyAsyncReader<System.Char?>; |
| | | 26 | | |
| | | 27 | | namespace CBAM.SQL.PostgreSQL.Implementation |
| | | 28 | | { |
| | | 29 | | internal static class Parser |
| | | 30 | | { |
| | | 31 | | // Helper class to keep track of how many chars have been read from the underlying reader |
// Decorates another TextReader and counts every character successfully
// consumed through Read(). Peeking does not affect the count, and disposing
// this wrapper leaves the wrapped reader open.
private sealed class TextReaderWrapper : TextReader
{
   private readonly TextReader _reader;
   private Int32 _charsRead;

   internal TextReaderWrapper( TextReader reader )
   {
      ArgumentValidator.ValidateNotNull( "Reader", reader );

      this._charsRead = 0;
      this._reader = reader;
   }

   // Number of characters that Read() has returned so far (end-of-input markers are not counted).
   public Int32 CharsRead => this._charsRead;

   public override Int32 Read()
   {
      var charOrEnd = this._reader.Read();
      if ( charOrEnd == -1 )
      {
         // End of input: nothing was consumed, so the counter stays as it is.
         return charOrEnd;
      }

      ++this._charsRead;
      return charOrEnd;
   }

   public override Int32 Peek() => this._reader.Peek();

   protected override void Dispose( bool disposing )
   {
      // Intentionally empty: the wrapped reader is owned by the caller and must stay open.
   }
}
| | | 73 | | |
| | | 74 | | // Returns the values produced by onParameter for each '?' placeholder of the statement, or null if there were none |
// Consumes characters from the reader up to and including the semicolon that
// ends the current statement (a semicolon seen while the parenthesis counter
// is zero), or until the reader runs out of characters.
// Quoted literals, quoted identifiers, comments and dollar-quoted literals are
// skipped by the dedicated Parse* helpers so that their contents are not
// interpreted. For every '?' seen outside of those constructs, onParameter
// (when not null) is invoked and its return value collected.
// Returns the collected values in order of occurrence, or null when no value
// was collected.
internal static async ValueTask<Int32[]> ParseStringForNextSQLStatement(
   TReader reader,
   Boolean standardConformingStrings,
   Func<Int32> onParameter
   )
{
   List<Int32> collectedIndices = null;
   var openParentheses = 0;
   Char? previous = null;
   Char? beforePrevious = null;

   while ( true )
   {
      var current = await reader.TryReadNextAsync();
      if ( !current.HasValue )
      {
         // Reader exhausted before a terminating semicolon was found.
         break;
      }

      var ch = current.Value;
      var statementEnded = false;
      if ( ch == '\'' )
      {
         await ParseSingleQuotes( reader, standardConformingStrings, previous, beforePrevious );
      }
      else if ( ch == '"' )
      {
         await ParseDoubleQuotes( reader );
      }
      else if ( ch == '-' )
      {
         await ParseLineComment( reader );
      }
      else if ( ch == '/' )
      {
         await ParseBlockComment( reader );
      }
      else if ( ch == '$' )
      {
         await ParseDollarQuotes( reader, previous );
      }
      else if ( ch == '(' )
      {
         ++openParentheses;
      }
      else if ( ch == ')' )
      {
         --openParentheses;
      }
      else if ( ch == '?' )
      {
         if ( onParameter != null )
         {
            if ( collectedIndices == null )
            {
               // Allocate lazily: statements without parameters never need the list.
               collectedIndices = new List<Int32>();
            }
            collectedIndices.Add( onParameter() );
         }
      }
      else if ( ch == ';' )
      {
         // Semicolons inside parentheses do not terminate the statement.
         statementEnded = openParentheses == 0;
      }

      if ( statementEnded )
      {
         break;
      }

      beforePrevious = previous;
      previous = current;
   }

   return collectedIndices?.ToArray();
}
| | | 137 | | |
| | | 138 | | |
| | | 139 | | // See http://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html for String Constants with C-style Escapes |
| | | 140 | | // Consumes the reader up to and including the single quote which ends the literal; always returns true |
// Skips over a single-quoted string literal whose opening quote has already
// been read, leaving the reader positioned right after the closing quote (or
// at end of input if the literal is unterminated).
//
// reader:                    source of characters, positioned right after the opening quote.
// standardConformingStrings: when false, backslashes act as escape characters in every literal.
// prev1:                     character immediately before the opening quote, if any.
// prev2:                     character before prev1, if any.
//
// Backslash escaping is applied when EITHER the literal uses the escape string
// syntax (an 'e' or 'E' directly before the quote which is not the tail of a
// longer identifier) OR standardConformingStrings is false. Previously both
// conditions had to hold, so E'...' literals were never recognized while
// standard conforming strings were on, and plain literals were parsed without
// escapes while they were off.
//
// Always returns true.
internal static async ValueTask<Boolean> ParseSingleQuotes(
   TReader reader,
   Boolean standardConformingStrings,
   Char? prev1,
   Char? prev2
   )
{
   // A missing prev2 means the 'E' is the very first character seen, which also makes it a prefix.
   var isEscapeString = ( prev1 == 'e' || prev1 == 'E' )
      && ( !prev2.HasValue || CharTerminatesIdentifier( prev2.Value ) );

   Char? c;
   if ( !standardConformingStrings || isEscapeString )
   {
      // C-style escaping: a backslash escapes exactly the one character following it.
      // Tracking an explicit flag (rather than just the previous character) makes
      // sure that in "\\'" the quote is NOT considered escaped, since the second
      // backslash was itself the escaped character.
      var escaped = false;
      while ( ( c = await reader.TryReadNextAsync() ).HasValue )
      {
         if ( escaped )
         {
            escaped = false;
         }
         else if ( c == '\\' )
         {
            escaped = true;
         }
         else if ( await CheckSingleQuote( reader, c.Value ) )
         {
            break;
         }
      }
   }
   else
   {
      // Backslashes are ordinary characters; only quotes matter.
      while ( ( c = await reader.TryReadNextAsync() ).HasValue )
      {
         if ( await CheckSingleQuote( reader, c.Value ) )
         {
            break;
         }
      }
   }

   return true;
}
| | | 176 | | |
// Skips over a double-quoted identifier whose opening quote has already been
// read. Two consecutive double quotes inside the identifier stand for a
// literal quote and do not end it. Stops after the closing quote or when the
// reader runs out of characters. Always returns true.
internal static async ValueTask<Boolean> ParseDoubleQuotes(
   TReader reader
   )
{
   while ( true )
   {
      var current = await reader.TryReadNextAsync();
      if ( !current.HasValue )
      {
         // Unterminated identifier: nothing more to consume.
         break;
      }

      if ( current != '"' )
      {
         continue;
      }

      if ( !( await reader.TryPeekAsync() ).IsOfValue( '"' ) )
      {
         // A lone double quote closes the identifier.
         break;
      }

      // Doubled double quote: consume the second one and keep going.
      await reader.ReadNextAsync();
   }

   return true;
}
| | | 200 | | |
// Called after a '-' has been read. If the next character is also '-', the
// rest of the line (up to and including the first '\r' or '\n', or up to the
// end of input) is consumed as a line comment. Otherwise nothing is consumed.
// Always returns true.
internal static async ValueTask<Boolean> ParseLineComment(
   TReader reader
   )
{
   var upcoming = await reader.TryPeekAsync();
   if ( upcoming.IsOfValue( '-' ) )
   {
      Char? consumed;
      do
      {
         consumed = await reader.TryReadNextAsync();
      } while ( consumed.HasValue && consumed != '\r' && consumed != '\n' );
   }

   return true;
}
| | | 213 | | |
| | | 214 | | |
// Called after a '/' has been read. If the next character is '*', consumes a
// block comment, honouring nesting, and leaves the reader right after the
// matching "*/". If the comment is never closed, consumption simply stops at
// the end of input. When the next character is not '*', nothing is consumed.
// Always returns true.
internal static async ValueTask<Boolean> ParseBlockComment(
   TReader reader
   )
{
   if ( ( await reader.TryPeekAsync() ).IsOfValue( '*' ) )
   {
      // Consume the '*' of the opening "/*".
      await reader.TryReadNextAsync();

      // SQL spec says block comments nest
      var level = 1;

      // Previous character inside the comment, or null when there is none that may
      // take part in a two-character delimiter. It starts as null so that the '*'
      // of the opening delimiter is not re-used (i.e. "/*/" does not close the comment).
      Char? prev = null;
      Char? cur;

      // TryReadNextAsync is used (like in the other Parse* methods) so that running
      // out of input is reported through HasValue and ends the loop.
      while ( level != 0 && ( cur = await reader.TryReadNextAsync() ).HasValue )
      {
         if ( prev == '*' && cur == '/' )
         {
            // Block comment ending
            --level;
            // Don't let the '/' start a new delimiter ("*/*" is not an opener).
            prev = null;
         }
         else if ( prev == '/' && cur == '*' )
         {
            // Nested block comment
            ++level;
            // Don't let the '*' end the comment right away ("/*/" is not a closer).
            prev = null;
         }
         else
         {
            prev = cur;
         }
      }
   }

   return true;
}
| | | 255 | | |
| | | 256 | | // See http://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html for dollar quote spec |
// Called after a '$' has been read. If that '$' starts a dollar-quoted string
// ("$$...$$" or "$tag$...$tag$"), the whole literal including its closing
// delimiter is consumed; an unterminated literal consumes the rest of the input.
//
// reader: source of characters, positioned right after the '$'.
// prev:   character immediately before the '$', if any. A '$' which directly
//         follows an identifier character is part of that identifier and does
//         not start a dollar quote.
//
// If the characters after the '$' turn out not to form a valid tag, the tag
// characters examined so far stay consumed, and parsing stops at the offending one.
//
// Always returns true.
internal static async ValueTask<Boolean> ParseDollarQuotes(
   TReader reader,
   Char? prev
   )
{
   var c = await reader.TryPeekAsync();
   if ( c.HasValue && ( !prev.HasValue || !IsIdentifierContinuationCharacter( prev.Value ) ) )
   {
      Char[] tag = null;
      if ( c == '$' )
      {
         // "$$": dollar quote with empty tag
         tag = Empty<Char>.Array;
      }
      else if ( IsDollarQuoteTagStartCharacter( c.Value ) )
      {
         var list = new List<Char>();
         while ( ( c = await reader.TryPeekAsync() ).HasValue )
         {
            if ( c == '$' )
            {
               // Tag is complete
               tag = list.ToArray();
               break;
            }
            else if ( !IsDollarQuoteTagContinuationCharacter( c.Value ) )
            {
               // Not a dollar quote after all
               break;
            }
            else
            {
               list.Add( await reader.ReadNextAsync() );
            }
         }
      }

      if ( tag != null )
      {
         // Read the tag-ending dollar sign
         await reader.ReadNextAsync();
         var tagLen = tag.Length;

         // The literal ends at the first full occurrence of the closing delimiter
         // '$' + tag + '$'. Matching the delimiter as a whole (and not just the tag
         // followed by '$') prevents content such as "xtag$" from ending a
         // "$tag$" literal prematurely.
         // 'matched' is the number of leading delimiter characters matched so far.
         var matched = 0;
         while ( ( c = await reader.TryReadNextAsync() ).HasValue )
         {
            var current = c.Value;
            var expected = ( matched == 0 || matched > tagLen ) ? '$' : tag[matched - 1];
            if ( current == expected )
            {
               if ( ++matched == tagLen + 2 )
               {
                  // Whole closing delimiter has been consumed
                  break;
               }
            }
            else
            {
               // Tag characters are never '$', so after a mismatch the only possible
               // partial match is a '$' that starts a new delimiter candidate.
               matched = current == '$' ? 1 : 0;
            }
         }
      }
   }

   return true;
}
| | | 338 | | |
| | | 339 | | // Returns true if this character ends string literal |
// Decides whether the character just read from within a single-quoted literal
// ends that literal.
//
// reader:   source of characters, positioned right after prevChar.
// prevChar: the character that was just read.
//
// Returns false when prevChar is not a single quote, when it is the first half
// of a doubled quote ('' - the second quote is consumed), or when the literal
// is continued by another quoted segment after one or more newline characters
// (the newlines and the opening quote of the next segment are consumed).
// Returns true when prevChar is the quote that closes the literal; in that case
// only newline characters directly following the quote may have been consumed.
private static async ValueTask<Boolean> CheckSingleQuote(
   TReader reader,
   Char prevChar
   )
{
   if ( prevChar != '\'' )
   {
      return false;
   }

   var peek = await reader.TryPeekAsync();
   if ( peek == '\'' )
   {
      // Doubled single quote: an escaped quote inside the literal
      await reader.TryReadNextAsync();
      return false;
   }

   if ( peek == '\n' || peek == '\r' )
   {
      // Check for newline-separated string literal continuation
      // ( see "String Constants" in http://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html ).
      // Only characters that were peeked and found to be newlines are consumed, so
      // that whatever follows them (e.g. ';' or '?') remains available to the caller,
      // and so that hitting the end of input here is not an error.
      do
      {
         await reader.TryReadNextAsync();
         peek = await reader.TryPeekAsync();
      } while ( peek == '\n' || peek == '\r' );

      if ( peek == '\'' )
      {
         // The literal continues: consume the opening quote of the next segment
         await reader.TryReadNextAsync();
         return false;
      }
   }

   return true;
}
| | | 375 | | |
| | | 376 | | // Returns true if character terminates identifier in backend parser |
// A character ends an identifier when it is a double quote, a space character
// (see IsSpace) or an operator character (see IsOperatorChar).
private static Boolean CharTerminatesIdentifier( Char c )
{
   if ( c == '"' )
   {
      return true;
   }

   return IsSpace( c ) || IsOperatorChar( c );
}
| | | 381 | | |
| | | 382 | | // The functions below must be kept in sync with logic of pgsql/src/backend/parser/scan.l |
| | | 383 | | |
| | | 384 | | // Returns true if character is treated as space character in backend parser |
// True for space, horizontal tab, line feed, carriage return and form feed;
// false for every other character.
internal static Boolean IsSpace( Char c )
{
   switch ( c )
   {
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '\f':
         return true;
      default:
         return false;
   }
}
| | | 389 | | |
| | | 390 | | // Returns true if the given character is a valid character for an operator in backend parser |
// True when the character is one of the characters listed below.
private static Boolean IsOperatorChar( Char c )
{
   /*
    * The character set is extracted from operators defined by {self} and {op_chars}
    * in pgsql/src/backend/parser/scan.l.
    */
   const String operatorChars = ",()[].;:+-*/%^<>=~!@#&|`?";
   return operatorChars.IndexOf( c ) >= 0;
}
| | | 399 | | |
| | | 400 | | // Checks whether character is valid as second or later character of an identifier |
// True for ASCII letters, ASCII digits, underscore, dollar sign and any
// character with a code above 127.
private static Boolean IsIdentifierContinuationCharacter( Char c )
{
   if ( c == '_' || c == '$' || c > 127 )
   {
      return true;
   }

   var isLowerCaseLetter = 'a' <= c && c <= 'z';
   var isUpperCaseLetter = 'A' <= c && c <= 'Z';
   var isDigit = '0' <= c && c <= '9';
   return isLowerCaseLetter || isUpperCaseLetter || isDigit;
}
| | | 410 | | |
| | | 411 | | // Checks whether character is valid as first character of dollar quote tag |
// True for ASCII letters, underscore and any character with a code above 127.
// Digits and the dollar sign are rejected.
private static Boolean IsDollarQuoteTagStartCharacter( Char c )
{
   if ( c == '_' || c > 127 )
   {
      return true;
   }

   var isLowerCaseLetter = 'a' <= c && c <= 'z';
   var isUpperCaseLetter = 'A' <= c && c <= 'Z';
   return isLowerCaseLetter || isUpperCaseLetter;
}
| | | 419 | | |
| | | 420 | | // Checks whether character is valid as second or later character of dollar quote tag |
// True for ASCII letters, ASCII digits, underscore and any character with a
// code above 127. The dollar sign is rejected, since it ends the tag.
private static Boolean IsDollarQuoteTagContinuationCharacter( Char c )
{
   if ( c == '_' || c > 127 )
   {
      return true;
   }

   var isLowerCaseLetter = 'a' <= c && c <= 'z';
   var isUpperCaseLetter = 'A' <= c && c <= 'Z';
   var isDigit = '0' <= c && c <= '9';
   return isLowerCaseLetter || isUpperCaseLetter || isDigit;
}
| | | 430 | | |
| | | 431 | | // auxArrayIndex = index of last set character in auxArray |
// Compares the first refLen characters of referenceDataArray against the most
// recently written characters of auxArray, which is treated as a circular
// buffer. The comparison walks backwards from the newest character
// (auxArray[auxArrayIndex]); when index 0 is reached before all reference
// characters have been compared, it continues from the last slot of auxArray.
// Returns true when all compared characters are equal.
internal static Boolean CheckForCircularlyFilledArray( Char[] referenceDataArray, Int32 refLen, Char[] auxArray, Int32 auxArrayIndex )
{
   var refPos = refLen - 1;
   var auxPos = auxArrayIndex;

   if ( auxArrayIndex + 1 < refLen )
   {
      // The wanted characters wrap around the start of the buffer:
      // first compare the part stored in slots auxArrayIndex..0 ...
      while ( auxPos >= 0 )
      {
         if ( referenceDataArray[refPos] != auxArray[auxPos] )
         {
            return false;
         }
         --auxPos;
         --refPos;
      }

      // ... and then carry on from the end of the buffer.
      auxPos = auxArray.Length - 1;
   }

   // Compare whatever is left of the reference data (all of it when no wrapping was needed).
   while ( refPos >= 0 )
   {
      if ( referenceDataArray[refPos] != auxArray[auxPos] )
      {
         return false;
      }
      --refPos;
      --auxPos;
   }

   return true;
}
| | | 469 | | } |
| | | 470 | | } |
| | | 471 | | |
public static partial class E_CBAM
{
   // True only when the nullable character holds a value and that value equals
   // the given character; a missing value never matches.
   internal static Boolean IsOfValue( this Char? nullable, Char value )
   {
      if ( !nullable.HasValue )
      {
         return false;
      }

      return nullable.GetValueOrDefault() == value;
   }
}