QString.cpp is pretty big, about 8,000 to 9,000 lines of code. An introductory description of it is:
431 \class QString
432 \reentrant
433
434 \brief The QString class provides a Unicode character string.
435
436 \ingroup tools
437 \ingroup shared
438 \ingroup string-processing
439
440 QString stores a string of 16-bit \l{QChar}s, where each QChar
441 corresponds to one Unicode 4.0 character. (Unicode characters
442 with code values above 65535 are stored using surrogate pairs,
443 i.e., two consecutive \l{QChar}s.)
444
445 \l{Unicode} is an international standard that supports most of the
446 writing systems in use today. It is a superset of US-ASCII (ANSI
447 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
448 characters are available at the same code positions.
449
450 Behind the scenes, QString uses \l{implicit sharing}
451 (copy-on-write) to reduce memory usage and to avoid the needless
452 copying of data. This also helps reduce the inherent overhead of
453 storing 16-bit characters instead of 8-bit characters.
454
455 In addition to QString, Qt also provides the QByteArray class to
456 store raw bytes and traditional 8-bit '\\0'-terminated strings.
457 For most purposes, QString is the class you want to use. It is
458 used throughout the Qt API, and the Unicode support ensures that
459 your applications will be easy to translate if you want to expand
460 your application's market at some point. The two main cases where
461 QByteArray is appropriate are when you need to store raw binary
462 data, and when memory conservation is critical (e.g., with
463 \l{Qt for Embedded Linux}).
(Above it there was also a QCharRef class, which it might or might not use.)
So from an exploit point of view, this is pretty good news.
Unicode is still somewhat experimental/complex and always under development/expanding... the file/class is pretty big... so there is a high likelihood of some remaining bugs here and there...
Now the question is where are the bugs and did we get lucky for the .arg stuff ?!
=D
(Also, the link/web-based approach takes a minute or so to render in HTML/IE9, so some patience is needed.)
Things are starting to get a bit fuzzy, since I am not sure exactly which version of the arg function is being called, so it is starting to seem that real-world execution/tracing might be necessary to know for sure.
But if I had to make a bet, I would place it on the final arg function and the routines which it calls.
To me it seems highly likely that there is at least some kind of bug in these routines which causes %n to be displayed instead of something else, like a number:
// Summary of the "%N" / "%LN" place markers found in a format string.
// Filled in by findArgEscapes() and read by replaceArgEscapes() to size and
// build the substituted result.
6374 struct ArgEscapeData
6375 {
6376 int min_escape; // lowest escape sequence number
6377 int occurrences; // number of occurrences of the lowest escape sequence number
6378 int locale_occurrences; // number of occurrences of the lowest escape sequence number that
6379 // contain 'L'
6380 int escape_len; // total length of escape sequences which will be replaced
6381 };
6382
// Scans s for place markers of the form '%', an optional 'L', then one or two
// digits, and reports the lowest marker number together with how often it
// occurs, how many of those occurrences carry the 'L' flag, and the summed
// length of those occurrences. If no marker is found, the returned
// occurrences field is 0 and min_escape is still INT_MAX.
6383 static ArgEscapeData findArgEscapes(const QString &s)
6384 {
6385 const QChar *uc_begin = s.unicode();
6386 const QChar *uc_end = uc_begin + s.length();
6387
6388 ArgEscapeData d;
6389
6390 d.min_escape = INT_MAX;
6391 d.occurrences = 0;
6392 d.escape_len = 0;
6393 d.locale_occurrences = 0;
6394
6395 const QChar *c = uc_begin;
6396 while (c != uc_end) {
// Skip ahead to the next '%'.
6397 while (c != uc_end && c->unicode() != '%')
6398 ++c;
6399
6400 if (c == uc_end)
6401 break;
6402 const QChar *escape_start = c;
// A '%' in the very last position cannot start a marker.
6403 if (++c == uc_end)
6404 break;
6405
// Optional 'L' flag directly after the '%'.
6406 bool locale_arg = false;
6407 if (c->unicode() == 'L') {
6408 locale_arg = true;
6409 if (++c == uc_end)
6410 break;
6411 }
6412
// Not a digit: this was not a marker. c is deliberately not advanced, so
// if the current character is itself a '%' the outer loop examines it next.
6413 if (c->digitValue() == -1)
6414 continue;
6415
6416 int escape = c->digitValue();
6417 ++c;
6418
// At most one further digit is consumed, so marker numbers have one or two digits.
6419 if (c != uc_end && c->digitValue() != -1) {
6420 escape = (10 * escape) + c->digitValue();
6421 ++c;
6422 }
6423
// Markers numbered above the current minimum are ignored.
6424 if (escape > d.min_escape)
6425 continue;
6426
// A new lowest number discards everything counted for the previous minimum.
6427 if (escape < d.min_escape) {
6428 d.min_escape = escape;
6429 d.occurrences = 0;
6430 d.escape_len = 0;
6431 d.locale_occurrences = 0;
6432 }
6433
6434 ++d.occurrences;
6435 if (locale_arg)
6436 ++d.locale_occurrences;
// c now points just past the marker, so this adds the marker's full length
// ('%', optional 'L', and the digits).
6437 d.escape_len += c - escape_start;
6438 }
6439 return d;
6440 }
6441
// Builds a copy of s in which every occurrence of the marker numbered
// d.min_escape is replaced: markers carrying the 'L' flag receive larg, all
// others receive arg. Each replacement is padded with fillChar up to
// qAbs(field_width) characters; a positive field_width puts the padding before
// the text, a negative one puts it after. Markers with any other number are
// copied through unchanged.
//
// The result buffer is sized up front from d and then filled through raw
// pointers, so d must describe this same s.
// NOTE(review): nothing in this function re-validates d against s; the two
// Q_ASSERTs on the written length are the only checks visible here.
6442 static QString replaceArgEscapes(const QString &s, const ArgEscapeData &d, int field_width,
6443 const QString &arg, const QString &larg, const QChar &fillChar = QLatin1Char(' '))
6444 {
6445 const QChar *uc_begin = s.unicode();
6446 const QChar *uc_end = uc_begin + s.length();
6447
// Exact output length: the input, minus the markers being replaced, plus one
// (possibly padded) argument per replaced marker.
6448 int abs_field_width = qAbs(field_width);
6449 int result_len = s.length()
6450 - d.escape_len
6451 + (d.occurrences - d.locale_occurrences)
6452 *qMax(abs_field_width, arg.length())
6453 + d.locale_occurrences
6454 *qMax(abs_field_width, larg.length());
6455
// The freshly allocated string is written in place; the cast removes the
// const from the pointer returned by unicode().
6456 QString result(result_len, Qt::Uninitialized);
6457 QChar *result_buff = (QChar*) result.unicode();
6458
6459 QChar *rc = result_buff;
6460 const QChar *c = uc_begin;
6461 int repl_cnt = 0;
6462 while (c != uc_end) {
6463 /* We don't have to check if we run off the end of the string with c,
6464 because as long as d.occurrences > 0 we KNOW there are valid escape
6465 sequences. */
6466
6467 const QChar *text_start = c;
6468
// Unbounded scan for the next '%'; see the comment above for why no end test is made.
6469 while (c->unicode() != '%')
6470 ++c;
6471
6472 const QChar *escape_start = c++;
6473
6474 bool locale_arg = false;
6475 if (c->unicode() == 'L') {
6476 locale_arg = true;
6477 ++c;
6478 }
6479
// Parse one or two digits. c is left on the last digit of the marker (or on
// the non-digit character if this is not a marker at all).
6480 int escape = c->digitValue();
6481 if (escape != -1) {
6482 if (c + 1 != uc_end && (c + 1)->digitValue() != -1) {
6483 escape = (10 * escape) + (c + 1)->digitValue();
6484 ++c;
6485 }
6486 }
6487
// Not the marker being replaced: copy everything up to (but excluding) *c
// verbatim and resume scanning from c.
6488 if (escape != d.min_escape) {
6489 memcpy(rc, text_start, (c - text_start)*sizeof(QChar));
6490 rc += c - text_start;
6491 }
6492 else {
// Step past the last digit of the marker.
6493 ++c;
6494
// Copy the literal text that precedes the marker.
6495 memcpy(rc, text_start, (escape_start - text_start)*sizeof(QChar));
6496 rc += escape_start - text_start;
6497
// Number of fill characters needed to reach the requested width (0 if the
// argument is already at least that long).
6498 uint pad_chars;
6499 if (locale_arg)
6500 pad_chars = qMax(abs_field_width, larg.length()) - larg.length();
6501 else
6502 pad_chars = qMax(abs_field_width, arg.length()) - arg.length();
6503
6504 if (field_width > 0) { // left padded (fill goes before the text)
6505 for (uint i = 0; i < pad_chars; ++i)
6506 (rc++)->unicode() = fillChar.unicode();
6507 }
6508
6509 if (locale_arg) {
6510 memcpy(rc, larg.unicode(), larg.length()*sizeof(QChar));
6511 rc += larg.length();
6512 }
6513 else {
6514 memcpy(rc, arg.unicode(), arg.length()*sizeof(QChar));
6515 rc += arg.length();
6516 }
6517
6518 if (field_width < 0) { // right padded (fill goes after the text)
6519 for (uint i = 0; i < pad_chars; ++i)
6520 (rc++)->unicode() = fillChar.unicode();
6521 }
6522
// After the last expected replacement, copy the remaining tail in one go and
// end the loop by moving c to the end of the input.
6523 if (++repl_cnt == d.occurrences) {
6524 memcpy(rc, c, (uc_end - c)*sizeof(QChar));
6525 rc += uc_end - c;
6526 Q_ASSERT(rc - result_buff == result_len);
6527 c = uc_end;
6528 }
6529 }
6530 }
6531 Q_ASSERT(rc == result_buff + result_len);
6532
6533 return result;
6534 }
6535
6536 /*!
6537 Returns a copy of this string with the lowest numbered place marker
6538 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
6539
6540 \a fieldWidth specifies the minimum amount of space that argument \a
6541 a shall occupy. If \a a requires less space than \a fieldWidth, it
6542 is padded to \a fieldWidth with character \a fillChar. A positive
6543 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
6544 produces left-aligned text.
6545
6546 This example shows how we might create a \c status string for
6547 reporting progress while processing a list of files:
6548
6549 \snippet doc/src/snippets/qstring/main.cpp 11
6550
6551 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
6552 %2. Finally, \c arg(fileName) replaces \c %3.
6553
6554 One advantage of using arg() over sprintf() is that the order of the
6555 numbered place markers can change, if the application's strings are
6556 translated into other languages, but each arg() will still replace
6557 the lowest numbered unreplaced place marker, no matter where it
6558 appears. Also, if place marker \c %i appears more than once in the
6559 string, the arg() replaces all of them.
6560
6561 If there is no unreplaced place marker remaining, a warning message
6562 is output and the result is undefined. Place marker numbers must be
6563 in the range 1 to 99.
6564 */
6565 QString QString::arg(const QString &a, int fieldWidth, const QChar &fillChar) const
6566 {
// Locate the lowest numbered place marker and count its occurrences.
6567 ArgEscapeData d = findArgEscapes(*this);
6568
// No marker at all: warn and hand back an unmodified copy of this string.
6569 if (d.occurrences == 0) {
6570 qWarning("QString::arg: Argument missing: %s, %s", toLocal8Bit().data(),
6571 a.toLocal8Bit().data());
6572 return *this;
6573 }
// The same string is passed as both the plain and the 'L'-flagged replacement,
// so %N and %LN markers receive identical text in this overload.
6574 return replaceArgEscapes(*this, d, fieldWidth, a, a, fillChar);
6575 }
Seeing these stack-based (or perhaps heap-based) buffer variables, the bold statements in the comments about "being sure" (lol), the memcpy calls, and the sanity-checking asserts is going to make some exploit writers'/researchers' mouths water.
The code does indeed look somewhat complex, so there is a high likelihood of a bug being in there.
It would be interesting to know whether this is indeed the code that is being executed.
So for now I guess I am going to stop my investigation until I or somebody else can actually execute the entire bitcoin+qt code and confirm whether this is indeed the code being executed.
One last possibility to consider is this piece of code, in case it's a multi-arg call:
// Parses the place marker that begins at uc[*pos]. Both callers shown in this
// listing only call it when uc[*pos] is '%'; the function itself does not
// check that character.
//
// The accepted form is '%', an optional 'L', then one or more decimal digits.
// On success the marker number is returned and *pos is advanced to the first
// character after the marker. If no digit follows, or the number is greater
// than maxNumber, -1 is returned and *pos is left unchanged.
//
// uc        - start of the character buffer
// pos       - in/out index of the '%' within uc
// len       - number of characters in uc
// maxNumber - largest marker number that is accepted
//
// Unnumbered lines are review changes relative to the quoted source: the
// "[i]" subscripts lost from lines 6961, 6964 and 6969 are restored, and an
// overflow guard is added inside the digit loop.
6957 static int getEscape(const QChar *uc, int *pos, int len, int maxNumber = 999)
6958 {
6959 int i = *pos;
6960 ++i;
6961 if (i < len && uc[i] == QLatin1Char('L'))
6962 ++i;
6963 if (i < len) {
6964 int escape = uc[i].unicode() - '0';
// The unsigned comparison rejects values below '0' and above '9' in one test.
6965 if (uint(escape) >= 10U)
6966 return -1;
6967 ++i;
6968 while (i < len) {
6969 int digit = uc[i].unicode() - '0';
6970 if (uint(digit) >= 10U)
6971 break;
// If appending this digit would push the number past maxNumber, the marker
// is rejected anyway; returning now keeps (escape * 10) + digit from ever
// overflowing int on a long run of digits.
if (maxNumber < digit || escape > (maxNumber - digit) / 10)
return -1;
6972 escape = (escape * 10) + digit;
6973 ++i;
6974 }
6975 if (escape <= maxNumber) {
6976 *pos = i;
6977 return escape;
6978 }
6979 }
6980 return -1;
6981 }
6982
// Returns a copy of this string in which "%N" / "%LN" place markers are
// replaced by the strings in args. Markers are matched to arguments in
// ascending order of marker number: the lowest distinct number found receives
// args[0], the next lowest args[1], and so on, for at most numArgs arguments.
// Markers that received no argument are copied through unchanged.
//
// numArgs - number of entries in args
// args    - array of pointers to the replacement strings
//
// Unnumbered lines are review changes relative to the quoted source: the
// "[i]" subscripts lost from lines 6995 and 7023 are restored, and line 7025
// uses QMap::value() so that a failed parse (number == -1) no longer inserts
// a stray entry into numbersUsed through operator[].
6983 QString QString::multiArg(int numArgs, const QString **args) const
6984 {
6985 QString result;
6986 QMap<int, int> numbersUsed;
6987 const QChar *uc = (const QChar *) d->data;
6988 const int len = d->size;
6989 const int end = len - 1;
6990 int lastNumber = -1;
6991 int i = 0;
6992
6993 // populate the numbersUsed map with the %n's that actually occur in the string
// The scan stops before the final character: a '%' in the last position
// cannot be followed by a digit.
6994 while (i < end) {
6995 if (uc[i] == QLatin1Char('%')) {
6996 int number = getEscape(uc, &i, len);
6997 if (number != -1) {
// -1 marks "no argument assigned yet"; getEscape already moved i past the marker.
6998 numbersUsed.insert(number, -1);
6999 continue;
7000 }
7001 }
7002 ++i;
7003 }
7004
7005 // assign an argument number to each of the %n's
// The map is iterated in ascending key order, so lower marker numbers get
// earlier arguments. lastNumber ends up as the highest marker number that
// received an argument.
7006 QMap<int, int>::iterator j = numbersUsed.begin();
7007 QMap<int, int>::iterator jend = numbersUsed.end();
7008 int arg = 0;
7009 while (j != jend && arg < numArgs) {
7010 *j = arg++;
7011 lastNumber = j.key();
7012 ++j;
7013 }
7014
7015 // sanity
// More arguments were supplied than there are distinct markers: warn and
// ignore the surplus.
7016 if (numArgs > arg) {
7017 qWarning("QString::arg: %d argument(s) missing in %s", numArgs - arg, toLocal8Bit().data());
7018 numArgs = arg;
7019 }
7020
// Second pass: build the result, substituting every marker that has an argument.
7021 i = 0;
7022 while (i < len) {
7023 if (uc[i] == QLatin1Char('%') && i != end) {
// Passing lastNumber as the limit makes getEscape reject markers numbered
// above the last one that was assigned an argument.
7024 int number = getEscape(uc, &i, len, lastNumber);
int arg = numbersUsed.value(number, -1);
7026 if (number != -1 && arg != -1) {
7027 result += *args[arg];
7028 continue;
7029 }
7030 }
7031 result += uc[i++];
7032 }
7033 return result;
7034 }