From 57a0c1cc89b7979eccf59ec7a48b9d40adc5c4f7 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 7 Feb 2025 13:00:56 +0000 Subject: [PATCH 1/2] Cleanup type conversions in java_bytecode_parsert::read We can safely read signed values and don't need to defer the type conversion to the call site. --- .../java_bytecode/java_bytecode_parser.cpp | 44 ++++++++++++------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_parser.cpp b/jbmc/src/java_bytecode/java_bytecode_parser.cpp index e036b06bdc4..c1f5a383aac 100644 --- a/jbmc/src/java_bytecode/java_bytecode_parser.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_parser.cpp @@ -132,9 +132,19 @@ class java_bytecode_parsert final : public parsert T read() { static_assert( - std::is_unsigned::value, "T should be an unsigned integer"); + std::is_unsigned::value || std::is_signed::value, + "T should be a signed or unsigned integer"); const constexpr size_t bytes = sizeof(T); - u8 result = 0; + if(bytes == 1) + { + if(!*in) + { + log.error() << "unexpected end of bytecode file" << messaget::eom; + throw 0; + } + return static_cast(in->get()); + } + T result = 0; for(size_t i = 0; i < bytes; i++) { if(!*in) @@ -145,7 +155,7 @@ class java_bytecode_parsert final : public parsert result <<= 8u; result |= static_cast(in->get()); } - return narrow_cast(result); + return result; } void store_unknown_method_handle(size_t bootstrap_method_index); @@ -700,7 +710,7 @@ void java_bytecode_parsert::rconstant_pool() std::string s; s.resize(bytes); for(auto &ch : s) - ch = read(); + ch = read(); it->s = s; // Add to string table } break; @@ -942,7 +952,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) case 'b': // a signed byte { - const s1 c = read(); + const s1 c = read(); instruction.args.push_back(from_integer(c, signedbv_typet(8))); } address+=1; @@ -950,7 +960,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) case 'o': // two byte branch offset, signed { - const s2 offset = read(); + const s2 offset = read(); // By converting the signed offset into an absolute address (by adding // the current address) the number represented becomes unsigned. instruction.args.push_back( @@ -961,7 +971,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) case 'O': // four byte branch offset, signed { - const s4 offset = read(); + const s4 offset = read(); // By converting the signed offset into an absolute address (by adding // the current address) the number represented becomes unsigned. instruction.args.push_back( @@ -994,7 +1004,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) { const u2 v = read(); instruction.args.push_back(from_integer(v, unsignedbv_typet(16))); - const s2 c = read(); + const s2 c = read(); instruction.args.push_back(from_integer(c, signedbv_typet(16))); address+=4; } @@ -1002,7 +1012,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) { const u1 v = read(); instruction.args.push_back(from_integer(v, unsignedbv_typet(8))); - const s1 c = read(); + const s1 c = read(); instruction.args.push_back(from_integer(c, signedbv_typet(8))); address+=2; } @@ -1032,7 +1042,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) } // now default value - const s4 default_value = read(); + const s4 default_value = read(); // By converting the signed offset into an absolute address (by adding // the current address) the number represented becomes unsigned. instruction.args.push_back( @@ -1045,8 +1055,8 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) for(std::size_t i=0; i(); - const s4 offset = read(); + const s4 match = read(); + const s4 offset = read(); instruction.args.push_back( from_integer(match, signedbv_typet(32))); // By converting the signed offset into an absolute address (by adding @@ -1070,23 +1080,23 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) } // now default value - const s4 default_value = read(); + const s4 default_value = read(); instruction.args.push_back( from_integer(base_offset+default_value, signedbv_typet(32))); address+=4; // now low value - const s4 low_value = read(); + const s4 low_value = read(); address+=4; // now high value - const s4 high_value = read(); + const s4 high_value = read(); address+=4; // there are high-low+1 offsets, and they are signed for(s4 i=low_value; i<=high_value; i++) { - s4 offset = read(); + s4 offset = read(); instruction.args.push_back(from_integer(i, signedbv_typet(32))); // By converting the signed offset into an absolute address (by adding // the current address) the number represented becomes unsigned. @@ -1130,7 +1140,7 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) case 's': // a signed short { - const s2 s = read(); + const s2 s = read(); instruction.args.push_back(from_integer(s, signedbv_typet(16))); } address+=2; From d0e1b6957e8230f52c294e72abb596f7d100d171 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 4 Apr 2025 10:18:29 +0000 Subject: [PATCH 2/2] Whitespace formatting --- .../java_bytecode/java_bytecode_parser.cpp | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_parser.cpp b/jbmc/src/java_bytecode/java_bytecode_parser.cpp index c1f5a383aac..2fe7ccafbf7 100644 --- a/jbmc/src/java_bytecode/java_bytecode_parser.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_parser.cpp @@ -705,15 +705,15 @@ void java_bytecode_parsert::rconstant_pool() break; case CONSTANT_Utf8: - { - const u2 bytes = read(); - std::string s; - s.resize(bytes); - for(auto &ch : s) - ch = read(); - it->s = s; // Add to string table - } + { + const u2 bytes = read(); + std::string s; + s.resize(bytes); + for(auto &ch : s) + ch = read(); + it->s = s; // Add to string table break; + } case CONSTANT_MethodHandle: it->ref1 = read(); @@ -951,34 +951,34 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) break; case 'b': // a signed byte - { - const s1 c = read(); - instruction.args.push_back(from_integer(c, signedbv_typet(8))); - } + { + const s1 c = read(); + instruction.args.push_back(from_integer(c, signedbv_typet(8))); address+=1; break; + } case 'o': // two byte branch offset, signed - { - const s2 offset = read(); - // By converting the signed offset into an absolute address (by adding - // the current address) the number represented becomes unsigned. - instruction.args.push_back( - from_integer(address+offset, unsignedbv_typet(16))); - } + { + const s2 offset = read(); + // By converting the signed offset into an absolute address (by adding + // the current address) the number represented becomes unsigned. + instruction.args.push_back( + from_integer(address + offset, unsignedbv_typet(16))); address+=2; break; + } case 'O': // four byte branch offset, signed - { - const s4 offset = read(); - // By converting the signed offset into an absolute address (by adding - // the current address) the number represented becomes unsigned. - instruction.args.push_back( - from_integer(address+offset, unsignedbv_typet(32))); - } + { + const s4 offset = read(); + // By converting the signed offset into an absolute address (by adding + // the current address) the number represented becomes unsigned. + instruction.args.push_back( + from_integer(address + offset, unsignedbv_typet(32))); address+=4; break; + } case 'v': // local variable index (one byte) { @@ -1140,8 +1140,8 @@ void java_bytecode_parsert::rbytecode(std::vector &instructions) case 's': // a signed short { - const s2 s = read(); - instruction.args.push_back(from_integer(s, signedbv_typet(16))); + const s2 s = read(); + instruction.args.push_back(from_integer(s, signedbv_typet(16))); } address+=2; break;