Signed-off-by: Nikolay Shustov <Nikolay.Shustov@xxxxxxxxx>
---
git p4 fix for failure to decode p4 errors
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr- git-1926%2Fnshustov%2Fgit-p4-error-decoding-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr- git-1926/nshustov/git-p4-error-decoding-v1
Pull-Request: https://github.com/git/git/pull/1926
git-p4.py | 135 ++++++++++++++++++-------------
t/meson.build | 1 +
t/t9837-git-p4-error-encoding.sh | 53 ++++++++++++
t/t9837/git-p4-error-python3.py | 15 ++++
4 files changed, 149 insertions(+), 55 deletions(-)
create mode 100755 t/t9837-git-p4-error-encoding.sh
create mode 100644 t/t9837/git-p4-error-python3.py
diff --git a/git-p4.py b/git-p4.py
index c0ca7becaf4..72a4c55f99e 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -234,67 +234,91 @@ else:
class MetadataDecodingException(Exception):
- def __init__(self, input_string):
+ def __init__(self, input_string, error=None):
self.input_string = input_string
+ self.error = error
def __str__(self):
- return """Decoding perforce metadata failed!
+ message = """Decoding perforce metadata failed!
The failing string was:
---
{}
---
Consider setting the git-p4.metadataDecodingStrategy config option to
'fallback', to allow metadata to be decoded using a fallback encoding,
-defaulting to cp1252.""".format(self.input_string)
+defaulting to cp1252."""
+ if verbose and self.error is not None:
+ message += """
+---
+Error:
+---
+{}"""
+ return message.format(self.input_string, self.error)
-encoding_fallback_warning_issued = False
-encoding_escape_warning_issued = False
-def metadata_stream_to_writable_bytes(s):
- encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy
- fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding
- if not isinstance(s, bytes):
- return s.encode('utf_8')
- if encodingStrategy == 'passthrough':
- return s
- try:
- s.decode('utf_8')
- return s
- except UnicodeDecodeError:
- if encodingStrategy == 'fallback' and fallbackEncoding:
- global encoding_fallback_warning_issued
- global encoding_escape_warning_issued
- try:
- if not encoding_fallback_warning_issued:
- print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s))
- print("\n(this warning is only displayed once during an import)")
- encoding_fallback_warning_issued = True
- return s.decode(fallbackEncoding).encode('utf_8')
- except Exception as exc:
- if not encoding_escape_warning_issued:
- print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s))
- print("\n(this warning is only displayed once during an import)")
- encoding_escape_warning_issued = True
- escaped_bytes = b''
- # bytes and strings work very differently in python2 vs python3...
- if str is bytes:
- for byte in s:
- byte_number = struct.unpack('>B', byte)[0]
- if byte_number > 127:
- escaped_bytes += b'%'
- escaped_bytes += hex(byte_number) [2:].upper()
- else:
- escaped_bytes += byte
- else:
- for byte_number in s:
- if byte_number > 127:
- escaped_bytes += b'%'
- escaped_bytes += hex(byte_number).upper().encode()[2:]
- else:
- escaped_bytes += bytes([byte_number])
- return escaped_bytes
+class MetadataTranscoder:
+ def __init__(self, default_metadata_decoding_strategy, default_fallback_metadata_encoding):
+ self.decoding_fallback_warning_issued = False
+ self.decoding_escape_warning_issued = False
+ self.decodingStrategy = gitConfig('git- p4.metadataDecodingStrategy') or default_metadata_decoding_strategy
+ self.fallbackEncoding = gitConfig('git- p4.metadataFallbackEncoding') or default_fallback_metadata_encoding
+
+ def decode_metadata(self, s, error_from_fallback=True):
+ try:
+ return [s.decode('utf_8'), 'utf_8']
+ except UnicodeDecodeError as decode_exception:
+ error = decode_exception
+ if self.decodingStrategy == 'fallback' and self.fallbackEncoding:
+ try:
+ if not self.decoding_fallback_warning_issued:
+ print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (self.fallbackEncoding, s))
+ print("\n(this warning is only displayed once during an import)")
+ self.decoding_fallback_warning_issued = True
+ return [s.decode(self.fallbackEncoding), self.fallbackEncoding]
+ except Exception as decode_exception:
+ if not error_from_fallback:
+ return [s, None]
+ error = decode_exception
+ raise MetadataDecodingException(s, error)
+
+ def metadata_stream_to_writable_bytes(self, s):
+ if not isinstance(s, bytes):
+ return s.encode('utf_8')
+ if self.decodingStrategy == 'passthrough':
+ return s
+
+ [text, encoding] = self.decode_metadata(s, False)
+ if encoding == 'utf_8':
+ # s is of utf-8 already
+ return s
+
+ if encoding is None:
+ # could not decode s, even with fallback encoding
+ if not self.decoding_escape_warning_issued:
+ print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (self.fallbackEncoding, s))
+ print("\n(this warning is only displayed once during an import)")
+ self.decoding_escape_warning_issued = True
+ escaped_bytes = b''
+ # bytes and strings work very differently in python2 vs python3...
+ if str is bytes:
+ for byte in s:
+ byte_number = struct.unpack('>B', byte)[0]
+ if byte_number > 127:
+ escaped_bytes += b'%'
+ escaped_bytes += hex(byte_number)[2:].upper()
+ else:
+ escaped_bytes += byte
+ else:
+ for byte_number in s:
+ if byte_number > 127:
+ escaped_bytes += b'%'
+ escaped_bytes += hex(byte_number).upper().encode()[2:]
+ else:
+ escaped_bytes += bytes([byte_number])
+ return escaped_bytes
- raise MetadataDecodingException(s)
+ # were able to decode but not to utf-8
+ return text.encode('utf_8')
def decode_path(path):
@@ -898,14 +922,14 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
decoded_entry[key] = value
# Parse out data if it's an error response
if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
- decoded_entry['data'] = decoded_entry['data'].decode()
+ decoded_entry['data'] = metadataTranscoder.decode_metadata(decoded_entry['data'])
entry = decoded_entry
if skip_info:
if 'code' in entry and entry['code'] == 'info':
continue
for key in p4KeysContainingNonUtf8Chars():
if key in entry:
- entry[key] = metadata_stream_to_writable_bytes(entry[key])
+ entry[key] = metadataTranscoder.metadata_stream_to_writable_bytes(entry[key])
if cb is not None:
cb(entry)
else:
@@ -1718,7 +1742,7 @@ class P4UserMap:
# python2 or python3. To support
# git-p4.metadataDecodingStrategy=fallback, self.users dict values
# are always bytes, ready to be written to git.
- emailbytes = metadata_stream_to_writable_bytes(output["Email"])
+ emailbytes = metadataTranscoder.metadata_stream_to_writable_bytes(output["Email"])
self.users[output["User"]] = output["FullName"] + b" <" + emailbytes + b">"
self.emails[output["Email"]] = output["User"]
@@ -1730,12 +1754,12 @@ class P4UserMap:
fullname = mapUser[0][1]
email = mapUser[0][2]
fulluser = fullname + " <" + email + ">"
- self.users[user] = metadata_stream_to_writable_bytes(fulluser)
+ self.users[user] = metadataTranscoder.metadata_stream_to_writable_bytes(fulluser)
self.emails[email] = user
s = b''
for (key, val) in self.users.items():
- keybytes = metadata_stream_to_writable_bytes(key)
+ keybytes = metadataTranscoder.metadata_stream_to_writable_bytes(key)
s += b"%s\t%s\n" % (keybytes.expandtabs(1), val.expandtabs(1))
open(self.getUserCacheFilename(), 'wb').write(s)
@@ -3349,7 +3373,7 @@ class P4Sync(Command, P4UserMap):
if userid in self.users:
return self.users[userid]
else:
- userid_bytes = metadata_stream_to_writable_bytes(userid)
+ userid_bytes = metadataTranscoder.metadata_stream_to_writable_bytes(userid)
return b"%s <a@b>" % userid_bytes
def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
@@ -4561,6 +4585,7 @@ commands = {
"unshelve": P4Unshelve,
}
+metadataTranscoder = MetadataTranscoder(defaultMetadataDecodingStrategy, defaultFallbackMetadataEncoding)
def main():
if len(sys.argv[1:]) == 0:
diff --git a/t/meson.build b/t/meson.build
index a59da26be3f..656424fdff3 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -1090,6 +1090,7 @@ integration_tests = [
't9834-git-p4-file-dir-bug.sh',
't9835-git-p4-metadata-encoding-python2.sh',
't9836-git-p4-metadata-encoding-python3.sh',
+ 't9837-git-p4-error-encoding.sh',
't9850-shell.sh',
't9901-git-web--browse.sh',
't9902-completion.sh',
diff --git a/t/t9837-git-p4-error-encoding.sh b/t/t9837-git-p4-error- encoding.sh
new file mode 100755
index 00000000000..1ea774afb1b
--- /dev/null
+++ b/t/t9837-git-p4-error-encoding.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+test_description='git p4 error encoding
+
+This test checks that the import process handles inconsistent text
+encoding in p4 error messages without failing'
+
+. ./lib-git-p4.sh
+
+###############################
+## SECTION REPEATED IN t9835 ##
+###############################
+
+# These tests require Perforce with non-unicode setup.
+out=$(2>&1 P4CHARSET=utf8 p4 client -o)
+if test $? -eq 0
+then
+ skip_all="skipping git p4 error encoding tests; Perforce is setup with unicode"
+ test_done
+fi
+
+# These tests are specific to Python 3. Write a custom script that executes
+# git-p4 directly with the Python 3 interpreter to ensure that we use that
+# version even if Git was compiled with Python 2.
+python_target_binary=$(which python3)
+if test -n "$python_target_binary"
+then
+ mkdir temp_python
+ PATH="$(pwd)/temp_python:$PATH"
+ export PATH
+
+ write_script temp_python/git-p4-python3 <<-EOF
+ exec "$python_target_binary" "$(git --exec-path)/git-p4" "\$@"
+ EOF
+fi
+
+git p4-python3 >err
+if ! grep 'valid commands' err
+then
+ skip_all="skipping python3 git p4 tests; python3 not available"
+ test_done
+fi
+
+test_expect_success 'start p4d' '
+ start_p4d
+'
+
+test_expect_success 'see if Perforce error with characters not convertable to utf-8 will be processed correctly' '
+ test_when_finished cleanup_git &&
+ $python_target_binary "$TEST_DIRECTORY"/t9837/git-p4-error- python3.py "$TEST_DIRECTORY"
+'
+
+test_done
diff --git a/t/t9837/git-p4-error-python3.py b/t/t9837/git-p4-error- python3.py
new file mode 100644
index 00000000000..fb65aee386e
--- /dev/null
+++ b/t/t9837/git-p4-error-python3.py
@@ -0,0 +1,15 @@
+import os
+import sys
+from importlib.machinery import SourceFileLoader
+
+def main():
+ if len(sys.argv[1:]) != 1:
+ print("Expected test directory name")
+
+ gitp4_path = sys.argv[1] + "/../git-p4.py"
+ gitp4 = SourceFileLoader("gitp4", gitp4_path).load_module()
+ gitp4.p4CmdList(["edit", b'\xFEfile'])
+
+if __name__ == '__main__':
+ main()
+
base-commit: 683c54c999c301c2cd6f715c411407c413b1d84e