2014-02-10 02:10:30 +01:00
/**************************************************************************/
/* translation_loader_po.cpp */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
2018-01-05 00:50:27 +01:00
2014-02-10 02:10:30 +01:00
# include "translation_loader_po.h"
2018-09-11 18:13:45 +02:00
2021-06-11 14:51:48 +02:00
# include "core/io/file_access.h"
2020-11-07 23:33:38 +01:00
# include "core/string/translation.h"
# include "core/string/translation_po.h"
2014-02-10 02:10:30 +01:00
2022-05-03 01:43:50 +02:00
Ref < Resource > TranslationLoaderPO : : load_translation ( Ref < FileAccess > f , Error * r_error ) {
2020-05-14 16:41:43 +02:00
if ( r_error ) {
2015-08-24 01:15:56 +02:00
* r_error = ERR_FILE_CORRUPT ;
2020-05-14 16:41:43 +02:00
}
2014-02-10 02:10:30 +01:00
2020-07-14 13:16:49 +02:00
const String path = f - > get_path ( ) ;
2022-03-18 13:19:17 +01:00
Ref < TranslationPO > translation = Ref < TranslationPO > ( memnew ( TranslationPO ) ) ;
String config ;
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
uint32_t magic = f - > get_32 ( ) ;
if ( magic = = 0x950412de ) {
// Load binary MO file.
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
uint16_t version_maj = f - > get_16 ( ) ;
uint16_t version_min = f - > get_16 ( ) ;
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( version_maj > 1 , Ref < Resource > ( ) , vformat ( " Unsupported MO file %s, version %d.%d. " , path , version_maj , version_min ) ) ;
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
uint32_t num_strings = f - > get_32 ( ) ;
uint32_t id_table_offset = f - > get_32 ( ) ;
uint32_t trans_table_offset = f - > get_32 ( ) ;
// Read string tables.
for ( uint32_t i = 0 ; i < num_strings ; i + + ) {
String msg_id ;
String msg_id_plural ;
String msg_context ;
// Read id strings and context.
{
Vector < uint8_t > data ;
f - > seek ( id_table_offset + i * 8 ) ;
uint32_t str_start = 0 ;
uint32_t str_len = f - > get_32 ( ) ;
uint32_t str_offset = f - > get_32 ( ) ;
data . resize ( str_len + 1 ) ;
f - > seek ( str_offset ) ;
f - > get_buffer ( data . ptrw ( ) , str_len ) ;
data . write [ str_len ] = 0 ;
bool is_plural = false ;
for ( uint32_t j = 0 ; j < str_len + 1 ; j + + ) {
if ( data [ j ] = = 0x04 ) {
msg_context . parse_utf8 ( ( const char * ) data . ptr ( ) , j ) ;
str_start = j + 1 ;
}
if ( data [ j ] = = 0x00 ) {
if ( is_plural ) {
msg_id_plural . parse_utf8 ( ( const char * ) ( data . ptr ( ) + str_start ) , j - str_start ) ;
} else {
msg_id . parse_utf8 ( ( const char * ) ( data . ptr ( ) + str_start ) , j - str_start ) ;
is_plural = true ;
}
str_start = j + 1 ;
}
}
2020-07-16 10:52:06 +02:00
}
2022-03-18 13:19:17 +01:00
// Read translated strings.
{
Vector < uint8_t > data ;
f - > seek ( trans_table_offset + i * 8 ) ;
uint32_t str_len = f - > get_32 ( ) ;
uint32_t str_offset = f - > get_32 ( ) ;
data . resize ( str_len + 1 ) ;
f - > seek ( str_offset ) ;
f - > get_buffer ( data . ptrw ( ) , str_len ) ;
data . write [ str_len ] = 0 ;
if ( msg_id . is_empty ( ) ) {
config = String : : utf8 ( ( const char * ) data . ptr ( ) , str_len ) ;
// Record plural rule.
int p_start = config . find ( " Plural-Forms " ) ;
if ( p_start ! = - 1 ) {
int p_end = config . find ( " \n " , p_start ) ;
translation - > set_plural_rule ( config . substr ( p_start , p_end - p_start ) ) ;
}
} else {
2022-04-05 12:40:26 +02:00
uint32_t str_start = 0 ;
2022-03-18 13:19:17 +01:00
Vector < String > plural_msg ;
for ( uint32_t j = 0 ; j < str_len + 1 ; j + + ) {
if ( data [ j ] = = 0x00 ) {
if ( msg_id_plural . is_empty ( ) ) {
translation - > add_message ( msg_id , String : : utf8 ( ( const char * ) ( data . ptr ( ) + str_start ) , j - str_start ) , msg_context ) ;
} else {
plural_msg . push_back ( String : : utf8 ( ( const char * ) ( data . ptr ( ) + str_start ) , j - str_start ) ) ;
}
str_start = j + 1 ;
}
}
if ( ! plural_msg . is_empty ( ) ) {
translation - > add_plural_message ( msg_id , plural_msg , msg_context ) ;
2020-07-16 10:52:06 +02:00
}
}
}
}
2022-03-18 13:19:17 +01:00
} else {
// Try to load as text PO file.
f - > seek ( 0 ) ;
enum Status {
STATUS_NONE ,
STATUS_READING_ID ,
STATUS_READING_STRING ,
STATUS_READING_CONTEXT ,
STATUS_READING_PLURAL ,
} ;
Status status = STATUS_NONE ;
String msg_id ;
String msg_str ;
String msg_context ;
Vector < String > msgs_plural ;
if ( r_error ) {
* r_error = ERR_FILE_CORRUPT ;
}
int line = 1 ;
int plural_forms = 0 ;
int plural_index = - 1 ;
bool entered_context = false ;
bool skip_this = false ;
bool skip_next = false ;
bool is_eof = false ;
while ( ! is_eof ) {
String l = f - > get_line ( ) . strip_edges ( ) ;
is_eof = f - > eof_reached ( ) ;
// If we reached last line and it's not a content line, break, otherwise let processing that last loop
if ( is_eof & & l . is_empty ( ) ) {
if ( status = = STATUS_READING_ID | | status = = STATUS_READING_CONTEXT | | ( status = = STATUS_READING_PLURAL & & plural_index ! = plural_forms - 1 ) ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_V_MSG ( Ref < Resource > ( ) , " Unexpected EOF while reading PO file at: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
} else {
break ;
}
2014-02-10 02:10:30 +01:00
}
2022-03-18 13:19:17 +01:00
if ( l . begins_with ( " msgctxt " ) ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( status ! = STATUS_READING_STRING & & status ! = STATUS_READING_PLURAL , Ref < Resource > ( ) , " Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
// In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
// and set "entered_context" to true to prevent adding twice.
if ( ! skip_this & & ! msg_id . is_empty ( ) ) {
2020-07-16 10:52:06 +02:00
if ( status = = STATUS_READING_STRING ) {
translation - > add_message ( msg_id , msg_str , msg_context ) ;
} else if ( status = = STATUS_READING_PLURAL ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( plural_index ! = plural_forms - 1 , Ref < Resource > ( ) , " Number of 'msgstr[]' doesn't match with number of plural forms: " + path + " : " + itos ( line ) ) ;
2020-07-16 10:52:06 +02:00
translation - > add_plural_message ( msg_id , msgs_plural , msg_context ) ;
}
2020-05-14 16:41:43 +02:00
}
2022-03-18 13:19:17 +01:00
msg_context = " " ;
l = l . substr ( 7 , l . length ( ) ) . strip_edges ( ) ;
status = STATUS_READING_CONTEXT ;
entered_context = true ;
}
if ( l . begins_with ( " msgid_plural " ) ) {
if ( plural_forms = = 0 ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_V_MSG ( Ref < Resource > ( ) , " PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
} else if ( status ! = STATUS_READING_ID ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_V_MSG ( Ref < Resource > ( ) , " Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
}
// We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already.
// We just have to reset variables related to plurals for "msgstr[]" later on.
l = l . substr ( 12 , l . length ( ) ) . strip_edges ( ) ;
plural_index = - 1 ;
msgs_plural . clear ( ) ;
msgs_plural . resize ( plural_forms ) ;
status = STATUS_READING_PLURAL ;
} else if ( l . begins_with ( " msgid " ) ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( status = = STATUS_READING_ID , Ref < Resource > ( ) , " Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
if ( ! msg_id . is_empty ( ) ) {
if ( ! skip_this & & ! entered_context ) {
if ( status = = STATUS_READING_STRING ) {
translation - > add_message ( msg_id , msg_str , msg_context ) ;
} else if ( status = = STATUS_READING_PLURAL ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( plural_index ! = plural_forms - 1 , Ref < Resource > ( ) , " Number of 'msgstr[]' doesn't match with number of plural forms: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
translation - > add_plural_message ( msg_id , msgs_plural , msg_context ) ;
}
}
} else if ( config . is_empty ( ) ) {
config = msg_str ;
// Record plural rule.
int p_start = config . find ( " Plural-Forms " ) ;
if ( p_start ! = - 1 ) {
int p_end = config . find ( " \n " , p_start ) ;
translation - > set_plural_rule ( config . substr ( p_start , p_end - p_start ) ) ;
plural_forms = translation - > get_plural_forms ( ) ;
}
}
l = l . substr ( 5 , l . length ( ) ) . strip_edges ( ) ;
status = STATUS_READING_ID ;
// If we did not encounter msgctxt, we reset context to empty to reset it.
if ( ! entered_context ) {
msg_context = " " ;
}
msg_id = " " ;
msg_str = " " ;
skip_this = skip_next ;
skip_next = false ;
entered_context = false ;
2020-05-14 16:41:43 +02:00
}
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
if ( l . begins_with ( " msgstr[ " ) ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( status ! = STATUS_READING_PLURAL , Ref < Resource > ( ) , " Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
plural_index + + ; // Increment to add to the next slot in vector msgs_plural.
l = l . substr ( 9 , l . length ( ) ) . strip_edges ( ) ;
} else if ( l . begins_with ( " msgstr " ) ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( status ! = STATUS_READING_ID , Ref < Resource > ( ) , " Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
l = l . substr ( 6 , l . length ( ) ) . strip_edges ( ) ;
status = STATUS_READING_STRING ;
2020-07-16 10:52:06 +02:00
}
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
if ( l . is_empty ( ) | | l . begins_with ( " # " ) ) {
if ( l . contains ( " fuzzy " ) ) {
skip_next = true ;
}
line + + ;
continue ; // Nothing to read or comment.
2020-07-16 10:52:06 +02:00
}
2022-03-18 13:19:17 +01:00
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( ! l . begins_with ( " \" " ) | | status = = STATUS_NONE , Ref < Resource > ( ) , " Invalid line ' " + l + " ' while parsing: " + path + " : " + itos ( line ) ) ;
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
l = l . substr ( 1 , l . length ( ) ) ;
// Find final quote, ignoring escaped ones (\").
// The escape_next logic is necessary to properly parse things like \\"
// where the backslash is the one being escaped, not the quote.
int end_pos = - 1 ;
bool escape_next = false ;
for ( int i = 0 ; i < l . length ( ) ; i + + ) {
if ( l [ i ] = = ' \\ ' & & ! escape_next ) {
escape_next = true ;
continue ;
}
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
if ( l [ i ] = = ' " ' & & ! escape_next ) {
end_pos = i ;
break ;
}
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
escape_next = false ;
2020-03-20 08:47:43 +01:00
}
2014-02-10 02:10:30 +01:00
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( end_pos = = - 1 , Ref < Resource > ( ) , " Expected ' \" ' at end of message while parsing: " + path + " : " + itos ( line ) ) ;
2020-03-20 08:47:43 +01:00
2022-03-18 13:19:17 +01:00
l = l . substr ( 0 , end_pos ) ;
l = l . c_unescape ( ) ;
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
if ( status = = STATUS_READING_ID ) {
msg_id + = l ;
} else if ( status = = STATUS_READING_STRING ) {
msg_str + = l ;
} else if ( status = = STATUS_READING_CONTEXT ) {
msg_context + = l ;
} else if ( status = = STATUS_READING_PLURAL & & plural_index > = 0 ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( plural_index > = plural_forms , Ref < Resource > ( ) , " Unexpected plural form while parsing: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
msgs_plural . write [ plural_index ] = msgs_plural [ plural_index ] + l ;
}
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
line + + ;
2020-05-14 16:41:43 +02:00
}
2014-02-10 02:10:30 +01:00
2022-03-18 13:19:17 +01:00
// Add the last set of data from last iteration.
if ( status = = STATUS_READING_STRING ) {
if ( ! msg_id . is_empty ( ) ) {
if ( ! skip_this ) {
translation - > add_message ( msg_id , msg_str , msg_context ) ;
}
} else if ( config . is_empty ( ) ) {
config = msg_str ;
2020-05-14 16:41:43 +02:00
}
2022-03-18 13:19:17 +01:00
} else if ( status = = STATUS_READING_PLURAL ) {
if ( ! skip_this & & ! msg_id . is_empty ( ) ) {
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( plural_index ! = plural_forms - 1 , Ref < Resource > ( ) , " Number of 'msgstr[]' doesn't match with number of plural forms: " + path + " : " + itos ( line ) ) ;
2022-03-18 13:19:17 +01:00
translation - > add_plural_message ( msg_id , msgs_plural , msg_context ) ;
2020-07-16 10:52:06 +02:00
}
}
2018-07-24 13:32:37 +02:00
}
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( config . is_empty ( ) , Ref < Resource > ( ) , " No config found in file: " + path + " . " ) ;
2014-02-10 02:10:30 +01:00
Vector < String > configs = config . split ( " \n " ) ;
for ( int i = 0 ; i < configs . size ( ) ; i + + ) {
String c = configs [ i ] . strip_edges ( ) ;
int p = c . find ( " : " ) ;
2020-05-14 16:41:43 +02:00
if ( p = = - 1 ) {
2014-02-10 02:10:30 +01:00
continue ;
2020-05-14 16:41:43 +02:00
}
2014-02-10 02:10:30 +01:00
String prop = c . substr ( 0 , p ) . strip_edges ( ) ;
String value = c . substr ( p + 1 , c . length ( ) ) . strip_edges ( ) ;
2018-05-28 05:21:05 +02:00
if ( prop = = " X-Language " | | prop = = " Language " ) {
2014-02-10 02:10:30 +01:00
translation - > set_locale ( value ) ;
}
}
2020-05-14 16:41:43 +02:00
if ( r_error ) {
2015-08-24 01:15:56 +02:00
* r_error = OK ;
2020-05-14 16:41:43 +02:00
}
2014-02-10 02:10:30 +01:00
return translation ;
2016-05-28 00:58:28 +02:00
}
2022-05-03 01:43:50 +02:00
Ref < Resource > TranslationLoaderPO : : load ( const String & p_path , const String & p_original_path , Error * r_error , bool p_use_sub_threads , float * r_progress , CacheMode p_cache_mode ) {
2020-05-14 16:41:43 +02:00
if ( r_error ) {
2016-05-28 00:58:28 +02:00
* r_error = ERR_CANT_OPEN ;
2020-05-14 16:41:43 +02:00
}
2016-05-28 00:58:28 +02:00
2022-03-23 10:08:58 +01:00
Ref < FileAccess > f = FileAccess : : open ( p_path , FileAccess : : READ ) ;
2022-05-03 01:43:50 +02:00
ERR_FAIL_COND_V_MSG ( f . is_null ( ) , Ref < Resource > ( ) , " Cannot open file ' " + p_path + " '. " ) ;
2014-02-10 02:10:30 +01:00
2016-05-28 00:58:28 +02:00
return load_translation ( f , r_error ) ;
2014-02-10 02:10:30 +01:00
}
void TranslationLoaderPO : : get_recognized_extensions ( List < String > * p_extensions ) const {
p_extensions - > push_back ( " po " ) ;
2022-03-18 13:19:17 +01:00
p_extensions - > push_back ( " mo " ) ;
2014-02-10 02:10:30 +01:00
}
2020-05-14 14:29:06 +02:00
2014-02-10 02:10:30 +01:00
bool TranslationLoaderPO : : handles_type ( const String & p_type ) const {
return ( p_type = = " Translation " ) ;
}
// Returns "Translation" when the extension of `p_path`, compared case-insensitively,
// is "po" or "mo"; returns an empty string for any other path.
String TranslationLoaderPO::get_resource_type(const String &p_path) const {
	const String extension = p_path.get_extension().to_lower();
	if (extension != "po" && extension != "mo") {
		return "";
	}
	return "Translation";
}