中野智文のブログ

データ・マエショリストのメモ

BigQuery で base64 から uuid の形式へ変換する

背景

BigQuery で base64 から uuid への変換をUDFを使って行おうと思ったのでメモ。

経緯

atob さえあればそれほど大変ではないはず。ところが、 Google Developers Blog: Breaking the SQL Barrier: Google BigQuery User-Defined Functions によると、

Note that not all JavaScript functionality supported in the browser is available in BigQuery. For example, anything related to the browser DOM is unsupported, including Window and Document objects, and any functions that require them, such as atob() / btoa().

だそうで、面倒なことに。

解決

次の stack overflow の関数を拝借して解決…

create temp function base64_uuid(str string)
returns string
language js as """

  // [https://stackoverflow.com/questions/44836246]
  var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';
  function InvalidCharacterError(message) {
    this.message = message;
  }
  InvalidCharacterError.prototype = new Error;
  InvalidCharacterError.prototype.name = 'InvalidCharacterError';
  // decoder                                                                                                                                                                                                                                                                              
  // [https://gist.github.com/1020396] by [https://github.com/atk]                                                                                                                                                                                                                        
  atob = function (input) {
    var str = String(input).replace(/[=]+$/, ''); // #31: ExtendScript bad parse of /=                                                                                                                                                                                                    
    if (str.length % 4 == 1) {
      throw new InvalidCharacterError("'atob' failed: The string to be decoded is not correctly encoded.");
    }
    for (
      // initialize result and counters                                                                                                                                                                                                                                                   
      var bc = 0, bs, buffer, idx = 0, output = '';
      // get next character                                                                                                                                                                                                                                                               
      buffer = str.charAt(idx++);
      // character found in table? initialize bit storage and add its ascii value;                                                                                                                                                                                                        
      ~buffer && (bs = bc % 4 ? bs * 64 + buffer : buffer,
        // and if not first of each 4 characters,                                                                                                                                                                                                                                         
        // convert the first 8 bits to one ascii character                                                                                                                                                                                                                                
        bc++ % 4) ? output += String.fromCharCode(255 & bs >> (-2 * bc & 6)) : 0
    ) {
      // try to find character in table (0-63, not found => -1)                                                                                                                                                                                                                           
      buffer = chars.indexOf(buffer);
    }
    return output;
  };
  
try {
  bin = atob(str);
  // [https://stackoverflow.com/questions/39460182/]
  h = bin.split('').map(function(b) {
    return ('0' + b.charCodeAt(0).toString(16)).slice(-2);
  });
  
  return [h[0],h[1],h[2],h[3],'-',h[4],h[5],'-',h[6],h[7],'-',h[8],h[9],'-',h[10],h[11],h[12],h[13],h[14],h[15]].join('')
} catch(e) {
  return ''
}
""";


select base64_uuid("1/DhINolQvadQcBOL4itew==")