AFLOW
 
Loading...
Searching...
No Matches
aurostd_xhttp.cpp
Go to the documentation of this file.
1// ***************************************************************************
2// * *
3// * Aflow STEFANO CURTAROLO - Duke University 2003-2024 *
4// * *
5// ***************************************************************************
6// First written by Frisco Rose in 2018
7// Complete rewrite by Hagen Eckert in 2022
8// Changed to use CURL by Hagen Eckert in 2025
9// hagen.eckert@duke.edu
10
11#ifndef _AUROSTD_XHTTP_CPP_
12#define _AUROSTD_XHTTP_CPP_
13
14#include "aurostd_xhttp.h"
15
#include <array>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <ios>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
27
28#include <curl/curl.h>
29#include <curl/easy.h>
30#include <curl/header.h>
31
32#include "aurostd.h"
34#include "aurostd_xerror.h"
35#include "aurostd_xfile.h"
36
37#include "aflow_xhost.h" // todo required for XPID use and XHOST.DEBUG use
38
39using std::cerr;
40using std::endl;
41using std::string;
42using std::stringstream;
43using std::vector;
44
45#define _DEBUG_XHTTP_ false
46
47namespace {
48 // anonymous namespace for functions that are only used locally, providing basic implementation
49
59 size_t writeData2string(void* ptr, size_t size, size_t nmemb, void* data) {
60 static_cast<std::string*>(data)->append(static_cast<char*>(ptr), size * nmemb);
61 return size * nmemb;
62 }
63
73 size_t writeData2File(void* ptr, size_t size, size_t nmemb, void* stream) {
74 const size_t written = fwrite(ptr, size, nmemb, static_cast<FILE*>(stream));
75 if (written != nmemb * size) {
76 std::stringstream message;
77 message << "error " << errno << ": " << strerror(errno);
79 }
80 return written;
81 }
82
89 void checkCURLStatus(const CURLcode& status, const std::array<char, CURL_ERROR_SIZE>& errbuf) {
90 if (status == CURLE_OK) {
91 return;
92 }
93 const string message(errbuf.data());
95 }
96
107 void httpGetBase(const string& url, const std::string& filename, std::string& output, long& response_code, std::map<std::string, std::string>& header) {
108 const bool LDEBUG = (false || XHOST.DEBUG);
109
110 // ensure the initial response code is set
111 response_code = -1;
112 // define a user agent string
113 const std::string agent = "AFLOW/" + string(AFLOW_VERSION);
114 // create the basic curl handle - cleaned up with curl_easy_cleanup at the end of the function
115 CURL* handle = curl_easy_init();
116 // define two header structs to loop through all headers
117 struct curl_header* raw_header = nullptr;
118 struct curl_header* prev_header = nullptr;
119 // reserve space to capture human friendly error information and make sure it is empty
120 std::array<char, CURL_ERROR_SIZE> errbuf;
121 errbuf[0] = 0;
122 // initialize the curl status to be OK before the first use
123 CURLcode status = CURLE_OK;
124
125 // set up the CURL library
126 // set error buffer
127 curl_easy_setopt(handle, CURLOPT_ERRORBUFFER, errbuf.data());
128 // set url (the URL is not parsed until curl_easy_perform)
129 status = curl_easy_setopt(handle, CURLOPT_URL, url.c_str());
130 checkCURLStatus(status, errbuf);
131 // allways follow redirections (30X)
132 status = curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1L); // starting with LIBCURL 8.13.0 1L could be replaced with CURLFOLLOW_ALL
133 checkCURLStatus(status, errbuf);
134 // set the user agent
135 status = curl_easy_setopt(handle, CURLOPT_USERAGENT, agent.c_str());
136 checkCURLStatus(status, errbuf);
137 if constexpr (_DEBUG_XHTTP_) {
138 // be very verbose during debugging
139 status = curl_easy_setopt(handle, CURLOPT_VERBOSE, 1L);
140 checkCURLStatus(status, errbuf);
141 }
142 if (LDEBUG) {
143 // show progressbar during debugging
144 status = curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0L);
145 checkCURLStatus(status, errbuf);
146 } else {
147 // disable progressbar
148 status = curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 1L);
149 checkCURLStatus(status, errbuf);
150 }
151 if (filename.empty()) {
152 // use the write to string callback function
153 status = curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeData2string);
154 checkCURLStatus(status, errbuf);
155 // set the output location to the output string
156 status = curl_easy_setopt(handle, CURLOPT_WRITEDATA, &output);
157 checkCURLStatus(status, errbuf);
158 // perform the actual interaction with the webpage
159 status = curl_easy_perform(handle);
160 checkCURLStatus(status, errbuf);
161 } else {
162 // use the write to file callback function
163 status = curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeData2File);
164 checkCURLStatus(status, errbuf);
165 // clean the filename and open it
166 const std::string filename_clean = aurostd::CleanFileName(filename);
167 if (FILE* output_file = fopen(filename_clean.c_str(), "wb")) {
168 // set the file descriptor as the write target if it was created successful
169 status = curl_easy_setopt(handle, CURLOPT_WRITEDATA, output_file);
170 checkCURLStatus(status, errbuf);
171 // perform the actual interaction with the webpage
172 status = curl_easy_perform(handle);
173 checkCURLStatus(status, errbuf);
174 // close the file after writing
175 if (const int r = fclose(output_file); r != 0) {
176 const string message = "Unable to close file " + filename_clean;
178 }
179 } else {
180 // throw an error if the file could not be opened
181 const string message = "Unable to open file " + filename_clean;
183 }
184 }
185 // retrieve the response code
186 status = curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);
187 checkCURLStatus(status, errbuf);
188 // collect all relevant headers
189 // CURLH_HEADER The header arrived as a header from the server.
190 // CURLH_1XX The header arrived in an HTTP 1xx response. A 1xx response is an "intermediate" response that might happen before the "real" response.
191 // CURLH_TRAILER The header arrived as a trailer. A header that arrives after the body.
192 constexpr unsigned int header_origin = CURLH_HEADER | CURLH_1XX | CURLH_TRAILER;
193 while ((raw_header = curl_easy_nextheader(handle, header_origin, -1, prev_header)) != nullptr) {
194 header.insert({raw_header->name, raw_header->value});
195 prev_header = raw_header;
196 }
197 // clean up curl lib
198 curl_easy_cleanup(handle);
199 }
200} // namespace
201
202namespace aurostd {
208 void httpGet(const string& url, std::string& output, long& response_code, std::map<std::string, std::string>& header) {
209 httpGetBase(url, "", output, response_code, header);
210 }
211
217 void httpGetFile(const string& url, const std::string& filename, long& response_code, std::map<std::string, std::string>& header) {
218 std::string discarded_output;
219 httpGetBase(url, filename, discarded_output, response_code, header);
220 }
221
225 long httpGetStatus(const std::string& url) {
226 std::string output;
227 long response_code = -1;
228 std::map<std::string, std::string> header;
229 httpGet(url, output, response_code, header);
230 return response_code;
231 }
232
237 long httpGetStatus(const std::string& url, std::string& output) {
238 long response_code = -1;
239 std::map<std::string, std::string> header;
240 httpGet(url, output, response_code, header);
241 return response_code;
242 }
243
249 long httpGetStatus(const std::string& url, std::string& output, std::map<std::string, std::string>& header) {
250 long response_code = -1;
251 httpGet(url, output, response_code, header);
252 return response_code;
253 }
254
261 long httpGetStatus(const std::string& host, const std::string& path, const std::string& query, std::string& output) {
262 const std::string url = "http://" + host + "/" + path + query;
263 long response_code = -1;
264 std::map<std::string, std::string> header;
265 httpGet(url, output, response_code, header);
266 return response_code;
267 }
268
275 long httpGetStatus(const std::string& host, const std::string& path, const std::string& query, std::string& output, std::map<std::string, std::string>& header) {
276 const std::string url = "http://" + host + "/" + path + query;
277 long response_code = -1;
278 httpGet(url, output, response_code, header);
279 return response_code;
280 }
281
285 std::string httpGet(const std::string& url) {
286 std::string output;
287 long response_code = -1;
288 std::map<std::string, std::string> header;
289
290 httpGet(url, output, response_code, header);
291 return output;
292 }
293
298 std::string httpGet(const std::string& url, long& response_code) {
299 std::string output;
300 std::map<std::string, std::string> header;
301
302 response_code = -1;
303 httpGet(url, output, response_code, header);
304 return output;
305 }
306
312 std::string httpGet(const std::string& url, long& response_code, std::map<std::string, std::string>& header) {
313 std::string output;
314 response_code = -1;
315 httpGet(url, output, response_code, header);
316 return output;
317 }
318
323 long httpGetFileStatus(const std::string& url, const std::string& filename) {
324 long response_code = -1;
325 std::map<std::string, std::string> header;
326 httpGetFile(url, filename, response_code, header);
327 return response_code;
328 }
329
335 long httpGetFileStatus(const std::string& url, const std::string& filename, std::map<std::string, std::string>& header) {
336 long response_code = -1;
337 httpGetFile(url, filename, response_code, header);
338 return response_code;
339 }
340
346 template <typename utype> size_t httpGetTokens(const string& url, vector<utype>& tokens, const string& delimiters) {
347 const bool LDEBUG = (false || XHOST.DEBUG);
348 if (LDEBUG) {
349 cerr << __AFLOW_FUNC__ << " Loading url=" << url << endl;
350 }
351 tokens.clear();
352 string content;
353 const long status = httpGetStatus(url, content);
354 if (status != 200 || content.empty()) {
355 return 0;
356 }
357
358 vector<string> stokens;
359 aurostd::string2tokens(content, stokens, delimiters);
360 for (const string& stoken : stokens) {
361 if (!stoken.empty()) {
362 tokens.push_back(aurostd::string2utype<utype>(stoken));
363 }
364 }
365 if (LDEBUG) {
366 cerr << __AFLOW_FUNC__ << "Loaded " << tokens.size() << " tokens from " << url << endl;
367 }
368 return tokens.size();
369 }
370
// AST_TEMPLATE(utype) expands to an explicit instantiation of httpGetTokens for std::vector<utype>.
// NOTE(review): the statements that apply the macro are not visible between the define and the undef here;
// presumably they are generated through the aurostd instantiation macros - confirm against the original file.
371#define AST_TEMPLATE(utype) template size_t httpGetTokens(const string&, std::vector<utype>&, const string&);
374#undef AST_TEMPLATE
375
382 std::string httpGetCompressedFileContent(const string& url, long& response_code, std::map<std::string, std::string>& header) {
383 std::string content;
384 const string ext = GetFileExtension(url);
385 if (ext.empty()) {
386 return httpGet(url, response_code, header);
387 }
388 const string temp_file = aurostd::TmpFileCreate("eurl2string") + ext;
389 httpGetFile(url, temp_file, response_code, header);
390 compressfile2string(temp_file, content);
391 return content;
392 }
393
399 std::string httpGetCompressedFileContent(const string& url, long& response_code) {
400 std::map<std::string, std::string> header;
401 return httpGetCompressedFileContent(url, response_code, header);
402 }
403
408 std::string httpGetCompressedFileContent(const string& url) {
409 std::map<std::string, std::string> header;
410 long response_code = -1;
411 return httpGetCompressedFileContent(url, response_code, header);
412 }
413
419 string httpPercentEncodingFull(string work_str) {
420 const bool LDEBUG = (false || XHOST.DEBUG || _DEBUG_XHTTP_);
421
422 const char* allowed =
423 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
424 "abcdefghijklmnopqrstuvwxyz"
425 "0123456789"
426 "-_.~";
427
428 size_t pos = 0;
429 int to_replace = 0;
430 std::stringstream output;
431 if (LDEBUG) {
432 cerr << __AFLOW_FUNC__ << " Escaping '" << work_str << "'" << std::endl;
433 }
434
435 while (!work_str.empty()) {
436 pos = std::strspn(work_str.c_str(), allowed);
437 to_replace = work_str[pos];
438 if (to_replace < 0) {
439 to_replace += 256;
440 }
441 output << work_str.substr(0, pos) << "%" << std::uppercase << std::hex << std::setfill('0') << std::setw(2) << to_replace;
442 if (LDEBUG) {
443 cerr << " Match '" << work_str[pos] << "' (%" << std::uppercase << std::hex << std::setfill('0') << to_replace << std::dec << ")" << std::endl;
444 }
445 work_str.erase(0, pos + 1);
446 }
447 return output.str();
448 }
449} // namespace aurostd
450
451#endif // _AUROSTD_XHTTP_CPP_
_XHOST XHOST
#define __AFLOW_FILE__
Definition aurostd.h:44
#define __AFLOW_FUNC__
Definition aurostd.h:43
This file contains the preprocessor macros to ensure a proper instantiation of all aurostd functions.
#define AST_GEN_1(type_selection)
autogenerate 1D code based on AST_TEMPLATE
#define AST_UTYPE_NUM
#define AST_UTYPE_STRING
#define _RUNTIME_ERROR_
#define _DEBUG_XHTTP_
std::string httpGetCompressedFileContent(const string &url, long &response_code, std::map< std::string, std::string > &header)
get the content of a file from the web, decompress locally if needed
string TmpFileCreate(const string &identifier, const string &tmpdir, const bool hidden)
create a string pointing to the location of a unique temp file
void httpGetFile(const string &url, const std::string &filename, long &response_code, std::map< std::string, std::string > &header)
get a web resource as file
string CleanFileName(const string &fileIN)
cleans file names from obvious things
uint string2tokens(const string &str, vector< string > &tokens, const string &delimiters=" ", bool consecutive=false) __xprototype
size_t httpGetTokens(const string &url, vector< utype > &tokens, const string &delimiters)
get the data split into tokens
string httpPercentEncodingFull(string work_str)
Fully percent encode a string.
long httpGetStatus(const std::string &url)
Retrieve data from an url.
utype string2utype(const string &from, const uint base=10)
void httpGet(const string &url, std::string &output, long &response_code, std::map< std::string, std::string > &header)
get a web resource as string
size_t compressfile2string(const string &FileNameIN, string &content)
compressed file to string
string GetFileExtension(const string &FileName)
returns the file extension
long httpGetFileStatus(const std::string &url, const std::string &filename)
Download data as file.