/*------------------------------------------------------------------------------ * download.c : gnss data downloader * * Copyright (C) 2012-2020 by T.TAKASU, All rights reserved. * * version : $Revision:$ $Date:$ * history : 2012/12/28 1.0 new * 2013/06/02 1.1 replace S_IREAD by S_IRUSR * 2020/11/30 1.2 support protocol https:// and ftps:// * support compressed RINEX (CRX) files * support wild-card (*) in URL for FTP and FTPS * use "=" to separate file name from URL * fix bug on double-free of download paths * limit max number of download paths * use integer types in stdint.h *-----------------------------------------------------------------------------*/ #include #include #include #include "rtklib.h" #define FTP_CMD "wget" /* FTP/HTTP command */ #define FTP_TIMEOUT 60 /* FTP/HTTP timeout (s) */ #define FTP_LISTING ".listing" /* FTP listing file */ #define FTP_NOFILE 2048 /* FTP error no file */ #define HTTP_NOFILE 1 /* HTTP error no file */ #define FTP_RETRY 3 /* FTP number of retry */ #define MAX_PATHS 131072 /* max number of download paths */ /* type definitions ----------------------------------------------------------*/ typedef struct { /* download path type */ char *remot; /* remote path */ char *local; /* local path */ } path_t; typedef struct { /* download paths type */ path_t *path; /* download paths */ int n,nmax; /* number and max number of paths */ } paths_t; /* execute command with test timeout -----------------------------------------*/ extern int execcmd_to(const char *cmd) { #ifdef WIN32 PROCESS_INFORMATION info; STARTUPINFO si={0}; DWORD stat; char cmds[4096]; si.cb=sizeof(si); sprintf(cmds,"cmd /c %s",cmd); if (!CreateProcess(NULL,(LPTSTR)cmds,NULL,NULL,FALSE,CREATE_NO_WINDOW,NULL, NULL,&si,&info)) return -1; while (WaitForSingleObject(info.hProcess,10)==WAIT_TIMEOUT) { showmsg(""); } if (!GetExitCodeProcess(info.hProcess,&stat)) stat=-1; CloseHandle(info.hProcess); CloseHandle(info.hThread); return (int)stat; #else return system(cmd); #endif } /* 
generate path by replacing keywords ---------------------------------------*/ static void genpath(const char *file, const char *name, gtime_t time, int seqno, char *path) { char buff[1024],*p,*q,*r,*env,var[1024]=""; char l_name[1024]="",u_name[1024]=""; for (p=l_name,q=(char *)name;(*p=(char)tolower(*q));p++,q++) ; for (p=u_name,q=(char *)name;(*p=(char)toupper(*q));p++,q++) ; for (p=buff,q=(char *)file;(*p=*q);p++,q++) { if (*q=='%') q++; else continue; if (*q=='s'||*q=='r') p+=sprintf(p,"%s",l_name)-1; else if (*q=='S'||*q=='R') p+=sprintf(p,"%s",u_name)-1; else if (*q=='N') p+=sprintf(p,"%d",seqno)-1; else if (*q=='{'&&(r=strchr(q+1,'}'))) { strncpy(var,q+1,r-q-1); var[r-q-1]='\0'; if ((env=getenv(var))) p+=sprintf(p,"%s",env)-1; q=r; } else q--; } reppath(buff,path,time,"",""); } /* parse field strings separated by spaces -----------------------------------*/ static char *parse_str(char *buff, char *str, int nmax) { char *p,*q,sep[]=" \r\n"; for (p=buff;*p&&*p==' ';p++) ; if (*p=='"') sep[0]=*p++; /* enclosed within quotation marks */ for (q=str;*p&&!strchr(sep,*p);p++) { if (qn;i++) { free(paths->path[i].remot); free(paths->path[i].local); } free(paths->path); } /* add download paths --------------------------------------------------------*/ static int add_path(paths_t *paths, const char *remot, const char *dir) { path_t *paths_path; char local[1024]; if (paths->n>=paths->nmax) { if ((paths->nmax=paths->nmax<=0?1024:paths->nmax*2)>MAX_PATHS) { return 0; } paths_path=(path_t *)realloc(paths->path,sizeof(path_t)*paths->nmax); if (!paths_path) { return 0; } paths->path=paths_path; } remot2local(remot,dir,local); paths->path[paths->n].remot=paths->path[paths->n].local=NULL; if (!(paths->path[paths->n].remot=(char *)malloc(strlen(remot)+1))|| !(paths->path[paths->n].local=(char *)malloc(strlen(local)+1))) { return 0; } strcpy(paths->path[paths->n].remot,remot); strcpy(paths->path[paths->n].local,local); paths->n++; return 1; } /* generate download path 
----------------------------------------------------*/ static int gen_path(gtime_t time, gtime_t time_p, int seqnos, int seqnoe, const url_t *url, const char *sta, const char *dir, paths_t *paths) { char remot[1024],remot_p[1024],dir_t[1024]; int i; if (!*dir) dir=url->dir; if (!*dir) dir="."; if (strstr(url->path,"%N")) { for (i=seqnos;i<=seqnoe;i++) { genpath(url->path,sta,time,i,remot); genpath(dir ,sta,time,i,dir_t); if (time_p.time) { genpath(url->path,sta,time_p,i,remot_p); if (!strcmp(remot_p,remot)) continue; } if (!add_path(paths,remot,dir_t)) return 0; } } else { genpath(url->path,sta,time,0,remot); genpath(dir ,sta,time,0,dir_t); if (time_p.time) { genpath(url->path,sta,time_p,0,remot_p); if (!strcmp(remot_p,remot)) return 1; } if (!add_path(paths,remot,dir_t)) return 0; } return 1; } /* generate download paths ---------------------------------------------------*/ static int gen_paths(gtime_t time, gtime_t time_p, int seqnos, int seqnoe, const url_t *url, char **stas, int nsta, const char *dir, paths_t *paths) { int i; if (strstr(url->path,"%s")||strstr(url->path,"%S")) { for (i=0;in;i++) { for (j=i+1;jn;j++) { if (strcmp(paths->path[i].remot,paths->path[j].remot)) continue; free(paths->path[j].remot); free(paths->path[j].local); for (k=j;kn-1;k++) paths->path[k]=paths->path[k+1]; paths->n--; j--; } } } /* generate local directory recursively --------------------------------------*/ static int mkdir_r(const char *dir) { char pdir[1024],*p; #ifdef WIN32 HANDLE h; WIN32_FIND_DATA data; if (!*dir||!strcmp(dir+1,":\\")) return 1; strcpy(pdir,dir); if ((p=strrchr(pdir,FILEPATHSEP))) { *p='\0'; h=FindFirstFile(pdir,&data); if (h==INVALID_HANDLE_VALUE) { if (!mkdir_r(pdir)) return 0; } else FindClose(h); } if (CreateDirectory(dir,NULL)|| GetLastError()==ERROR_ALREADY_EXISTS) return 1; trace(2,"directory generation error: dir=%s\n",dir); return 0; #else FILE *fp; if (!*dir) return 1; strcpy(pdir,dir); if ((p=strrchr(pdir,FILEPATHSEP))) { *p='\0'; if 
(!(fp=fopen(pdir,"r"))) { if (!mkdir_r(pdir)) return 0; } else fclose(fp); } if (!mkdir(dir,0777)||errno==EEXIST) return 1; trace(2,"directory generation error: dir=%s\n",dir); return 0; #endif } /* get remote file list for FTP or FTPS --------------------------------------*/ static int get_list(const path_t *path, const char *usr, const char *pwd, const char *proxy) { FILE *fp; char cmd[4096],env[1024]="",remot[1024],*opt="",*opt2="",*p; int stat; #ifndef WIN32 opt2=" -o /dev/null"; #endif remove(FTP_LISTING); strcpy(remot,path->remot); if ((p=strrchr(remot,'/'))) strcpy(p+1,"__REQUEST_LIST__"); else return 0; if (*proxy) { sprintf(env,"set ftp_proxy=http://%s & ",proxy); opt="--proxy=on "; } sprintf(cmd,"%s%s %s --ftp-user=%s --ftp-password=%s --glob=off " "--passive-ftp --no-remove-listing -N %s-t 1 -T %d%s\n", env,FTP_CMD,remot,usr,pwd,opt,FTP_TIMEOUT,opt2); execcmd_to(cmd); if (!(fp=fopen(FTP_LISTING,"r"))) return 0; fclose(fp); return 1; } /* replace wild-card (*) in the paths ----------------------------------------*/ static int rep_paths(path_t *path, const char *file) { char buff1[1024],buff2[1024],*p,*q,*remot,*local; strcpy(buff1,path->remot); strcpy(buff2,path->local); if ((p=strrchr(buff1,'/'))) p++; else p=buff1; if ((q=strrchr(buff2,FILEPATHSEP))) q++; else q=buff2; strcpy(p,file); strcpy(q,file); if (!(remot=(char *)malloc(strlen(buff1)+1)) || !(local=(char *)malloc(strlen(buff2)+1))) { free(remot); return 0; } strcpy(remot,buff1); strcpy(local,buff2); free(path->remot); free(path->local); path->remot=remot; path->local=local; return 1; } /* test file in remote file list ---------------------------------------------*/ static int test_list(path_t *path) { FILE *fp; char buff[1024],*file,*list,*p; int i; if (!(fp=fopen(FTP_LISTING,"r"))) return 1; if ((p=strrchr(path->remot,'/'))) file=p+1; else return 1; /* search file in remote file list */ while (fgets(buff,sizeof(buff),fp)) { /* remove symbolic link */ if ((p=strstr(buff,"->"))) *p='\0'; for 
(i=strlen(buff)-1;i>=0;i--) { if (strchr(" \r\n",buff[i])) buff[i]='\0'; else break; } /* file as last field */ if ((p=strrchr(buff,' '))) list=p+1; else list=buff; if (!strcmp(file,list)) { fclose(fp); return 1; } /* compare with wild-card (*) */ if (cmp_str(list,file)) { /* replace wild-card (*) in the paths */ if (!rep_paths(path,list)) { fclose(fp); return 0; } fclose(fp); return 1; } } fclose(fp); return 0; } /* execute download ----------------------------------------------------------*/ static int exec_down(path_t *path, char *remot_p, const char *usr, const char *pwd, const char *proxy, int opts, int *n, FILE *fp) { char dir[1024],errfile[1024],tmpfile[1024],cmd[4096],env[1024]=""; char opt[1024]="",*opt2="",*p; int ret,proto; #ifndef WIN32 opt2=" 2> /dev/null"; #endif strcpy(dir,path->local); if ((p=strrchr(dir,FILEPATHSEP))) *p='\0'; if (!strncmp(path->remot,"ftp://" ,6)) proto=0; else if (!strncmp(path->remot,"ftps://" ,7)) proto=2; else if (!strncmp(path->remot,"http://" ,7)) proto=1; else if (!strncmp(path->remot,"https://",8)) proto=1; else { trace(2,"exec_down: invalid path %s\n",path->remot); showmsg("STAT=X"); if (fp) fprintf(fp,"%s ERROR (INVALID PATH)\n",path->remot); n[1]++; return 0; } /* test local file existence */ if (!(opts&DLOPT_FORCE)&&test_file(path->local)) { showmsg("STAT=."); if (fp) fprintf(fp,"%s in %s\n",path->remot,dir); n[2]++; return 0; } showmsg("STAT=_"); /* get remote file list for FTP or FTPS */ if ((proto==0||proto==2)&&(p=strrchr(path->remot,'/'))&& strncmp(path->remot,remot_p,p-path->remot)) { if (get_list(path,usr,pwd,proxy)) { strcpy(remot_p,path->remot); } } /* test file in listing for FTP or FTPS or extend wild-card in file path */ if ((proto==0||proto==2)&&!test_list(path)) { showmsg("STAT=x"); if (fp) fprintf(fp,"%s NO_FILE\n",path->remot); n[1]++; return 0; } /* generate local directory recursively */ if (!mkdir_r(dir)) { showmsg("STAT=X"); if (fp) fprintf(fp,"%s -> %s ERROR (LOCAL DIR)\n",path->remot,dir); n[3]++; 
return 0; } /* re-test local file existence for file with wild-card */ if (!(opts&DLOPT_FORCE)&&test_file(path->local)) { showmsg("STAT=."); if (fp) fprintf(fp,"%s in %s\n",path->remot,dir); n[2]++; return 0; } /* proxy option */ if (*proxy) { sprintf(env,"set %s_proxy=http://%s & ",proto==0||proto==2?"ftp":"http", proxy); sprintf(opt," --proxy=on "); } /* download command */ sprintf(errfile,"%s.err",path->local); if (proto==0||proto==2) { sprintf(cmd,"%s%s %s --ftp-user=%s --ftp-password=%s --glob=off " "--passive-ftp %s-t %d -T %d -O \"%s\" -o \"%s\"%s\n", env,FTP_CMD,path->remot,usr,pwd,opt,FTP_RETRY,FTP_TIMEOUT, path->local,errfile,opt2); } else { if (*pwd) { sprintf(opt+strlen(opt)," --http-user=%s --http-password=%s ",usr, pwd); } sprintf(cmd,"%s%s %s %s-t %d -T %d -O \"%s\" -o \"%s\"%s\n",env,FTP_CMD, path->remot,opt,FTP_RETRY,FTP_TIMEOUT,path->local,errfile,opt2); } if (fp) fprintf(fp,"%s -> %s",path->remot,dir); /* execute download command */ if ((ret=execcmd_to(cmd))) { if ((proto==0&&ret==FTP_NOFILE)|| (proto==1&&ret==HTTP_NOFILE)) { showmsg("STAT=x"); if (fp) fprintf(fp," NO_FILE\n"); n[1]++; } else { trace(2,"exec_down: error proto=%d %d\n",proto,ret); showmsg("STAT=X"); if (fp) fprintf(fp," ERROR (%d)\n",ret); n[3]++; } remove(path->local); if (!(opts&DLOPT_HOLDERR)) { remove(errfile); } return ret==2; } remove(errfile); /* uncompress download file */ if (!(opts&DLOPT_KEEPCMP)&&(p=strrchr(path->local,'.'))&& (!strcmp(p,".z")||!strcmp(p,".gz")||!strcmp(p,".zip")|| !strcmp(p,".Z")||!strcmp(p,".GZ")||!strcmp(p,".ZIP"))) { if (rtk_uncompress(path->local,tmpfile)) { remove(path->local); } else { trace(2,"exec_down: uncompress error\n"); showmsg("STAT=C"); if (fp) fprintf(fp," ERROR (UNCOMP)\n"); n[3]++; return 0; } } showmsg("STAT=o"); if (fp) fprintf(fp," OK\n"); n[0]++; return 0; } /* test local file -----------------------------------------------------------*/ static int test_local(gtime_t ts, gtime_t te, double ti, const char *path, const char *sta, 
const char *dir, int *nc, int *nt, FILE *fp) { gtime_t time; char remot[1024],remot_p[1024],dir_t[1024],local[1024],str[1024]; int stat,abort=0; for (time=ts;timediff(time,te)<=1E-3;time=timeadd(time,ti)) { sprintf(str,"%s->%s",path,local); if (showmsg(str)) { abort=1; break; } genpath(path,sta,time,0,remot); genpath(dir ,sta,time,0,dir_t); remot2local(remot,dir_t,local); stat=test_file(local); fprintf(fp," %s",stat==0?"-":(stat==1?"o":"z")); showmsg("STAT=%s",stat==0?"x":(stat==1?"o":"z")); (*nt)++; if (stat) (*nc)++; } fprintf(fp,"\n"); return abort; } /* test local files ----------------------------------------------------------*/ static int test_locals(gtime_t ts, gtime_t te, double ti, const url_t *url, char **stas, int nsta, const char *dir, int *nc, int *nt, FILE *fp) { int i; if (strstr(url->path,"%s")||strstr(url->path,"%S")) { fprintf(fp,"%s\n",url->type); for (i=0;ipath,stas[i],*dir?dir:url->dir,nc+i, nt+i,fp)) { return 1; } } } else { fprintf(fp,"%-12s:",url->type); if (test_local(ts,te,ti,url->path,"",*dir?dir:url->dir,nc,nt,fp)) { return 1; } } return 0; } /* print total count of local files ------------------------------------------*/ static int print_total(const url_t *url, char **stas, int nsta, int *nc, int *nt, FILE *fp) { int i; if (strstr(url->path,"%s")||strstr(url->path,"%S")) { fprintf(fp,"%s\n",url->type); for (i=0;itype,nc[0],nt[0]); return 1; } /* read URL list file ---------------------------------------------------------- * read URL list file for GNSS data * args : char *file I URL list file * char **types I selected types ("*":wildcard) * int ntype I number of selected types * urls_t *urls O URL list * int nmax I max number of URL list * return : number of URL addresses (0:error) * notes : * (1) URL list file contains records containing the following fields * separated by spaces. if a field contains spaces, enclose it within "". 
*
*        data_type  url_address  default_local_directory
*
*    (2) strings after # in a line are treated as comments
*    (3) url_address should be:
*
*        ftp://host_address/file_path or
*        ftps://host_address/file_path or
*        http://host_address/file_path or
*        https://host_address/file_path
*
*    (4) the field url_address or default_local_directory can include the
*        following keywords replaced by date, time, station names and
*        environment variables.
*
*        %Y -> yyyy    : year (4 digits) (2000-2099)
*        %y -> yy      : year (2 digits) (00-99)
*        %m -> mm      : month           (01-12)
*        %d -> dd      : day of month    (01-31)
*        %h -> hh      : hours           (00-23)
*        %H -> a       : hour code       (a-x)
*        %M -> mm      : minutes         (00-59)
*        %n -> ddd     : day of year     (001-366)
*        %W -> wwww    : gps week        (0001-9999)
*        %D -> d       : day of gps week (0-6)
*        %N -> nnn     : general number
*        %s -> ssss    : station name    (lower-case)
*        %S -> SSSS    : station name    (upper-case)
*        %r -> rrrr    : station name    (lower-case, as %s, in genpath())
*        %{env} -> env : environment variable
*-----------------------------------------------------------------------------*/
/* NOTE(review): the next line is damaged in this copy of the file: everything
*  from the first loop of dl_readurls() up to the progress message of the
*  download loop (presumably in dl_exec()) is missing. it is kept
*  byte-identical - restore it from the original source. */
extern int dl_readurls(const char *file, char **types, int ntype, url_t *urls, int nmax) { FILE *fp; char buff[2048],type[32],path[1024],dir[1024],*p; int i,n=0; if (!(fp=fopen(file,"r"))) { fprintf(stderr,"options file read error %s\n",file); return 0; } for (i=0;i%s (%d/%d)",paths.path[i].remot,paths.path[i].local,i+1, paths.n); if (showmsg(str)) break; /* execute download */ if (exec_down(paths.path+i,remot_p,usr,pwd,proxy,opts,n,fp)) { break; } } if (!(opts&DLOPT_HOLDLST)) { remove(FTP_LISTING); } sprintf(msg,"OK=%d No_File=%d Skip=%d Error=%d (Time=%.1f s)",n[0],n[1],n[2], n[3],(tickget()-tick)*0.001); free_path(&paths); return 1; }
/* execute local file test -----------------------------------------------------
* execute local file test
* args   : gtime_t ts,te    I   time start and end
*          double ti        I   time interval (s)
*          url_t  *urls     I   remote URL addresses
*          int    nurl      I   number of remote URL addresses
*          char   **stas    I   stations
*          int    nsta      I   number of stations
*
char *dir I local directory * int ncol I number of column * int datefmt I date format (0:year-dow,1:year-dd/mm,2:week) * FILE *fp IO log test result file pointer * return : status (1:ok,0:error,-1:aborted) *-----------------------------------------------------------------------------*/ extern void dl_test(gtime_t ts, gtime_t te, double ti, const url_t *urls, int nurl, char **stas, int nsta, const char *dir, int ncol, int datefmt, FILE *fp) { gtime_t time; double tow; char year[32],date[32],date_p[32]; int i,j,n,m,*nc,*nt,week,flag,abort=0; if (ncol<1) ncol=1; else if (ncol>200) ncol=200; fprintf(fp,"** LOCAL DATA AVAILABILITY (%s, %s) **\n\n", time_str(timeget(),0),*dir?dir:"*"); for (i=n=0;i=1E-3) break; if (datefmt<=1) { genpath(datefmt==0?"%n":"%d","",time,0,date); fprintf(fp,"%-4s",strcmp(date,date_p)?date:""); } else { if (fabs(time2gpst(time,&week))<1.0) { fprintf(fp,"%04d",week); flag=1; } else { fprintf(fp,"%s",flag?"":" "); flag=0; } } strcpy(date_p,date); } fprintf(fp,"\n"); for (i=j=0;i=0.0) time=te; /* test local files */ abort=test_locals(ts,time,ti,urls+i,stas,nsta,dir,nc+j,nt+j,fp); j+=strstr(urls[i].path,"%s")||strstr(urls[i].path,"%S")?nsta:1; } fprintf(fp,"\n"); } fprintf(fp,"# COUNT : FILES/TOTAL\n"); for (i=j=0;i