VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio and Text. | D2R Server publishing the DBLP Bibliography Database, hosted at L3S Research Center

Property	Value
dcterms:bibliographicCitation	<http://dblp.uni-trier.de/rec/bibtex/conf/nips/AkbariYQCCCG21>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Boqing_Gong>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Hassan_Akbari>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Liangzhe_Yuan>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Rui_Qian>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Shih-Fu_Chang>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Wei-Hong_Chuang>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Yin_Cui>
foaf:homepage	<https://proceedings.neurips.cc/paper/2021/hash/cb3213ada48302953cb0f166464ab356-Abstract.html>
dc:identifier	DBLP conf/nips/AkbariYQCCCG21 (xsd:string)
dcterms:issued	2021 (xsd:gYear)
rdfs:label	VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio and Text. (xsd:string)
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Boqing_Gong>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Hassan_Akbari>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Liangzhe_Yuan>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Rui_Qian>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Shih-Fu_Chang>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Wei-Hong_Chuang>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Yin_Cui>
swrc:pages	24206-24221 (xsd:string)
dcterms:partOf	<https://dblp.l3s.de/d2r/resource/publications/conf/nips/2021>
owl:sameAs	<http://bibsonomy.org/uri/bibtexkey/conf/nips/AkbariYQCCCG21/dblp>
owl:sameAs	<http://dblp.rkbexplorer.com/id/conf/nips/AkbariYQCCCG21>
rdfs:seeAlso	<http://dblp.uni-trier.de/db/conf/nips/neurips2021.html#AkbariYQCCCG21>
rdfs:seeAlso	<https://proceedings.neurips.cc/paper/2021/hash/cb3213ada48302953cb0f166464ab356-Abstract.html>
swrc:series	<https://dblp.l3s.de/d2r/resource/conferences/nips>
dc:title	VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio and Text. (xsd:string)
dc:type	<http://purl.org/dc/dcmitype/Text>
rdf:type	swrc:InProceedings
rdf:type	foaf:Document