Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned. | D2R Server publishing the DBLP Bibliography Database, hosted at L3S Research Center

Property	Value
dcterms:bibliographicCitation	<http://dblp.uni-trier.de/rec/bibtex/journals/corr/abs-2209-07858>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Amanda_Askell>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Andy_Jones>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Anna_Chen>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Ben_Mann>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Catherine_Olsson>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Chris_Olah>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Danny_Hernandez>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Dario_Amodei>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Dawn_Drain>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Deep_Ganguli>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Eli_Tran-Johnson>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Ethan_Perez>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Jack_Clark>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Jackson_Kernion>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Jared_Kaplan>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Josh_Jacobson>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Kamal_Ndousse>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Liane_Lovitt>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Nelson_Elhage>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Nicholas_Joseph>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Nicholas_Schiefer>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Nova_DasSarma>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Sam_Bowman>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Sam_McCandlish>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Sam_Ringer>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Saurav_Kadavath>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Scott_Johnston>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Shauna_Kravec>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Sheer_El_Showk>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Stanislav_Fort>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Tom_Brown>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Tom_Conerly>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Tom_Henighan>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Tristan_Hume>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Yuntao_Bai>
dc:creator	<https://dblp.l3s.de/d2r/resource/authors/Zac_Hatfield-Dodds>
foaf:homepage	<http://dx.doi.org/doi.org%2F10.48550%2FarXiv.2209.07858>
foaf:homepage	<https://doi.org/10.48550/arXiv.2209.07858>
dc:identifier	DBLP journals/corr/abs-2209-07858 (xsd:string)
dc:identifier	DOI doi.org%2F10.48550%2FarXiv.2209.07858 (xsd:string)
dcterms:issued	2022 (xsd:gYear)
swrc:journal	<https://dblp.l3s.de/d2r/resource/journals/corr>
rdfs:label	Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned. (xsd:string)
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Amanda_Askell>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Andy_Jones>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Anna_Chen>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Ben_Mann>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Catherine_Olsson>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Chris_Olah>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Danny_Hernandez>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Dario_Amodei>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Dawn_Drain>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Deep_Ganguli>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Eli_Tran-Johnson>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Ethan_Perez>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Jack_Clark>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Jackson_Kernion>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Jared_Kaplan>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Josh_Jacobson>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Kamal_Ndousse>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Liane_Lovitt>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Nelson_Elhage>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Nicholas_Joseph>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Nicholas_Schiefer>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Nova_DasSarma>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Sam_Bowman>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Sam_McCandlish>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Sam_Ringer>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Saurav_Kadavath>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Scott_Johnston>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Shauna_Kravec>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Sheer_El_Showk>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Stanislav_Fort>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Tom_Brown>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Tom_Conerly>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Tom_Henighan>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Tristan_Hume>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Yuntao_Bai>
foaf:maker	<https://dblp.l3s.de/d2r/resource/authors/Zac_Hatfield-Dodds>
owl:sameAs	<http://bibsonomy.org/uri/bibtexkey/journals/corr/abs-2209-07858/dblp>
owl:sameAs	<http://dblp.rkbexplorer.com/id/journals/corr/abs-2209-07858>
rdfs:seeAlso	<http://dblp.uni-trier.de/db/journals/corr/corr2209.html#abs-2209-07858>
rdfs:seeAlso	<https://doi.org/10.48550/arXiv.2209.07858>
dc:title	Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned. (xsd:string)
dc:type	<http://purl.org/dc/dcmitype/Text>
rdf:type	swrc:Article
rdf:type	foaf:Document
swrc:volume	abs/2209.07858 (xsd:string)